change config specifiers in input file format

Instead of a JSON dictionary/object, input file options are now
specified as 'key=value' pairs, one per line. A line starting with '-'
sets an option that applies only to the next URL; a line starting with
'-G' sets a Global option that applies to all following URLs.

See the docstring of parse_inputfile() for details.

Example option specifiers:

- filename = "{id}.{extension}"
- extractor.pixiv.user.directory = ["Pixiv Users", "{user[id]}"]
-spaces="are_optional"
-G keywords = {"global": "option"}
This commit is contained in:
Mike Fährmann
2018-02-15 21:15:33 +01:00
parent f970a8f13c
commit b50bdbf3d7
4 changed files with 99 additions and 45 deletions

View File

@@ -72,25 +72,34 @@ def prepare_filter(filterexpr, target):
def parse_inputfile(file): def parse_inputfile(file):
"""Filter and strip strings from an input file """Filter and process strings from an input file.
Lines starting with '#' and empty lines will be ignored. Lines starting with '#' and empty lines will be ignored.
Lines starting with '{' will be interpreted as JSON-object and Lines starting with '-' will be interpreted as a key-value pair separated
its values, while processing the next URL, are going to be by an '='. where 'key' is a dot-separated option name and 'value' is a
applied to the global config. JSON-parsable value for it. These config options will be applied while
processing the next URL.
Lines starting with '-G' are the same as above, except these options will
be valid for all following URLs, i.e. they are Global.
Everything else will be used as potential URL. Everything else will be used as potential URL.
Example input file: Example input file:
# this is a comment # settings global options
{"base-directory": "/tmp/", "skip": false} -G base-directory = "/tmp/"
{"more": "multiple objects before an URL will be merged together"} -G skip = false
# setting local options for the next URL
-filename="spaces_are_optional.jpg"
-skip = true
https://example.org/ https://example.org/
# config is back to its initial values # next URL uses default filename and 'skip' is false.
https://example.com/index.htm https://example.com/index.htm
""" """
confdict = None gconf = []
lconf = []
for line in file: for line in file:
line = line.strip() line = line.strip()
@@ -99,26 +108,36 @@ def parse_inputfile(file):
# empty line or comment # empty line or comment
continue continue
elif line[0] == "{": elif line[0] == "-":
# url-specific config spec # config spec
try: if len(line) >= 2 and line[1] == "G":
cfd = json.loads(line) conf = gconf
except ValueError as exc: line = line[2:]
log.warning("input file: unable to parse config line: %s",exc) else:
conf = lconf
line = line[1:]
key, sep, value = line.partition("=")
if not sep:
log.warning("input file: invalid <key>=<value> pair: %s", line)
continue continue
if confdict: try:
util.combine_dict(confdict, cfd) value = json.loads(value.strip())
else: except ValueError as exc:
confdict = cfd log.warning("input file: unable to parse '%s': %s", value, exc)
continue
conf.append((key.strip().split("."), value))
else: else:
# url # url
if confdict: if gconf or lconf:
yield util.ExtendedUrl(line, confdict) yield util.ExtendedUrl(line, gconf, lconf)
gconf = []
lconf = []
else: else:
yield line yield line
confdict = None
def main(): def main():
@@ -231,7 +250,9 @@ def main():
try: try:
log.debug("Starting %s for '%s'", jobtype.__name__, url) log.debug("Starting %s for '%s'", jobtype.__name__, url)
if isinstance(url, util.ExtendedUrl): if isinstance(url, util.ExtendedUrl):
with config.apply(url.config): for key, value in url.gconfig:
config.set(key, value)
with config.apply(url.lconfig):
jobtype(url.value).run() jobtype(url.value).run()
else: else:
jobtype(url).run() jobtype(url).run()

View File

@@ -126,22 +126,32 @@ def setdefault(keys, value, conf=_config):
return conf.setdefault(keys[-1], value) return conf.setdefault(keys[-1], value)
def unset(keys, conf=_config):
"""Unset the value of property 'key'"""
try:
for k in keys[:-1]:
conf = conf[k]
del conf[keys[-1]]
except (KeyError, AttributeError):
pass
class apply(): class apply():
"""Context Manager to apply a dict to global config""" """Context Manager to temporarily apply a collection of key-value pairs"""
_sentinel = object() _sentinel = object()
def __init__(self, config_dict): def __init__(self, kvlist):
self.original_values = {} self.original = []
self.config_dict = config_dict self.kvlist = kvlist
for key, value in config_dict.items():
self.original_values[key] = _config.get(key, self._sentinel)
def __enter__(self): def __enter__(self):
_config.update(self.config_dict) for key, value in self.kvlist:
self.original.append((key, get(key, self._sentinel)))
set(key, value)
def __exit__(self, etype, value, traceback): def __exit__(self, etype, value, traceback):
for key, value in self.original_values.items(): for key, value in self.original:
if value is self._sentinel: if value is self._sentinel:
del _config[key] unset(key)
else: else:
_config[key] = value set(key, value)

View File

@@ -261,10 +261,9 @@ class ChainPredicate():
class ExtendedUrl(): class ExtendedUrl():
"""URL with attached config dict""" """URL with attached config key-value pairs"""
def __init__(self, url, confdict): def __init__(self, url, gconf, lconf):
self.value = url self.value, self.gconfig, self.lconfig = url, gconf, lconf
self.config = confdict
def __str__(self): def __str__(self):
return self.value return self.value

View File

@@ -31,6 +31,15 @@ class TestConfig(unittest.TestCase):
self.assertEqual(config.get(["d"]), None) self.assertEqual(config.get(["d"]), None)
self.assertEqual(config.get(["e", "f", "g"], 123), 123) self.assertEqual(config.get(["e", "f", "g"], 123), 123)
def test_interpolate(self):
self.assertEqual(config.interpolate(["a"]), "1")
self.assertEqual(config.interpolate(["b", "a"]), "1")
self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
config.set(["d"], 123)
self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
def test_set(self): def test_set(self):
config.set(["b", "c"], [1, 2, 3]) config.set(["b", "c"], [1, 2, 3])
config.set(["e", "f", "g"], value=234) config.set(["e", "f", "g"], value=234)
@@ -43,14 +52,29 @@ class TestConfig(unittest.TestCase):
self.assertEqual(config.get(["b", "c"]), "text") self.assertEqual(config.get(["b", "c"]), "text")
self.assertEqual(config.get(["e", "f", "g"]), 234) self.assertEqual(config.get(["e", "f", "g"]), 234)
def test_interpolate(self): def test_unset(self):
self.assertEqual(config.interpolate(["a"]), "1") config.unset(["a"])
self.assertEqual(config.interpolate(["b", "a"]), "1") config.unset(["b", "c"])
self.assertEqual(config.interpolate(["b", "c"], "2"), "text") config.unset(["c", "d"])
self.assertEqual(config.interpolate(["b", "d"], "2"), "2") self.assertEqual(config.get(["a"]), None)
config.set(["d"], 123) self.assertEqual(config.get(["b", "a"]), 2)
self.assertEqual(config.interpolate(["b", "d"], "2"), 123) self.assertEqual(config.get(["b", "c"]), None)
self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
def test_apply(self):
options = (
(["b", "c"], [1, 2, 3]),
(["e", "f", "g"], 234),
)
self.assertEqual(config.get(["b", "c"]), "text")
self.assertEqual(config.get(["e", "f", "g"]), None)
with config.apply(options):
self.assertEqual(config.get(["b", "c"]), [1, 2, 3])
self.assertEqual(config.get(["e", "f", "g"]), 234)
self.assertEqual(config.get(["b", "c"]), "text")
self.assertEqual(config.get(["e", "f", "g"]), None)
if __name__ == '__main__': if __name__ == '__main__':