change config specifiers in input file format
Instead of a JSON object, each input file option is now specified as a
'key=value' pair on a line starting with '-' (applies only to the next
URL) or '-G' (Global option, applies to all following URLs).
See the docstring of parse_inputfile() for details.
Example option specifiers:
- filename = "{id}.{extension}"
- extractor.pixiv.user.directory = ["Pixiv Users", "{user[id]}"]
-spaces="are_optional"
-G keywords = {"global": "option"}
This commit is contained in:
@@ -72,25 +72,34 @@ def prepare_filter(filterexpr, target):
|
|||||||
|
|
||||||
|
|
||||||
def parse_inputfile(file):
|
def parse_inputfile(file):
|
||||||
"""Filter and strip strings from an input file
|
"""Filter and process strings from an input file.
|
||||||
|
|
||||||
Lines starting with '#' and empty lines will be ignored.
|
Lines starting with '#' and empty lines will be ignored.
|
||||||
Lines starting with '{' will be interpreted as JSON-object and
|
Lines starting with '-' will be interpreted as a key-value pair separated
|
||||||
its values, while processing the next URL, are going to be
|
by an '=', where 'key' is a dot-separated option name and 'value' is a
|
||||||
applied to the global config.
|
JSON-parsable value for it. These config options will be applied while
|
||||||
|
processing the next URL.
|
||||||
|
Lines starting with '-G' are the same as above, except these options will
|
||||||
|
be valid for all following URLs, i.e. they are Global.
|
||||||
Everything else will be used as potential URL.
|
Everything else will be used as potential URL.
|
||||||
|
|
||||||
Example input file:
|
Example input file:
|
||||||
|
|
||||||
# this is a comment
|
# setting global options
|
||||||
{"base-directory": "/tmp/", "skip": false}
|
-G base-directory = "/tmp/"
|
||||||
{"more": "multiple objects before an URL will be merged together"}
|
-G skip = false
|
||||||
|
|
||||||
|
# setting local options for the next URL
|
||||||
|
-filename="spaces_are_optional.jpg"
|
||||||
|
-skip = true
|
||||||
|
|
||||||
https://example.org/
|
https://example.org/
|
||||||
|
|
||||||
# config is back to its initial values
|
# next URL uses default filename and 'skip' is false.
|
||||||
https://example.com/index.htm
|
https://example.com/index.htm
|
||||||
"""
|
"""
|
||||||
confdict = None
|
gconf = []
|
||||||
|
lconf = []
|
||||||
|
|
||||||
for line in file:
|
for line in file:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
@@ -99,26 +108,36 @@ def parse_inputfile(file):
|
|||||||
# empty line or comment
|
# empty line or comment
|
||||||
continue
|
continue
|
||||||
|
|
||||||
elif line[0] == "{":
|
elif line[0] == "-":
|
||||||
# url-specific config spec
|
# config spec
|
||||||
try:
|
if len(line) >= 2 and line[1] == "G":
|
||||||
cfd = json.loads(line)
|
conf = gconf
|
||||||
except ValueError as exc:
|
line = line[2:]
|
||||||
log.warning("input file: unable to parse config line: %s",exc)
|
else:
|
||||||
|
conf = lconf
|
||||||
|
line = line[1:]
|
||||||
|
|
||||||
|
key, sep, value = line.partition("=")
|
||||||
|
if not sep:
|
||||||
|
log.warning("input file: invalid <key>=<value> pair: %s", line)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if confdict:
|
try:
|
||||||
util.combine_dict(confdict, cfd)
|
value = json.loads(value.strip())
|
||||||
else:
|
except ValueError as exc:
|
||||||
confdict = cfd
|
log.warning("input file: unable to parse '%s': %s", value, exc)
|
||||||
|
continue
|
||||||
|
|
||||||
|
conf.append((key.strip().split("."), value))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# url
|
# url
|
||||||
if confdict:
|
if gconf or lconf:
|
||||||
yield util.ExtendedUrl(line, confdict)
|
yield util.ExtendedUrl(line, gconf, lconf)
|
||||||
|
gconf = []
|
||||||
|
lconf = []
|
||||||
else:
|
else:
|
||||||
yield line
|
yield line
|
||||||
confdict = None
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -231,7 +250,9 @@ def main():
|
|||||||
try:
|
try:
|
||||||
log.debug("Starting %s for '%s'", jobtype.__name__, url)
|
log.debug("Starting %s for '%s'", jobtype.__name__, url)
|
||||||
if isinstance(url, util.ExtendedUrl):
|
if isinstance(url, util.ExtendedUrl):
|
||||||
with config.apply(url.config):
|
for key, value in url.gconfig:
|
||||||
|
config.set(key, value)
|
||||||
|
with config.apply(url.lconfig):
|
||||||
jobtype(url.value).run()
|
jobtype(url.value).run()
|
||||||
else:
|
else:
|
||||||
jobtype(url).run()
|
jobtype(url).run()
|
||||||
|
|||||||
@@ -126,22 +126,32 @@ def setdefault(keys, value, conf=_config):
|
|||||||
return conf.setdefault(keys[-1], value)
|
return conf.setdefault(keys[-1], value)
|
||||||
|
|
||||||
|
|
||||||
|
def unset(keys, conf=_config):
|
||||||
|
"""Unset the value of property 'key'"""
|
||||||
|
try:
|
||||||
|
for k in keys[:-1]:
|
||||||
|
conf = conf[k]
|
||||||
|
del conf[keys[-1]]
|
||||||
|
except (KeyError, AttributeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class apply():
|
class apply():
|
||||||
"""Context Manager to apply a dict to global config"""
|
"""Context Manager to temporarily apply a collection of key-value pairs"""
|
||||||
_sentinel = object()
|
_sentinel = object()
|
||||||
|
|
||||||
def __init__(self, config_dict):
|
def __init__(self, kvlist):
|
||||||
self.original_values = {}
|
self.original = []
|
||||||
self.config_dict = config_dict
|
self.kvlist = kvlist
|
||||||
for key, value in config_dict.items():
|
|
||||||
self.original_values[key] = _config.get(key, self._sentinel)
|
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
_config.update(self.config_dict)
|
for key, value in self.kvlist:
|
||||||
|
self.original.append((key, get(key, self._sentinel)))
|
||||||
|
set(key, value)
|
||||||
|
|
||||||
def __exit__(self, etype, value, traceback):
|
def __exit__(self, etype, value, traceback):
|
||||||
for key, value in self.original_values.items():
|
for key, value in self.original:
|
||||||
if value is self._sentinel:
|
if value is self._sentinel:
|
||||||
del _config[key]
|
unset(key)
|
||||||
else:
|
else:
|
||||||
_config[key] = value
|
set(key, value)
|
||||||
|
|||||||
@@ -261,10 +261,9 @@ class ChainPredicate():
|
|||||||
|
|
||||||
|
|
||||||
class ExtendedUrl():
|
class ExtendedUrl():
|
||||||
"""URL with attached config dict"""
|
"""URL with attached config key-value pairs"""
|
||||||
def __init__(self, url, confdict):
|
def __init__(self, url, gconf, lconf):
|
||||||
self.value = url
|
self.value, self.gconfig, self.lconfig = url, gconf, lconf
|
||||||
self.config = confdict
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.value
|
return self.value
|
||||||
|
|||||||
@@ -31,6 +31,15 @@ class TestConfig(unittest.TestCase):
|
|||||||
self.assertEqual(config.get(["d"]), None)
|
self.assertEqual(config.get(["d"]), None)
|
||||||
self.assertEqual(config.get(["e", "f", "g"], 123), 123)
|
self.assertEqual(config.get(["e", "f", "g"], 123), 123)
|
||||||
|
|
||||||
|
def test_interpolate(self):
|
||||||
|
self.assertEqual(config.interpolate(["a"]), "1")
|
||||||
|
self.assertEqual(config.interpolate(["b", "a"]), "1")
|
||||||
|
self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
|
||||||
|
self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
|
||||||
|
config.set(["d"], 123)
|
||||||
|
self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
|
||||||
|
self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
|
||||||
|
|
||||||
def test_set(self):
|
def test_set(self):
|
||||||
config.set(["b", "c"], [1, 2, 3])
|
config.set(["b", "c"], [1, 2, 3])
|
||||||
config.set(["e", "f", "g"], value=234)
|
config.set(["e", "f", "g"], value=234)
|
||||||
@@ -43,14 +52,29 @@ class TestConfig(unittest.TestCase):
|
|||||||
self.assertEqual(config.get(["b", "c"]), "text")
|
self.assertEqual(config.get(["b", "c"]), "text")
|
||||||
self.assertEqual(config.get(["e", "f", "g"]), 234)
|
self.assertEqual(config.get(["e", "f", "g"]), 234)
|
||||||
|
|
||||||
def test_interpolate(self):
|
def test_unset(self):
|
||||||
self.assertEqual(config.interpolate(["a"]), "1")
|
config.unset(["a"])
|
||||||
self.assertEqual(config.interpolate(["b", "a"]), "1")
|
config.unset(["b", "c"])
|
||||||
self.assertEqual(config.interpolate(["b", "c"], "2"), "text")
|
config.unset(["c", "d"])
|
||||||
self.assertEqual(config.interpolate(["b", "d"], "2"), "2")
|
self.assertEqual(config.get(["a"]), None)
|
||||||
config.set(["d"], 123)
|
self.assertEqual(config.get(["b", "a"]), 2)
|
||||||
self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
|
self.assertEqual(config.get(["b", "c"]), None)
|
||||||
self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
|
|
||||||
|
def test_apply(self):
|
||||||
|
options = (
|
||||||
|
(["b", "c"], [1, 2, 3]),
|
||||||
|
(["e", "f", "g"], 234),
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(config.get(["b", "c"]), "text")
|
||||||
|
self.assertEqual(config.get(["e", "f", "g"]), None)
|
||||||
|
|
||||||
|
with config.apply(options):
|
||||||
|
self.assertEqual(config.get(["b", "c"]), [1, 2, 3])
|
||||||
|
self.assertEqual(config.get(["e", "f", "g"]), 234)
|
||||||
|
|
||||||
|
self.assertEqual(config.get(["b", "c"]), "text")
|
||||||
|
self.assertEqual(config.get(["e", "f", "g"]), None)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
Reference in New Issue
Block a user