diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 7504fa46..b64fa2f0 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -7,7 +7,6 @@
 # published by the Free Software Foundation.
 
 import sys
-import json
 import logging
 from . import version, config, option, output, extractor, job, util, exception
 
@@ -32,81 +31,6 @@ def progress(urls, pformat):
         yield pinfo["url"]
 
 
-def parse_inputfile(file, log):
-    """Filter and process strings from an input file.
-
-    Lines starting with '#' and empty lines will be ignored.
-    Lines starting with '-' will be interpreted as a key-value pair separated
-    by an '='. where 'key' is a dot-separated option name and 'value' is a
-    JSON-parsable value. These configuration options will be applied while
-    processing the next URL.
-    Lines starting with '-G' are the same as above, except these options will
-    be applied for *all* following URLs, i.e. they are Global.
-    Everything else will be used as a potential URL.
-
-    Example input file:
-
-    # settings global options
-    -G base-directory = "/tmp/"
-    -G skip = false
-
-    # setting local options for the next URL
-    -filename="spaces_are_optional.jpg"
-    -skip = true
-
-    https://example.org/
-
-    # next URL uses default filename and 'skip' is false.
-    https://example.com/index.htm # comment1
-    https://example.com/404.htm # comment2
-    """
-    gconf = []
-    lconf = []
-
-    for line in file:
-        line = line.strip()
-
-        if not line or line[0] == "#":
-            # empty line or comment
-            continue
-
-        elif line[0] == "-":
-            # config spec
-            if len(line) >= 2 and line[1] == "G":
-                conf = gconf
-                line = line[2:]
-            else:
-                conf = lconf
-                line = line[1:]
-
-            key, sep, value = line.partition("=")
-            if not sep:
-                log.warning("input file: invalid = pair: %s", line)
-                continue
-
-            try:
-                value = json.loads(value.strip())
-            except ValueError as exc:
-                log.warning("input file: unable to parse '%s': %s", value, exc)
-                continue
-
-            key = key.strip().split(".")
-            conf.append((key[:-1], key[-1], value))
-
-        else:
-            # url
-            if " #" in line:
-                line = line.partition(" #")[0].rstrip()
-            elif "\t#" in line:
-                line = line.partition("\t#")[0].rstrip()
-            if gconf or lconf:
-                yield util.ExtendedUrl(line, gconf, lconf)
-                gconf = []
-                lconf = []
-            else:
-                yield line
-
-
 def main():
     try:
         if sys.stdout and sys.stdout.encoding.lower() != "utf-8":
@@ -275,12 +199,12 @@ def main():
                 try:
                     if inputfile == "-":
                         if sys.stdin:
-                            urls += parse_inputfile(sys.stdin, log)
+                            urls += util.parse_inputfile(sys.stdin, log)
                         else:
                             log.warning("input file: stdin is not readable")
                     else:
                         with open(inputfile, encoding="utf-8") as file:
-                            urls += parse_inputfile(file, log)
+                            urls += util.parse_inputfile(file, log)
                 except OSError as exc:
                     log.warning("input file: %s", exc)
 
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 4ba1cbae..1650b0a3 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -418,6 +418,82 @@ CODES = {
 }
 
 
+def parse_inputfile(file, log):
+    """Filter and process strings from an input file.
+
+    Lines starting with '#' and empty lines will be ignored.
+    Lines starting with '-' will be interpreted as a key-value pair separated
+    by an '='. where 'key' is a dot-separated option name and 'value' is a
+    JSON-parsable value. These configuration options will be applied while
+    processing the next URL.
+    Lines starting with '-G' are the same as above, except these options will
+    be applied for *all* following URLs, i.e. they are Global.
+    Everything else will be used as a potential URL.
+
+    Example input file:
+
+    # settings global options
+    -G base-directory = "/tmp/"
+    -G skip = false
+
+    # setting local options for the next URL
+    -filename="spaces_are_optional.jpg"
+    -skip = true
+
+    https://example.org/
+
+    # next URL uses default filename and 'skip' is false.
+    https://example.com/index.htm # comment1
+    https://example.com/404.htm # comment2
+    """
+    gconf = []
+    lconf = []
+    strip_comment = None
+
+    for line in file:
+        line = line.strip()
+
+        if not line or line[0] == "#":
+            # empty line or comment
+            continue
+
+        elif line[0] == "-":
+            # config spec
+            if len(line) >= 2 and line[1] == "G":
+                conf = gconf
+                line = line[2:]
+            else:
+                conf = lconf
+                line = line[1:]
+
+            key, sep, value = line.partition("=")
+            if not sep:
+                log.warning("input file: invalid = pair: %s", line)
+                continue
+
+            try:
+                value = json.loads(value.strip())
+            except ValueError as exc:
+                log.warning("input file: unable to parse '%s': %s", value, exc)
+                continue
+
+            key = key.strip().split(".")
+            conf.append((key[:-1], key[-1], value))
+
+        else:
+            # url
+            if " #" in line or "\t#" in line:
+                if strip_comment is None:
+                    strip_comment = re.compile(r"\s+#.*").sub
+                line = strip_comment("", line)
+            if gconf or lconf:
+                yield ExtendedUrl(line, gconf, lconf)
+                gconf = []
+                lconf = []
+            else:
+                yield line
+
+
 class UniversalNone():
     """None-style object that supports more operations than None itself"""
     __slots__ = ()