rework and extend input file processing (#4732)
- add 2 command-line options to modify input file contents
  - -I/--input-file-comment
  - -x/--input-file-delete
- implement InputManager class
- move code from util.py to __init__.py (mainly to avoid import cycles)
This commit is contained in:
@@ -18,19 +18,6 @@ __email__ = "mike_faehrmann@web.de"
|
||||
__version__ = version.__version__
|
||||
|
||||
|
||||
def progress(urls, pformat):
    """Wrapper around urls to output a simple progress indicator

    Yields every element of 'urls' unchanged and, right before each one,
    writes a formatted progress line through output.stderr_write().

    'pformat' is either True, which selects the built-in
    "[current/total] url" layout, or a format string that may use the
    fields {current}, {total} and {url}; a newline is appended to it.
    'urls' must support len(), since the total is computed up front.
    """
    template = (
        "[{current}/{total}] {url}\n" if pformat is True
        else pformat + "\n"
    )
    render = template.format_map
    total = len(urls)

    # positions are 1-based so the last line reads [total/total]
    for position, url in enumerate(urls, 1):
        output.stderr_write(render({
            "total"  : total,
            "current": position,
            "url"    : url,
        }))
        yield url
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
parser = option.build_parser()
|
||||
@@ -224,7 +211,7 @@ def main():
|
||||
return config.initialize()
|
||||
|
||||
else:
|
||||
if not args.urls and not args.inputfiles:
|
||||
if not args.urls and not args.input_files:
|
||||
parser.error(
|
||||
"The following arguments are required: URL\n"
|
||||
"Use 'gallery-dl --help' to get a list of all options.")
|
||||
@@ -238,22 +225,6 @@ def main():
|
||||
else:
|
||||
jobtype = args.jobtype or job.DownloadJob
|
||||
|
||||
urls = args.urls
|
||||
if args.inputfiles:
|
||||
for inputfile in args.inputfiles:
|
||||
try:
|
||||
if inputfile == "-":
|
||||
if sys.stdin:
|
||||
urls += util.parse_inputfile(sys.stdin, log)
|
||||
else:
|
||||
log.warning(
|
||||
"input file: stdin is not readable")
|
||||
else:
|
||||
with open(inputfile, encoding="utf-8") as file:
|
||||
urls += util.parse_inputfile(file, log)
|
||||
except OSError as exc:
|
||||
log.warning("input file: %s", exc)
|
||||
|
||||
# unsupported file logging handler
|
||||
handler = output.setup_logging_handler(
|
||||
"unsupportedfile", fmt="{message}")
|
||||
@@ -263,25 +234,44 @@ def main():
|
||||
ulog.propagate = False
|
||||
job.Job.ulog = ulog
|
||||
|
||||
# collect input URLs
|
||||
input_manager = InputManager()
|
||||
input_manager.log = input_log = logging.getLogger("inputfile")
|
||||
input_manager.add_list(args.urls)
|
||||
|
||||
if args.input_files:
|
||||
for input_file, action in args.input_files:
|
||||
try:
|
||||
path = util.expand_path(input_file)
|
||||
input_manager.add_file(path, action)
|
||||
except Exception as exc:
|
||||
input_log.error(exc)
|
||||
return getattr(exc, "code", 128)
|
||||
|
||||
pformat = config.get(("output",), "progress", True)
|
||||
if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
|
||||
urls = progress(urls, pformat)
|
||||
else:
|
||||
urls = iter(urls)
|
||||
if pformat and len(input_manager.urls) > 1 and \
|
||||
args.loglevel < logging.ERROR:
|
||||
input_manager.progress(pformat)
|
||||
|
||||
# process input URLs
|
||||
retval = 0
|
||||
url = next(urls, None)
|
||||
|
||||
while url is not None:
|
||||
for url in input_manager:
|
||||
try:
|
||||
log.debug("Starting %s for '%s'", jobtype.__name__, url)
|
||||
if isinstance(url, util.ExtendedUrl):
|
||||
|
||||
if isinstance(url, ExtendedUrl):
|
||||
for opts in url.gconfig:
|
||||
config.set(*opts)
|
||||
with config.apply(url.lconfig):
|
||||
retval |= jobtype(url.value).run()
|
||||
status = jobtype(url.value).run()
|
||||
else:
|
||||
retval |= jobtype(url).run()
|
||||
status = jobtype(url).run()
|
||||
|
||||
if status:
|
||||
retval |= status
|
||||
else:
|
||||
input_manager.success()
|
||||
|
||||
except exception.TerminateExtraction:
|
||||
pass
|
||||
except exception.RestartExtraction:
|
||||
@@ -291,8 +281,7 @@ def main():
|
||||
log.error("Unsupported URL '%s'", url)
|
||||
retval |= 64
|
||||
|
||||
url = next(urls, None)
|
||||
|
||||
input_manager.next()
|
||||
return retval
|
||||
|
||||
except KeyboardInterrupt:
|
||||
@@ -304,3 +293,206 @@ def main():
|
||||
if exc.errno != errno.EPIPE:
|
||||
raise
|
||||
return 1
|
||||
|
||||
|
||||
class InputManager():
    """Collect input URLs and iterate over them with a manual cursor.

    URLs come from add_url(), add_list() and add_file(). Entries read from
    a file that was added with an action ("c" or "d") remember which lines
    of that file they were built from, so success() can comment out or
    delete those lines once the URL has been processed.

    Iteration does NOT advance on its own: __next__() keeps returning the
    entry at the current position until next() is called. The consuming
    loop is expected to call next() after each entry it is done with.
    """

    def __init__(self):
        self.urls = []
        # becomes a {path: list-of-lines} dict once the first file is added
        self.files = ()
        # Default to the "inputfile" logger so add_file() and success()
        # can always report problems; callers may still assign their own.
        self.log = logging.getLogger("inputfile")
        self._index = 0
        self._current = None
        self._pformat = None

    def add_url(self, url):
        """Append a single URL to the list of inputs."""
        self.urls.append(url)

    def add_list(self, urls):
        """Append all URLs from the iterable 'urls' to the list of inputs."""
        self.urls += urls

    def add_file(self, path, action=None):
        """Process an input file.

        Lines starting with '#' and empty lines will be ignored.
        Lines starting with '-' will be interpreted as a key-value pair
        separated by an '=', where
        'key' is a dot-separated option name and
        'value' is a JSON-parsable string.
        These configuration options will be applied
        while processing the next URL only.
        Lines starting with '-G' are the same as above, except these options
        will be applied for *all* following URLs, i.e. they are Global.
        Everything else will be used as a potential URL.

        Example input file:

        # settings global options
        -G base-directory = "/tmp/"
        -G skip = false

        # setting local options for the next URL
        -filename="spaces_are_optional.jpg"
        -skip = true

        https://example.org/

        # next URL uses default filename and 'skip' is false.
        https://example.com/index.htm # comment1
        https://example.com/404.htm   # comment2

        'path' is the file to read; "-" means stdin, but only when no
        'action' is given. 'action' selects what success() does to the
        lines a URL was read from: "c" comments them out, "d" deletes
        them, anything else leaves the file untouched.

        Raises exception.InputFileError when the file cannot be read or
        when an option line is malformed.
        """
        if path == "-" and not action:
            try:
                lines = sys.stdin.readlines()
            except Exception as exc:
                raise exception.InputFileError(
                    "stdin is not readable") from exc
            # stdin content is stored under the key None
            path = None
        else:
            try:
                with open(path, encoding="utf-8") as fp:
                    lines = fp.readlines()
            except Exception as exc:
                raise exception.InputFileError(str(exc)) from exc

        if self.files:
            self.files[path] = lines
        else:
            self.files = {path: lines}

        if action == "c":
            action = self._action_comment
        elif action == "d":
            action = self._action_delete
        else:
            action = None

        gconf = []
        lconf = []
        indicies = []
        strip_comment = None
        append = self.urls.append

        for n, line in enumerate(lines):
            line = line.strip()

            if not line or line[0] == "#":
                # empty line or comment
                continue

            elif line[0] == "-":
                # config spec
                if len(line) >= 2 and line[1] == "G":
                    conf = gconf
                    line = line[2:]
                else:
                    conf = lconf
                    line = line[1:]
                    # NOTE(review): nesting reconstructed from a copy of the
                    # code without indentation - only local option lines
                    # are tied to the next URL; confirm against upstream
                    if action:
                        indicies.append(n)

                key, sep, value = line.partition("=")
                if not sep:
                    # presumably InputFileError %-formats its arguments;
                    # verify against the exception module
                    raise exception.InputFileError(
                        "Invalid KEY=VALUE pair '%s' on line %s in %s",
                        line, n+1, path)

                try:
                    value = util.json_loads(value.strip())
                except ValueError as exc:
                    self.log.debug("%s: %s", exc.__class__.__name__, exc)
                    # 'value' still holds the raw text at this point
                    raise exception.InputFileError(
                        "Unable to parse '%s' on line %s in %s",
                        value, n+1, path) from exc

                key = key.strip().split(".")
                conf.append((key[:-1], key[-1], value))

            else:
                # url
                if " #" in line or "\t#" in line:
                    # trailing comment; 're' is only loaded when needed
                    if strip_comment is None:
                        import re
                        strip_comment = re.compile(r"\s+#.*").sub
                    line = strip_comment("", line)
                if gconf or lconf:
                    url = ExtendedUrl(line, gconf, lconf)
                    # options collected so far belong to this URL only
                    gconf = []
                    lconf = []
                else:
                    url = line

                if action:
                    # remember where this URL (and its local options)
                    # came from, so success() can modify those lines
                    indicies.append(n)
                    append((url, path, action, indicies))
                    indicies = []
                else:
                    append(url)

    def progress(self, pformat=True):
        """Enable progress output while iterating.

        'pformat' is either True for the default "[current/total] url"
        layout or a format string using {current}, {total} and {url};
        a newline is appended to it.
        """
        if pformat is True:
            pformat = "[{current}/{total}] {url}\n"
        else:
            pformat += "\n"
        self._pformat = pformat.format_map

    def next(self):
        """Move the cursor to the next input entry."""
        self._index += 1

    def success(self):
        """Apply the file action of the most recently returned entry.

        Does nothing for entries without an attached action. Otherwise
        the in-memory copy of the source file is modified and the whole
        file is written back; a failure to write is logged as a warning
        and not raised.
        """
        if self._current:
            url, path, action, indicies = self._current
            lines = self.files[path]
            action(lines, indicies)
            try:
                with open(path, "w", encoding="utf-8") as fp:
                    fp.writelines(lines)
            except Exception as exc:
                self.log.warning(
                    "Unable to update '%s' (%s: %s)",
                    path, exc.__class__.__name__, exc)

    @staticmethod
    def _action_comment(lines, indicies):
        """Prefix every line listed in 'indicies' with '# ' (in place)."""
        for i in indicies:
            lines[i] = "# " + lines[i]

    @staticmethod
    def _action_delete(lines, indicies):
        """Blank out every line listed in 'indicies' (in place).

        Lines are replaced by empty strings instead of being removed,
        so the indices stored for other entries stay valid.
        """
        for i in indicies:
            lines[i] = ""

    def __iter__(self):
        """Reset the cursor to the first entry and return self."""
        self._index = 0
        return self

    def __next__(self):
        """Return the entry at the cursor without advancing it.

        Call next() to move on; calling this repeatedly without it
        returns the same entry again.
        """
        try:
            item = self.urls[self._index]
        except IndexError:
            raise StopIteration from None

        if isinstance(item, tuple):
            # entry from a file with an action attached:
            # (url, path, action, line indices)
            self._current = item
            item = item[0]
        else:
            self._current = None

        if self._pformat:
            output.stderr_write(self._pformat({
                "total"  : len(self.urls),
                "current": self._index + 1,
                "url"    : item,
            }))
        return item
|
||||
|
||||
|
||||
class ExtendedUrl:
    """URL with attached config key-value pairs

    'value' is the URL itself, 'gconfig' and 'lconfig' hold the option
    entries that were passed in alongside it. str() of an instance is
    the plain URL.
    """
    __slots__ = ("value", "gconfig", "lconfig")

    def __init__(self, url, gconf, lconf):
        self.value, self.gconfig, self.lconfig = url, gconf, lconf

    def __str__(self):
        return self.value
|
||||
|
||||
Reference in New Issue
Block a user