implement '-e/--error-file' command-line option (#4732)

Copying per-URL options from regular, read-only input files
does not currently work.
This commit is contained in:
Mike Fährmann
2023-12-05 20:49:51 +01:00
parent 4eb3590103
commit 99b76628f7
3 changed files with 61 additions and 16 deletions

View File

@@ -39,6 +39,7 @@
-E, --extractor-info Print extractor defaults and settings
-K, --list-keywords Print a list of available keywords and example
values for the given URLs
-e, --error-file FILE Add input URLs which returned an error to FILE
--list-modules Print a list of available extractor modules
--list-extractors Print a list of extractor classes with
description, (sub)category and example URL
@@ -51,7 +52,8 @@
## Downloader Options:
-r, --limit-rate RATE Maximum download rate (e.g. 500k or 2.5M)
-R, --retries N Maximum number of retries for failed HTTP
requests or -1 for infinite retries (default: 4)
--http-timeout SECONDS Timeout for HTTP connections (default: 30.0)
--sleep SECONDS Number of seconds to wait before each download.
This can be either a constant value or a range

View File

@@ -249,6 +249,9 @@ def main():
input_log.error(exc)
return getattr(exc, "code", 128)
if args.error_file:
input_manager.error_file(args.error_file)
pformat = config.get(("output",), "progress", True)
if pformat and len(input_manager.urls) > 1 and \
args.loglevel < logging.ERROR:
@@ -270,6 +273,7 @@ def main():
if status:
retval |= status
input_manager.error()
else:
input_manager.success()
@@ -281,6 +285,7 @@ def main():
except exception.NoExtractorError:
log.error("Unsupported URL '%s'", url)
retval |= 64
input_manager.error()
input_manager.next()
return retval
@@ -301,9 +306,12 @@ class InputManager():
def __init__(self):
    """Create an empty input queue with no per-item state."""
    # Queue of inputs; entries are either plain URLs or tuples whose
    # first element is the URL (see how __next__ unpacks them).
    self.urls = []
    # Indexed by path when an input-file entry is rewritten.
    self.files = ()
    # URL of the entry most recently returned by __next__.
    self._url = ""
    # Tuple entry currently being processed, or None for a plain URL.
    self._item = None
    # Position of the current entry in 'urls'.
    self._index = 0
    # Older name for the per-item state now kept in '_item'; initialised
    # so any remaining reader of the old attribute sees a falsy value.
    self._current = None
    # Progress formatter; nothing is printed while this is falsy.
    self._pformat = None
    # Open handle of the error file, or None when none is in use.
    self._error_fp = None
def add_url(self, url):
self.urls.append(url)
@@ -428,6 +436,15 @@ class InputManager():
else:
append(url)
def error_file(self, path):
    """Open 'path' in append mode as the destination for failed inputs.

    The path is passed through util.expand_path() first. On success the
    open handle is stored in self._error_fp. Any failure is reported as
    a warning and self._error_fp is left unchanged.
    """
    try:
        target = util.expand_path(path)
        handle = open(target, "a", encoding="utf-8")
    except Exception as exc:
        kind = exc.__class__.__name__
        self.log.warning("Unable to open error file (%s: %s)", kind, exc)
        return

    self._error_fp = handle
def progress(self, pformat=True):
if pformat is True:
pformat = "[{current}/{total}] {url}\n"
@@ -439,17 +456,37 @@ class InputManager():
self._index += 1
def success(self):
    """Mark the current input as successfully processed.

    Only entries that carry a tuple in self._item are affected: for
    those, _rewrite() applies the entry's action and writes the file
    back. Plain URLs need no bookkeeping.
    """
    # _rewrite() applies the action itself, so it must not also be
    # applied here.
    if self._item:
        self._rewrite()
def error(self):
    """Append the current input to the error file, if one is open.

    For an entry tracked in self._item, the lines it refers to are
    copied verbatim and the entry is then passed to _rewrite(). For a
    plain URL, the URL itself is written on its own line. A failed
    write is reported as a warning and never raised.
    """
    if not self._error_fp:
        return

    if self._item:
        _, path, _, indices = self._item
        lines = self.files[path]
        # Collect the text before calling _rewrite(): the action it
        # applies presumably edits 'lines' in place (its return value
        # is ignored there).
        out = "".join(lines[i] for i in indices)
        self._rewrite()
    else:
        out = str(self._url) + "\n"

    # The last line of an input file may lack a trailing newline;
    # without one, the next appended record would run into this one.
    if out and not out.endswith("\n"):
        out += "\n"

    try:
        self._error_fp.write(out)
        # The handle stays open for later writes, so push each record
        # out instead of leaving it in the buffer.
        self._error_fp.flush()
    except Exception as exc:
        self.log.warning(
            "Unable to update '%s' (%s: %s)",
            self._error_fp.name, exc.__class__.__name__, exc)
def _rewrite(self):
    """Apply the current item's action and write its file back to disk.

    self._item is unpacked as (url, path, action, indicies). The action
    is called with the line list stored for 'path' and the item's
    indices, after which all lines are written to 'path'. A failed
    write is reported as a warning and never raised.
    """
    _, target, apply_action, positions = self._item
    content = self.files[target]
    apply_action(content, positions)

    try:
        with open(target, "w", encoding="utf-8") as handle:
            handle.writelines(content)
    except Exception as exc:
        kind = exc.__class__.__name__
        self.log.warning("Unable to update '%s' (%s: %s)", target, kind, exc)
@staticmethod
def _action_comment(lines, indicies):
@@ -467,23 +504,24 @@ class InputManager():
def __next__(self):
    """Return the URL at the current index without advancing it.

    Tuple entries are remembered in self._item and their first element
    is used as the URL; for plain entries self._item is reset to None.
    The URL is also stored in self._url. When a progress formatter is
    set, one progress line is written via output.stderr_write().

    Raises:
        StopIteration: the index is past the end of self.urls.
    """
    try:
        url = self.urls[self._index]
    except IndexError:
        # The IndexError is an implementation detail; do not chain it.
        raise StopIteration from None

    if isinstance(url, tuple):
        self._item = url
        url = url[0]
    else:
        self._item = None
    self._url = url

    if self._pformat:
        output.stderr_write(self._pformat({
            "total"  : len(self.urls),
            "current": self._index + 1,
            "url"    : url,
        }))
    return url
class ExtendedUrl():

View File

@@ -286,6 +286,11 @@ def build_parser():
help=("Print a list of available keywords and example values "
"for the given URLs"),
)
output.add_argument(
"-e", "--error-file",
dest="error_file", metavar="FILE",
help="Add input URLs which returned an error to FILE",
)
output.add_argument(
"--list-modules",
dest="list_modules", action="store_true",