[common] introduce 'status' attribute to Extractors

This lets an extractor record error codes itself, so that exceptions which are
not handled by the Job.run() try-except block still end up in Job.status.

- fixes Job.status being 0 in certain situations even when errors occurred
- fixes some URLs not getting written to -e/--error-file (#7758)
This commit is contained in:
Mike Fährmann
2025-07-05 21:19:48 +02:00
parent d349c248c0
commit 1bbacba4ed
3 changed files with 9 additions and 3 deletions

View File

@@ -36,6 +36,7 @@ class Extractor():
directory_fmt = ("{category}",) directory_fmt = ("{category}",)
filename_fmt = "{filename}.{extension}" filename_fmt = "{filename}.{extension}"
archive_fmt = "" archive_fmt = ""
status = 0
root = "" root = ""
cookies_domain = "" cookies_domain = ""
cookies_index = 0 cookies_index = 0
@@ -207,6 +208,7 @@ class Extractor():
response.encoding = encoding response.encoding = encoding
return response return response
if notfound and code == 404: if notfound and code == 404:
self.status |= exception.NotFoundError.code
raise exception.NotFoundError(notfound) raise exception.NotFoundError(notfound)
msg = f"'{code} {response.reason}' for '{response.url}'" msg = f"'{code} {response.reason}' for '{response.url}'"
@@ -246,6 +248,8 @@ class Extractor():
if not fatal or fatal is ...: if not fatal or fatal is ...:
self.log.warning(msg) self.log.warning(msg)
return util.NullResponse(url, msg) return util.NullResponse(url, msg)
self.status |= exception.HttpError.code
raise exception.HttpError(msg, response) raise exception.HttpError(msg, response)
def request_location(self, url, **kwargs): def request_location(self, url, **kwargs):

View File

@@ -1166,15 +1166,15 @@ class DeviantartStatusExtractor(DeviantartExtractor):
def deviations(self): def deviations(self):
for status in self.api.user_statuses(self.user, self.offset): for status in self.api.user_statuses(self.user, self.offset):
yield from self.status(status) yield from self.process_status(status)
def status(self, status): def process_status(self, status):
for item in status.get("items") or (): # do not trust is_share for item in status.get("items") or (): # do not trust is_share
# shared deviations/statuses # shared deviations/statuses
if "deviation" in item: if "deviation" in item:
yield item["deviation"].copy() yield item["deviation"].copy()
if "status" in item: if "status" in item:
yield from self.status(item["status"].copy()) yield from self.process_status(item["status"].copy())
# assume is_deleted == true means necessary fields are missing # assume is_deleted == true means necessary fields are missing
if status["is_deleted"]: if status["is_deleted"]:
self.log.warning( self.log.warning(

View File

@@ -186,6 +186,8 @@ class Job():
self.handle_finalize() self.handle_finalize()
extractor.finalize() extractor.finalize()
if s := extractor.status:
self.status |= s
return self.status return self.status
def dispatch(self, msg): def dispatch(self, msg):