[wikimedia] ignore missing files (#8388)

Co-authored-by: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com>
This commit is contained in:
Mike Fährmann
2025-10-11 20:32:53 +02:00
parent 214acc39e6
commit 65feed5b64

View File

@@ -90,7 +90,16 @@ class WikimediaExtractor(BaseExtractor):
 self.prepare_info(info)
 yield Message.Directory, info
-for info["num"], image in enumerate(images, 1):
+num = 0
+for image in images:
+    # https://www.mediawiki.org/wiki/Release_notes/1.34
+    if "filemissing" in image:
+        self.log.warning(
+            "File %s (or its revision) is missing",
+            image["canonicaltitle"].partition(":")[2])
+        continue
+    num += 1
+    image["num"] = num
     self.prepare_image(image)
     image.update(info)
     yield Message.Url, image["url"], image