[wikimedia] add 'format=original' to file URLs (#5512)

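add 'format' option (default: "original"); when set, its value is appended to each file URL as a 'format=<value>' query parameter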
This commit is contained in:
Mike Fährmann
2025-10-22 19:14:23 +02:00
parent df1b6204c6
commit eefd8f9c00
8 changed files with 43 additions and 13 deletions

View File

@@ -51,6 +51,7 @@ class WikimediaExtractor(BaseExtractor):
# https://www.mediawiki.org/wiki/API:Revisions
# https://www.mediawiki.org/wiki/API:Imageinfo
self.image_revisions = self.config("image-revisions", 1)
self.format = self.config("format", "original")
@cache(maxage=36500*86400, keyarg=1)
def _search_api_path(self, root):
@@ -74,9 +75,15 @@ class WikimediaExtractor(BaseExtractor):
m["name"]: m["value"]
for m in image["commonmetadata"] or ()}
text.nameext_from_url(image["canonicaltitle"].partition(":")[2], image)
text.nameext_from_name(
image["canonicaltitle"].partition(":")[2], image)
image["date"] = self.parse_datetime_iso(image["timestamp"])
if self.format:
url = image["url"]
image["url"] = (f"{url}{'&' if '?' in url else '?'}"
f"format={self.format}")
def items(self):
for info in self._pagination(self.params):
try: