[wikimedia] add 'format=original' to file URLs (#5512)

add 'format' option
This commit is contained in:
Mike Fährmann
2025-10-22 19:14:23 +02:00
parent df1b6204c6
commit eefd8f9c00
8 changed files with 43 additions and 13 deletions

View File

@@ -6647,6 +6647,17 @@ Description
Download video files.
extractor.wikimedia.format
--------------------------
Type
``string``
Default
``"original"``
Description
Sets the value of the ``format`` query parameter
appended to all download URLs.
Set this to an empty string (``""``) to leave download URLs unmodified.
extractor.wikimedia.image-revisions
-----------------------------------
Type

View File

@@ -1129,6 +1129,7 @@
"wikimedia":
{
"sleep-request": "1.0-2.0",
"format": "original",
"image-revisions": 1,
"limit": 50,
"subcategories": true

View File

@@ -51,6 +51,7 @@ class WikimediaExtractor(BaseExtractor):
# https://www.mediawiki.org/wiki/API:Revisions
# https://www.mediawiki.org/wiki/API:Imageinfo
self.image_revisions = self.config("image-revisions", 1)
self.format = self.config("format", "original")
@cache(maxage=36500*86400, keyarg=1)
def _search_api_path(self, root):
@@ -74,9 +75,15 @@ class WikimediaExtractor(BaseExtractor):
m["name"]: m["value"]
for m in image["commonmetadata"] or ()}
text.nameext_from_url(image["canonicaltitle"].partition(":")[2], image)
text.nameext_from_name(
image["canonicaltitle"].partition(":")[2], image)
image["date"] = self.parse_datetime_iso(image["timestamp"])
if self.format:
url = image["url"]
image["url"] = (f"{url}{'&' if '?' in url else '?'}"
f"format={self.format}")
def items(self):
for info in self._pagination(self.params):
try:

View File

@@ -19,7 +19,7 @@ __tests__ = (
"#url" : "https://mushishi.fandom.com/wiki/Yahagi",
"#category": ("wikimedia", "fandom-mushishi", "article"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#results" : "https://static.wikia.nocookie.net/mushi-shi/images/f/f8/Yahagi.png/revision/latest?cb=20150128052255",
"#results" : "https://static.wikia.nocookie.net/mushi-shi/images/f/f8/Yahagi.png/revision/latest?cb=20150128052255&format=original",
"bitdepth" : 8,
"canonicaltitle": "File:Yahagi.png",
@@ -75,7 +75,7 @@ __tests__ = (
"sha1" : "e3078a97976215323dbabb0c86b7acc55b512d16",
"size" : 429912,
"timestamp" : "2015-01-28T05:22:55Z",
"url" : "https://static.wikia.nocookie.net/mushi-shi/images/f/f8/Yahagi.png/revision/latest?cb=20150128052255",
"url" : "https://static.wikia.nocookie.net/mushi-shi/images/f/f8/Yahagi.png/revision/latest?cb=20150128052255&format=original",
"user" : "ITHYRIAL",
"userid" : 4637089,
"width" : 728,
@@ -90,12 +90,23 @@ __tests__ = (
"metadata" : {},
},
{
"#url" : "https://hildatheseries.fandom.com/wiki/Burku",
"#comment" : "'.webp' file without 'format=original' (#5512)",
"#category": ("wikimedia", "fandom-hildatheseries", "article"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#options" : {"format": ""},
"#range" : "1",
"#results" : "https://static.wikia.nocookie.net/hildatheseries/images/2/24/Burku.png/revision/latest?cb=20251010033752",
"#sha1_content": "36dce0e511fa8f6e1f834b92150126804fde971f",
},
{
"#url" : "https://discogs.fandom.com/zh/wiki/File:CH-0430D2.jpg",
"#comment" : "non-English language prefix (#6370)",
"#category": ("wikimedia", "fandom-discogs", "file"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#results" : "https://static.wikia.nocookie.net/discogs/images/a/ab/CH-0430D2.jpg/revision/latest?cb=20241007150151&path-prefix=zh",
"#results" : "https://static.wikia.nocookie.net/discogs/images/a/ab/CH-0430D2.jpg/revision/latest?cb=20241007150151&path-prefix=zh&format=original",
},
{

View File

@@ -13,11 +13,11 @@ __tests__ = (
"#category": ("wikimedia", "mediawiki", "help"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#results" : (
"https://upload.wikimedia.org/wikipedia/commons/e/ec/OOjs_UI_icon_information-progressive.svg",
"https://upload.wikimedia.org/wikipedia/commons/6/62/PD-icon.svg",
"https://upload.wikimedia.org/wikipedia/commons/0/0e/Vector_Sidebar.png",
"https://upload.wikimedia.org/wikipedia/commons/7/77/Vector_page_tabs.png",
"https://upload.wikimedia.org/wikipedia/commons/6/6e/Vector_user_links.png",
"https://upload.wikimedia.org/wikipedia/commons/0/07/Codex_icon_specialPages_color-progressive.svg?format=original",
"https://upload.wikimedia.org/wikipedia/commons/6/62/PD-icon.svg?format=original",
"https://upload.wikimedia.org/wikipedia/commons/0/0e/Vector_Sidebar.png?format=original",
"https://upload.wikimedia.org/wikipedia/commons/7/77/Vector_page_tabs.png?format=original",
"https://upload.wikimedia.org/wikipedia/commons/6/6e/Vector_user_links.png?format=original",
),
},

View File

@@ -12,7 +12,7 @@ __tests__ = (
"#url" : "https://www.pidgi.net/wiki/File:Key_art_-_Fight_Knight.png",
"#category": ("wikimedia", "pidgiwiki", "file"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#results" : "https://cdn.pidgi.net/images/0/0c/Key_art_-_Fight_Knight.png",
"#results" : "https://cdn.pidgi.net/images/0/0c/Key_art_-_Fight_Knight.png?format=original",
},
{

View File

@@ -12,7 +12,7 @@ __tests__ = (
"#url" : "https://commons.wikimedia.org/wiki/File:Starr-050516-1367-Pimenta_dioica-flowers-Maunaloa-Molokai_(24762757525).jpg",
"#category": ("wikimedia", "wikimediacommons", "file"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#results" : "https://upload.wikimedia.org/wikipedia/commons/f/fa/Starr-050516-1367-Pimenta_dioica-flowers-Maunaloa-Molokai_%2824762757525%29.jpg",
"#results" : "https://upload.wikimedia.org/wikipedia/commons/f/fa/Starr-050516-1367-Pimenta_dioica-flowers-Maunaloa-Molokai_%2824762757525%29.jpg?format=original",
},
{
@@ -35,7 +35,7 @@ __tests__ = (
"https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin_in_art",
"https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin._To_the_190th_anniversary_of_the_birth",
"https://commons.wikimedia.org/wiki/Category:Paintings_by_Ivan_Shishkin",
"https://commons.wikimedia.org/wiki/Category:Shishkin_street_(Martyshkino)",
"https://commons.wikimedia.org/wiki/Category:Shishkin_Street_(Martyshkino)",
"https://commons.wikimedia.org/wiki/Category:Shishkin_street,_Moscow",
"https://commons.wikimedia.org/wiki/Category:Shishkin's_Pine",
),

View File

@@ -12,7 +12,7 @@ __tests__ = (
"#url" : "https://species.wikimedia.org/wiki/Geranospiza",
"#category": ("wikimedia", "wikispecies", "article"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#results" : "https://upload.wikimedia.org/wikipedia/commons/0/01/Geranospiza_caerulescens.jpg",
"#results" : "https://upload.wikimedia.org/wikipedia/commons/0/01/Geranospiza_caerulescens.jpg?format=original",
"#sha1_content": "3a17c14b15489928e4154f826af1c42afb5a523e",
},