From 55742c6a2cf8a1c7ac6563b73f3eaf8c98aa2613 Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Thu, 2 Oct 2025 15:46:17 +0800 Subject: [PATCH] [wikimedia] add ability to fetch image revisions as long as the number of revisions of a single image does not exceed 500 --- docs/configuration.rst | 14 ++++++++++++++ gallery_dl/extractor/wikimedia.py | 18 +++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 3128cffb..b46b0f90 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -6548,6 +6548,20 @@ Description Download video files. +extractor.wikimedia.image-revisions +----------------------------------- +Type + ``integer`` +Default + ``1`` +Description + Number of revisions to return for a single image. + + The default value of 1 only returns the latest revision. + + The value must be between 1 and 500. + + extractor.wikimedia.limit ------------------------- Type diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py index 00266bd0..7fd427a4 100644 --- a/gallery_dl/extractor/wikimedia.py +++ b/gallery_dl/extractor/wikimedia.py @@ -46,6 +46,12 @@ class WikimediaExtractor(BaseExtractor): else: self.api_url = None + # note: image revisions are different from page revisions + # ref: + # https://www.mediawiki.org/wiki/API:Revisions + # https://www.mediawiki.org/wiki/API:Imageinfo + self.image_revisions = self.config("image-revisions", 1) + @cache(maxage=36500*86400, keyarg=1) def _search_api_path(self, root): self.log.debug("Probing possible API endpoints") @@ -74,14 +80,15 @@ class WikimediaExtractor(BaseExtractor): def items(self): for info in self._pagination(self.params): try: - image = info["imageinfo"][0] - except LookupError: + images = info["imageinfo"] + except KeyError: self.log.debug("Missing 'imageinfo' for %s", info) continue - self.prepare(image) - yield Message.Directory, image - yield 
Message.Url, image["url"], image + for image in images: + self.prepare(image) + yield Message.Directory, image + yield Message.Url, image["url"], image if self.subcategories: base = self.root + "/wiki/" @@ -108,6 +115,7 @@ class WikimediaExtractor(BaseExtractor): "timestamp|user|userid|comment|canonicaltitle|url|size|" "sha1|mime|metadata|commonmetadata|extmetadata|bitdepth" ) + params["iilimit"] = self.image_revisions while True: data = self.request_json(url, params=params)