[wikimedia] add 'subcategories' option (#2340)

https://github.com/mikf/gallery-dl/pull/2340#issuecomment-2707177295
This commit is contained in:
Mike Fährmann
2025-03-12 21:09:28 +01:00
parent 3e64ec4f15
commit cd1ddb0a67
4 changed files with 45 additions and 2 deletions

View File

@@ -5288,6 +5288,16 @@ Description
The value must be between 10 and 500. The value must be between 10 and 500.
extractor.wikimedia.subcategories
---------------------------------
Type
``bool``
Default
``true``
Description
For ``Category:`` pages, recursively descend into subcategories.
extractor.ytdl.cmdline-args extractor.ytdl.cmdline-args
--------------------------- ---------------------------
Type Type

View File

@@ -905,7 +905,8 @@
"wikimedia": "wikimedia":
{ {
"sleep-request": "1.0-2.0", "sleep-request": "1.0-2.0",
"limit": 50 "limit": 50,
"subcategories": true
}, },
"booru": "booru":

View File

@@ -54,7 +54,7 @@ class WikimediaExtractor(BaseExtractor):
@staticmethod @staticmethod
def prepare(image): def prepare(image):
"""Adjust the content of a image object""" """Adjust the content of an image object"""
image["metadata"] = { image["metadata"] = {
m["name"]: m["value"] m["name"]: m["value"]
for m in image["metadata"] or ()} for m in image["metadata"] or ()}
@@ -80,6 +80,14 @@ class WikimediaExtractor(BaseExtractor):
yield Message.Directory, image yield Message.Directory, image
yield Message.Url, image["url"], image yield Message.Url, image["url"], image
if self.subcategories:
base = self.root + "/wiki/"
self.params["gcmtype"] = "subcat"
for subcat in self._pagination(self.params):
url = base + subcat["title"].replace(" ", "_")
subcat["_extractor"] = WikimediaArticleExtractor
yield Message.Queue, url, subcat
def _pagination(self, params): def _pagination(self, params):
""" """
https://www.mediawiki.org/wiki/API:Query https://www.mediawiki.org/wiki/API:Query
@@ -208,6 +216,8 @@ class WikimediaArticleExtractor(WikimediaExtractor):
self.subcategory = prefix self.subcategory = prefix
if prefix == "category": if prefix == "category":
self.subcategories = \
True if self.config("subcategories", True) else False
self.params = { self.params = {
"generator": "categorymembers", "generator": "categorymembers",
"gcmtitle" : path, "gcmtitle" : path,
@@ -215,10 +225,12 @@ class WikimediaArticleExtractor(WikimediaExtractor):
"gcmlimit" : self.per_page, "gcmlimit" : self.per_page,
} }
elif prefix == "file": elif prefix == "file":
self.subcategories = False
self.params = { self.params = {
"titles" : path, "titles" : path,
} }
else: else:
self.subcategories = False
self.params = { self.params = {
"generator": "images", "generator": "images",
"gimlimit" : self.per_page, "gimlimit" : self.per_page,

View File

@@ -21,4 +21,24 @@ __tests__ = (
"#class" : wikimedia.WikimediaArticleExtractor, "#class" : wikimedia.WikimediaArticleExtractor,
}, },
{
"#url" : "https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin",
"#comment" : "subcategories",
"#category": ("wikimedia", "wikimediacommons", "category"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#options" : {"image-filter": "False"},
"#urls": (
"https://commons.wikimedia.org/wiki/Category:3558_Shishkin",
"https://commons.wikimedia.org/wiki/Category:Drawings_by_Ivan_Shishkin",
"https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin_grave",
"https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin_in_art",
"https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin._To_the_190th_anniversary_of_the_birth",
"https://commons.wikimedia.org/wiki/Category:Paintings_by_Ivan_Shishkin",
"https://commons.wikimedia.org/wiki/Category:Shishkin_street_(Martyshkino)",
"https://commons.wikimedia.org/wiki/Category:Shishkin_street,_Moscow",
"https://commons.wikimedia.org/wiki/Category:Shishkin's_Pine",
),
},
) )