diff --git a/docs/configuration.rst b/docs/configuration.rst index 7fd569e3..7ffdb6b4 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -5288,6 +5288,16 @@ Description The value must be between 10 and 500. +extractor.wikimedia.subcategories +--------------------------------- +Type + ``bool`` +Default + ``true`` +Description + For ``Category:`` pages, recursively descend into subcategories. + + extractor.ytdl.cmdline-args --------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 0ad87c0f..f907a5b5 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -905,7 +905,8 @@ "wikimedia": { "sleep-request": "1.0-2.0", - "limit": 50 + "limit": 50, + "subcategories": true }, "booru": diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py index 4eae5375..3b23f3a8 100644 --- a/gallery_dl/extractor/wikimedia.py +++ b/gallery_dl/extractor/wikimedia.py @@ -54,7 +54,7 @@ class WikimediaExtractor(BaseExtractor): @staticmethod def prepare(image): - """Adjust the content of a image object""" + """Adjust the content of an image object""" image["metadata"] = { m["name"]: m["value"] for m in image["metadata"] or ()} @@ -80,6 +80,14 @@ class WikimediaExtractor(BaseExtractor): yield Message.Directory, image yield Message.Url, image["url"], image + if self.subcategories: + base = self.root + "/wiki/" + self.params["gcmtype"] = "subcat" + for subcat in self._pagination(self.params): + url = base + subcat["title"].replace(" ", "_") + subcat["_extractor"] = WikimediaArticleExtractor + yield Message.Queue, url, subcat + def _pagination(self, params): """ https://www.mediawiki.org/wiki/API:Query @@ -208,6 +216,8 @@ class WikimediaArticleExtractor(WikimediaExtractor): self.subcategory = prefix if prefix == "category": + self.subcategories = \ + True if self.config("subcategories", True) else False self.params = { "generator": "categorymembers", "gcmtitle" : path, @@ -215,10 +225,12 @@ class 
WikimediaArticleExtractor(WikimediaExtractor): "gcmlimit" : self.per_page, } elif prefix == "file": + self.subcategories = False self.params = { "titles" : path, } else: + self.subcategories = False self.params = { "generator": "images", "gimlimit" : self.per_page, diff --git a/test/results/wikimediacommons.py b/test/results/wikimediacommons.py index b61a9061..f2a262cd 100644 --- a/test/results/wikimediacommons.py +++ b/test/results/wikimediacommons.py @@ -21,4 +21,24 @@ __tests__ = ( "#class" : wikimedia.WikimediaArticleExtractor, }, +{ + "#url" : "https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin", + "#comment" : "subcategories", + "#category": ("wikimedia", "wikimediacommons", "category"), + "#class" : wikimedia.WikimediaArticleExtractor, + "#options" : {"image-filter": "False"}, + + "#urls": ( + "https://commons.wikimedia.org/wiki/Category:3558_Shishkin", + "https://commons.wikimedia.org/wiki/Category:Drawings_by_Ivan_Shishkin", + "https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin_grave", + "https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin_in_art", + "https://commons.wikimedia.org/wiki/Category:Ivan_Shishkin._To_the_190th_anniversary_of_the_birth", + "https://commons.wikimedia.org/wiki/Category:Paintings_by_Ivan_Shishkin", + "https://commons.wikimedia.org/wiki/Category:Shishkin_street_(Martyshkino)", + "https://commons.wikimedia.org/wiki/Category:Shishkin_street,_Moscow", + "https://commons.wikimedia.org/wiki/Category:Shishkin's_Pine", + ), +}, + )