From 46b6b71159eaf86d4b2bc850494a57aa2d8e8d1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 6 Jan 2025 17:18:04 +0100 Subject: [PATCH] [wallhaven] extract 'search[tags]' and 'search[tag_id]' metadata (#6772) --- gallery_dl/extractor/wallhaven.py | 23 +++++++++++++++++++---- test/results/wallhaven.py | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index 479e8a85..e5b764ab 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -54,7 +54,7 @@ class WallhavenExtractor(Extractor): class WallhavenSearchExtractor(WallhavenExtractor): """Extractor for search results on wallhaven.cc""" subcategory = "search" - directory_fmt = ("{category}", "{search[q]}") + directory_fmt = ("{category}", "{search[tags]}") archive_fmt = "s_{search[q]}_{id}" pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?" example = "https://wallhaven.cc/search?q=QUERY" @@ -64,7 +64,7 @@ class WallhavenSearchExtractor(WallhavenExtractor): self.params = text.parse_query(match.group(1)) def wallpapers(self): - return self.api.search(self.params.copy()) + return self.api.search(self.params) def metadata(self): return {"search": self.params} @@ -141,7 +141,7 @@ class WallhavenUploadsExtractor(WallhavenExtractor): def wallpapers(self): params = {"q": "@" + self.username} - return self.api.search(params.copy()) + return self.api.search(params) def metadata(self): return {"username": self.username} @@ -215,20 +215,35 @@ class WallhavenAPI(): def _pagination(self, endpoint, params=None, metadata=None): if params is None: + params_ptr = None params = {} + else: + params_ptr = params + params = params.copy() if metadata is None: metadata = self.extractor.config("metadata") while True: data = self._call(endpoint, params) + meta = data.get("meta") + if params_ptr is not None: + if meta and "query" in meta: + query = meta["query"] + if isinstance(query, dict): + params_ptr["tags"] = query.get("tag") + params_ptr["tag_id"] = query.get("id") + else: + params_ptr["tags"] = query + params_ptr["tag_id"] = 0 + params_ptr = None + if metadata: for wp in data["data"]: yield self.info(str(wp["id"])) else: yield from data["data"] - meta = data.get("meta") if not meta or meta["current_page"] >= meta["last_page"]: return params["page"] = meta["current_page"] + 1 diff --git a/test/results/wallhaven.py b/test/results/wallhaven.py index 47a8ba77..cf942aaf 100644 --- a/test/results/wallhaven.py +++ b/test/results/wallhaven.py @@ -12,6 +12,14 @@ __tests__ = ( "#url" : "https://wallhaven.cc/search?q=touhou", "#category": ("", "wallhaven", "search"), "#class" : wallhaven.WallhavenSearchExtractor, + "#pattern" : r"https://w\.wallhaven\.cc/full/\w\w/wallhaven-\w+\.\w+", + "#range" : "1-10", + + "search": { + "q" : "touhou", + "tags" : "touhou", + "tag_id": 0, + }, }, { @@ -20,6 +28,17 @@ __tests__ = ( "#class" : wallhaven.WallhavenSearchExtractor, "#pattern" : r"https://w\.wallhaven\.cc/full/\w\w/wallhaven-\w+\.\w+", "#count" : "<= 30", + + "search": { + "categories": "111", + "order" : "asc", + "page" : "3", + "purity" : "100", + "sorting" : "date_added", + "q" : "id:87", + "tags" : "Fujibayashi Kyou", + "tag_id" : 87, + }, }, {