[wallhaven] extract 'search[tags]' and 'search[tag_id]' metadata

(#6772)
This commit is contained in:
Mike Fährmann
2025-01-06 17:18:04 +01:00
parent 270aaea8ab
commit 46b6b71159
2 changed files with 38 additions and 4 deletions

View File

@@ -54,7 +54,7 @@ class WallhavenExtractor(Extractor):
class WallhavenSearchExtractor(WallhavenExtractor):
"""Extractor for search results on wallhaven.cc"""
subcategory = "search"
directory_fmt = ("{category}", "{search[q]}")
directory_fmt = ("{category}", "{search[tags]}")
archive_fmt = "s_{search[q]}_{id}"
pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?"
example = "https://wallhaven.cc/search?q=QUERY"
@@ -64,7 +64,7 @@ class WallhavenSearchExtractor(WallhavenExtractor):
self.params = text.parse_query(match.group(1))
def wallpapers(self):
    """Return the search results for the parsed query parameters.

    ``self.params`` itself (not a copy) is handed to ``self.api.search()``,
    so any key the API layer adds to the dict it receives is also visible
    through ``self.params``.

    NOTE(review): presumably ``search()`` forwards this dict to the API
    pagination helper, which records 'tags' and 'tag_id' in the dict it is
    given -- confirm against ``WallhavenAPI.search``.
    """
    return self.api.search(self.params)
def metadata(self):
    """Return the search parameters under the ``search`` key.

    The value is ``self.params`` itself, not a copy, so keys added to
    that dict later are visible through the returned mapping as well.
    """
    return {"search": self.params}
@@ -141,7 +141,7 @@ class WallhavenUploadsExtractor(WallhavenExtractor):
def wallpapers(self):
    """Return the search results for the query ``@<username>``.

    The query dict is built fresh on every call and passed to
    ``self.api.search()`` as-is; no defensive copy is made here.
    """
    params = {"q": "@" + self.username}
    return self.api.search(params)
def metadata(self):
    """Return the uploader's name under the ``username`` key."""
    return {"username": self.username}
@@ -215,20 +215,35 @@ class WallhavenAPI():
def _pagination(self, endpoint, params=None, metadata=None):
if params is None:
params_ptr = None
params = {}
else:
params_ptr = params
params = params.copy()
if metadata is None:
metadata = self.extractor.config("metadata")
while True:
data = self._call(endpoint, params)
meta = data.get("meta")
if params_ptr is not None:
if meta and "query" in meta:
query = meta["query"]
if isinstance(query, dict):
params_ptr["tags"] = query.get("tag")
params_ptr["tag_id"] = query.get("id")
else:
params_ptr["tags"] = query
params_ptr["tag_id"] = 0
params_ptr = None
if metadata:
for wp in data["data"]:
yield self.info(str(wp["id"]))
else:
yield from data["data"]
meta = data.get("meta")
if not meta or meta["current_page"] >= meta["last_page"]:
return
params["page"] = meta["current_page"] + 1