[rule34] fix file downloads (#7697)

replace 'api-cdn' subdomain of image files with 'wimg'
This commit is contained in:
Mike Fährmann
2025-06-20 15:07:10 +02:00
parent bcfce6b7db
commit 74c9356442
2 changed files with 29 additions and 3 deletions

View File

@@ -21,6 +21,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.user_id = self.config("user-id")
self.root_api = self.config_instance("root-api") or self.root
if self.category == "rule34":
self._file_url = self._file_url_rule34
def _api_request(self, params):
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
return self.request_xml(url, params=params)
@@ -91,6 +94,16 @@ class GelbooruV02Extractor(booru.BooruExtractor):
return
params["pid"] += self.per_page
def _file_url_rule34(self, post):
    """Return the file URL to use for 'post'.

    URLs whose extension is listed in util.EXTS_VIDEO are returned
    untouched. For any other URL, everything up to and including its
    first '.' is replaced with 'https://wimg.'; the rewritten URL is
    stored in post["file_url"] and the original URL is kept as the
    single entry of post["_fallback"].
    """
    original = post["file_url"]
    if text.ext_from_url(original) in util.EXTS_VIDEO:
        return original

    # drop scheme and leading subdomain (e.g. 'https://api-cdn.')
    _, _, remainder = original.partition(".")
    post["_fallback"] = (original,)
    rewritten = "https://wimg." + remainder
    post["file_url"] = rewritten
    return rewritten
def _prepare(self, post):
post["tags"] = post["tags"].strip()
post["date"] = text.parse_datetime(

View File

@@ -12,8 +12,10 @@ __tests__ = (
"#url" : "https://rule34.xxx/index.php?page=post&s=list&tags=danraku",
"#category": ("gelbooru_v02", "rule34", "tag"),
"#class" : gelbooru_v02.GelbooruV02TagExtractor,
"#pattern" : r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"#count" : 2,
"#results" : (
"https://wimg.rule34.xxx/images/4615/00722987a1e8b5617a15a20c19a0915157048d3b.jpg",
"https://wimg.rule34.xxx/images/1845/04981deeac105a9c5fedc34a6ff017789e74f2a8.jpg",
),
"#sha1_content": [
"5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
"622e80be3f496672c44aab5c47fbc6941c61bc79",
@@ -53,7 +55,7 @@ __tests__ = (
"tags" : True,
"notes": True,
},
"#pattern" : r"https://api-cdn\.rule34\.xxx/images/1/6aafbdb3e22f3f3b412ea2cf53321317a37063f3\.jpg",
"#results" : r"https://wimg.rule34.xxx/images/1/6aafbdb3e22f3f3b412ea2cf53321317a37063f3.jpg",
"#sha1_content": [
"a43f418aa350039af0d11cae501396a33bbe2201",
"67b516295950867e1c1ab6bc13b35d3b762ed2a3",
@@ -84,4 +86,15 @@ __tests__ = (
],
},
{
"#url" : "https://rule34.xxx/index.php?page=post&s=view&id=13853212",
"#comment" : "HTML response with 'api-cdn.' subdomain (#7697)",
"#category": ("gelbooru_v02", "rule34", "post"),
"#class" : gelbooru_v02.GelbooruV02PostExtractor,
"#results" : "https://wimg.rule34.xxx/images/2164/60d61d06f3cd51be5152852d9c642d80.jpeg",
"#sha1_content": [
"0a07eb9e871589a012ec922c71eb4640fce09bb2",
],
},
)