[paheal] fix '404 Not Found' for tags with URL encoded characters (#7642)

This commit is contained in:
Mike Fährmann
2025-06-08 16:20:03 +02:00
parent 17d39c06e3
commit 827eeca0bc
2 changed files with 14 additions and 12 deletions

View File

@@ -80,10 +80,6 @@ class PahealTagExtractor(PahealExtractor):
     page_start = 1
     per_page = 70

-    def __init__(self, match):
-        PahealExtractor.__init__(self, match)
-        self.tags = text.unquote(match.group(1))
-
     def _init(self):
         if self.config("metadata"):
             self._extract_data = self._extract_data_ex
@@ -94,13 +90,14 @@ class PahealTagExtractor(PahealExtractor):
         return pages * self.per_page

     def get_metadata(self):
-        return {"search_tags": self.tags}
+        return {"search_tags": text.unquote(self.groups[0])}

     def get_posts(self):
         pnum = self.page_start
+        base = "{}/post/list/{}/".format(self.root, self.groups[0])
+
         while True:
-            url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
-            page = self.request(url).text
+            page = self.request(base + str(pnum)).text

             pos = page.find("id='image-list'")
             for post in text.extract_iter(
@@ -149,9 +146,5 @@ class PahealPostExtractor(PahealExtractor):
                r"/post/view/(\d+)")
     example = "https://rule34.paheal.net/post/view/12345"

-    def __init__(self, match):
-        PahealExtractor.__init__(self, match)
-        self.post_id = match.group(1)
-
     def get_posts(self):
-        return (self._extract_post(self.post_id),)
+        return (self._extract_post(self.groups[0]),)

View File

@@ -51,6 +51,15 @@ __tests__ = (
     "width" : 1024,
 },

+{
+    "#url" : "https://rule34.paheal.net/post/list/Ranma_1%2F2/1",
+    "#comment" : "percent-encoded character in tag (#7642)",
+    "#category": ("shimmie2", "paheal", "tag"),
+    "#class" : paheal.PahealTagExtractor,
+    "#range" : "1-200",
+    "#count" : 200,
+},
+
 {
     "#url" : "https://rule34.paheal.net/post/view/481609",
     "#category": ("shimmie2", "paheal", "post"),