[poipiku] improve error detection (#8356)

This commit is contained in:
Mike Fährmann
2025-10-07 18:13:38 +02:00
parent 338c69e5ed
commit d49b819886
2 changed files with 151 additions and 48 deletions

View File

@@ -44,10 +44,10 @@ class PoipikuExtractor(Extractor):
def items(self):
if self.cookies_check(("POIPIKU_LK",)):
extract_files = self._extract_files_auth
original = True
self.logged_in = True
else:
extract_files = self._extract_files_noauth
original = False
self.logged_in = False
if self.cookies_warning:
self.log.warning("no 'POIPIKU_LK' cookie set")
PoipikuExtractor.cookies_warning = False
@@ -55,8 +55,6 @@ class PoipikuExtractor(Extractor):
for post_url in self.posts():
if post_url[0] == "/":
post_url = f"{self.root}{post_url}"
self.headers["Referer"] = post_url
page = self.request(post_url).text
extr = text.extract_from(page)
parts = post_url.rsplit("/", 2)
@@ -69,15 +67,82 @@ class PoipikuExtractor(Extractor):
'<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
"description": text.unescape(extr(
'class="IllustItemDesc" >', '</h1>')),
"original" : original,
"original" : self.logged_in,
"_http_headers": {"Referer": post_url},
}
yield Message.Directory, post
for post["num"], url in enumerate(extract_files(post, extr), 1):
thumb = extr('class="IllustItemThumbImg" src="', '"')
if reason := self._discard_post(post, thumb):
if isinstance(reason, str):
self.log.warning("%s: '%s'", post["post_id"], reason)
continue
elif reason is not False:
thumb = reason
self.headers["Referer"] = post_url
for post["num"], url in enumerate(extract_files(
post, thumb, extr), 1):
yield Message.Url, url, text.nameext_from_url(url, post)
def _extract_files_auth(self, post, _):
def _discard_post(self, post, thumb):
if not thumb:
return True
if thumb.startswith("https://cdn.poipiku.com/img/"):
self.log.debug("%s: %s", post["post_id"], thumb)
type = text.rextr(thumb, "/", ".")
if type == "warning":
return None
elif type == "publish_pass":
return ""
elif type == "publish_login":
return 0 if self.logged_in else "You need to sign in"
elif type == "publish_follower":
return "Favorite only"
elif type == "publish_t_rt":
return "Retweet required"
if thumb.startswith((
"https://img.poipiku.com/img/",
"//img.poipiku.com/img/",
"/img/",
)):
self.log.debug("%s: %s", post["post_id"], thumb)
if "/warning" in thumb:
return None
return True
return False
def _extract_files_auth(self, post, thumb, extr):
data = self._show_illust_detail(post)
if data.get("error_code"):
data = self._show_append_file(post)
html = data["html"]
self.log.warning("%s: '%s'",
post["post_id"], html.replace("<br/>", " "))
return ()
return text.extract_iter(data["html"], 'src="', '"')
def _extract_files_noauth(self, post, thumb, extr):
if thumb:
if not extr('ShowAppendFile', '<'):
return (thumb,)
files = [thumb]
else:
files = []
data = self._show_append_file(post)
html = data["html"]
if (data.get("result_num") or 0) < 0:
self.log.warning("%s: '%s'",
post["post_id"], html.replace("<br/>", " "))
files.extend(text.extract_iter(
html, 'class="IllustItemThumbImg" src="', '"'))
return files
def _show_illust_detail(self, post):
url = f"{self.root}/f/ShowIllustDetailF.jsp"
data = {
"ID" : post["user_id"],
@@ -85,37 +150,11 @@ class PoipikuExtractor(Extractor):
"AD" : "-1",
"PAS": self.password,
}
resp = self.request_json(
url, method="POST", headers=self.headers, data=data)
html = resp["html"]
if (resp.get("result_num") or 0) < 0:
self.log.warning("%s: '%s'",
post["post_id"], html.replace("<br/>", " "))
return text.extract_iter(html, 'src="', '"')
def _extract_files_noauth(self, post, extr):
files = []
warning = False
while True:
thumb = extr('class="IllustItemThumbImg" src="', '"')
if not thumb:
break
if thumb.startswith((
"https://img.poipiku.com/img/",
"//img.poipiku.com/img/",
"/img/",
)):
if "/warning" in thumb:
warning = True
self.log.debug("%s: %s", post["post_id"], thumb)
continue
files.append(thumb)
if not warning and not extr('ShowAppendFile', '<'):
return files
return self.request_json(
url, method="POST", headers=self.headers, data=data,
interval=False)
def _show_append_file(self, post):
url = f"{self.root}/f/ShowAppendFileF.jsp"
data = {
"UID": post["user_id"],
@@ -124,17 +163,9 @@ class PoipikuExtractor(Extractor):
"MD" : "0",
"TWF": "-1",
}
resp = self.request_json(
url, method="POST", headers=self.headers, data=data)
html = resp["html"]
if (resp.get("result_num") or 0) < 0:
self.log.warning("%s: '%s'",
post["post_id"], html.replace("<br/>", " "))
files.extend(text.extract_iter(
html, 'class="IllustItemThumbImg" src="', '"'))
return files
return self.request_json(
url, method="POST", headers=self.headers, data=data,
interval=False)
class PoipikuUserExtractor(PoipikuExtractor):