[poipiku] improve error detection (#8356)
This commit is contained in:
@@ -44,10 +44,10 @@ class PoipikuExtractor(Extractor):
|
||||
def items(self):
|
||||
if self.cookies_check(("POIPIKU_LK",)):
|
||||
extract_files = self._extract_files_auth
|
||||
original = True
|
||||
self.logged_in = True
|
||||
else:
|
||||
extract_files = self._extract_files_noauth
|
||||
original = False
|
||||
self.logged_in = False
|
||||
if self.cookies_warning:
|
||||
self.log.warning("no 'POIPIKU_LK' cookie set")
|
||||
PoipikuExtractor.cookies_warning = False
|
||||
@@ -55,8 +55,6 @@ class PoipikuExtractor(Extractor):
|
||||
for post_url in self.posts():
|
||||
if post_url[0] == "/":
|
||||
post_url = f"{self.root}{post_url}"
|
||||
self.headers["Referer"] = post_url
|
||||
|
||||
page = self.request(post_url).text
|
||||
extr = text.extract_from(page)
|
||||
parts = post_url.rsplit("/", 2)
|
||||
@@ -69,15 +67,82 @@ class PoipikuExtractor(Extractor):
|
||||
'<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
|
||||
"description": text.unescape(extr(
|
||||
'class="IllustItemDesc" >', '</h1>')),
|
||||
"original" : original,
|
||||
"original" : self.logged_in,
|
||||
"_http_headers": {"Referer": post_url},
|
||||
}
|
||||
|
||||
yield Message.Directory, post
|
||||
for post["num"], url in enumerate(extract_files(post, extr), 1):
|
||||
|
||||
thumb = extr('class="IllustItemThumbImg" src="', '"')
|
||||
if reason := self._discard_post(post, thumb):
|
||||
if isinstance(reason, str):
|
||||
self.log.warning("%s: '%s'", post["post_id"], reason)
|
||||
continue
|
||||
elif reason is not False:
|
||||
thumb = reason
|
||||
|
||||
self.headers["Referer"] = post_url
|
||||
for post["num"], url in enumerate(extract_files(
|
||||
post, thumb, extr), 1):
|
||||
yield Message.Url, url, text.nameext_from_url(url, post)
|
||||
|
||||
def _extract_files_auth(self, post, _):
|
||||
def _discard_post(self, post, thumb):
|
||||
if not thumb:
|
||||
return True
|
||||
if thumb.startswith("https://cdn.poipiku.com/img/"):
|
||||
self.log.debug("%s: %s", post["post_id"], thumb)
|
||||
type = text.rextr(thumb, "/", ".")
|
||||
if type == "warning":
|
||||
return None
|
||||
elif type == "publish_pass":
|
||||
return ""
|
||||
elif type == "publish_login":
|
||||
return 0 if self.logged_in else "You need to sign in"
|
||||
elif type == "publish_follower":
|
||||
return "Favorite only"
|
||||
elif type == "publish_t_rt":
|
||||
return "Retweet required"
|
||||
if thumb.startswith((
|
||||
"https://img.poipiku.com/img/",
|
||||
"//img.poipiku.com/img/",
|
||||
"/img/",
|
||||
)):
|
||||
self.log.debug("%s: %s", post["post_id"], thumb)
|
||||
if "/warning" in thumb:
|
||||
return None
|
||||
return True
|
||||
return False
|
||||
|
||||
def _extract_files_auth(self, post, thumb, extr):
|
||||
data = self._show_illust_detail(post)
|
||||
|
||||
if data.get("error_code"):
|
||||
data = self._show_append_file(post)
|
||||
html = data["html"]
|
||||
self.log.warning("%s: '%s'",
|
||||
post["post_id"], html.replace("<br/>", " "))
|
||||
return ()
|
||||
return text.extract_iter(data["html"], 'src="', '"')
|
||||
|
||||
def _extract_files_noauth(self, post, thumb, extr):
|
||||
if thumb:
|
||||
if not extr('ShowAppendFile', '<'):
|
||||
return (thumb,)
|
||||
files = [thumb]
|
||||
else:
|
||||
files = []
|
||||
|
||||
data = self._show_append_file(post)
|
||||
html = data["html"]
|
||||
if (data.get("result_num") or 0) < 0:
|
||||
self.log.warning("%s: '%s'",
|
||||
post["post_id"], html.replace("<br/>", " "))
|
||||
|
||||
files.extend(text.extract_iter(
|
||||
html, 'class="IllustItemThumbImg" src="', '"'))
|
||||
return files
|
||||
|
||||
def _show_illust_detail(self, post):
|
||||
url = f"{self.root}/f/ShowIllustDetailF.jsp"
|
||||
data = {
|
||||
"ID" : post["user_id"],
|
||||
@@ -85,37 +150,11 @@ class PoipikuExtractor(Extractor):
|
||||
"AD" : "-1",
|
||||
"PAS": self.password,
|
||||
}
|
||||
resp = self.request_json(
|
||||
url, method="POST", headers=self.headers, data=data)
|
||||
|
||||
html = resp["html"]
|
||||
if (resp.get("result_num") or 0) < 0:
|
||||
self.log.warning("%s: '%s'",
|
||||
post["post_id"], html.replace("<br/>", " "))
|
||||
return text.extract_iter(html, 'src="', '"')
|
||||
|
||||
def _extract_files_noauth(self, post, extr):
|
||||
files = []
|
||||
warning = False
|
||||
|
||||
while True:
|
||||
thumb = extr('class="IllustItemThumbImg" src="', '"')
|
||||
if not thumb:
|
||||
break
|
||||
if thumb.startswith((
|
||||
"https://img.poipiku.com/img/",
|
||||
"//img.poipiku.com/img/",
|
||||
"/img/",
|
||||
)):
|
||||
if "/warning" in thumb:
|
||||
warning = True
|
||||
self.log.debug("%s: %s", post["post_id"], thumb)
|
||||
continue
|
||||
files.append(thumb)
|
||||
|
||||
if not warning and not extr('ShowAppendFile', '<'):
|
||||
return files
|
||||
return self.request_json(
|
||||
url, method="POST", headers=self.headers, data=data,
|
||||
interval=False)
|
||||
|
||||
def _show_append_file(self, post):
|
||||
url = f"{self.root}/f/ShowAppendFileF.jsp"
|
||||
data = {
|
||||
"UID": post["user_id"],
|
||||
@@ -124,17 +163,9 @@ class PoipikuExtractor(Extractor):
|
||||
"MD" : "0",
|
||||
"TWF": "-1",
|
||||
}
|
||||
resp = self.request_json(
|
||||
url, method="POST", headers=self.headers, data=data)
|
||||
|
||||
html = resp["html"]
|
||||
if (resp.get("result_num") or 0) < 0:
|
||||
self.log.warning("%s: '%s'",
|
||||
post["post_id"], html.replace("<br/>", " "))
|
||||
|
||||
files.extend(text.extract_iter(
|
||||
html, 'class="IllustItemThumbImg" src="', '"'))
|
||||
return files
|
||||
return self.request_json(
|
||||
url, method="POST", headers=self.headers, data=data,
|
||||
interval=False)
|
||||
|
||||
|
||||
class PoipikuUserExtractor(PoipikuExtractor):
|
||||
|
||||
Reference in New Issue
Block a user