[poipiku] fix original file downloads (#8356)

now requires 'POIPIKU_LK' cookie
This commit is contained in:
Mike Fährmann
2025-10-06 11:06:35 +02:00
parent d67bc12ea0
commit 0aac99673d
6 changed files with 125 additions and 78 deletions

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2022-2023 Mike Fährmann
# Copyright 2022-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,102 +21,136 @@ class PoipikuExtractor(Extractor):
directory_fmt = ("{category}", "{user_id} {user_name}")
filename_fmt = "{post_id}_{num}.{extension}"
archive_fmt = "{post_id}_{num}"
cookies_domain = "poipiku.com"
cookies_warning = True
request_interval = (0.5, 1.5)
def _init(self):
self.cookies.set(
"LANG", "en", domain="poipiku.com")
"LANG", "en", domain=self.cookies_domain)
self.cookies.set(
"POIPIKU_CONTENTS_VIEW_MODE", "1", domain="poipiku.com")
"POIPIKU_CONTENTS_VIEW_MODE", "1", domain=self.cookies_domain)
self.headers = {
"Accept" : "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
"Origin" : self.root,
"Referer": None,
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
}
self.password = self.config("password", "")
def items(self):
password = self.config("password", "")
if self.cookies_check(("POIPIKU_LK",)):
extract_files = self._extract_files_auth
original = True
else:
extract_files = self._extract_files_noauth
original = False
if self.cookies_warning:
self.log.warning("no 'POIPIKU_LK' cookie set")
PoipikuExtractor.cookies_warning = False
for post_url in self.posts():
parts = post_url.split("/")
if post_url[0] == "/":
post_url = self.root + post_url
post_url = f"{self.root}{post_url}"
self.headers["Referer"] = post_url
page = self.request(post_url).text
extr = text.extract_from(page)
parts = post_url.rsplit("/", 2)
post = {
"post_category": extr("<title>[", "]"),
"count" : text.parse_int(extr("(", " ")),
"post_id" : parts[-1].partition(".")[0],
"user_id" : parts[-2],
"post_id" : parts[2].partition(".")[0],
"user_id" : parts[1],
"user_name" : text.unescape(extr(
'<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
"description": text.unescape(extr(
'class="IllustItemDesc" >', '</h1>')),
"original" : original,
"_http_headers": {"Referer": post_url},
}
yield Message.Directory, post
post["num"] = warning = 0
while True:
thumb = extr('class="IllustItemThumbImg" src="', '"')
if not thumb:
break
elif thumb.startswith(("//img.poipiku.com/img/", "/img/")):
if "/warning" in thumb:
warning = True
self.log.debug("%s: %s", post["post_id"], thumb)
continue
post["num"] += 1
url = text.ensure_http_scheme(thumb[:-8]).replace(
"//img.", "//img-org.", 1)
for post["num"], url in enumerate(extract_files(post, extr), 1):
yield Message.Url, url, text.nameext_from_url(url, post)
if not warning and not extr('ShowAppendFile', '<'):
def _extract_files_auth(self, post, _):
url = f"{self.root}/f/ShowIllustDetailF.jsp"
data = {
"ID" : post["user_id"],
"TD" : post["post_id"],
"AD" : "-1",
"PAS": self.password,
}
resp = self.request_json(
url, method="POST", headers=self.headers, data=data)
html = resp["html"]
if (resp.get("result_num") or 0) < 0:
self.log.warning("%s: '%s'",
post["post_id"], html.replace("<br/>", " "))
return text.extract_iter(html, 'src="', '"')
def _extract_files_noauth(self, post, extr):
files = []
warning = False
while True:
thumb = extr('class="IllustItemThumbImg" src="', '"')
if not thumb:
break
if thumb.startswith((
"https://img.poipiku.com/img/",
"//img.poipiku.com/img/",
"/img/",
)):
if "/warning" in thumb:
warning = True
self.log.debug("%s: %s", post["post_id"], thumb)
continue
files.append(thumb)
url = self.root + "/f/ShowAppendFileF.jsp"
headers = {
"Accept" : "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
"Origin" : self.root,
"Referer": post_url,
}
data = {
"UID": post["user_id"],
"IID": post["post_id"],
"PAS": password,
"MD" : "0",
"TWF": "-1",
}
resp = self.request_json(
url, method="POST", headers=headers, data=data)
if not warning and not extr('ShowAppendFile', '<'):
return files
page = resp["html"]
if (resp.get("result_num") or 0) < 0:
self.log.warning("%s: '%s'",
post["post_id"], page.replace("<br/>", " "))
url = f"{self.root}/f/ShowAppendFileF.jsp"
data = {
"UID": post["user_id"],
"IID": post["post_id"],
"PAS": self.password,
"MD" : "0",
"TWF": "-1",
}
resp = self.request_json(
url, method="POST", headers=self.headers, data=data)
for thumb in text.extract_iter(
page, 'class="IllustItemThumbImg" src="', '"'):
post["num"] += 1
url = text.ensure_http_scheme(thumb[:-8]).replace(
"//img.", "//img-org.", 1)
yield Message.Url, url, text.nameext_from_url(url, post)
html = resp["html"]
if (resp.get("result_num") or 0) < 0:
self.log.warning("%s: '%s'",
post["post_id"], html.replace("<br/>", " "))
files.extend(text.extract_iter(
html, 'class="IllustItemThumbImg" src="', '"'))
return files
class PoipikuUserExtractor(PoipikuExtractor):
"""Extractor for posts from a poipiku user"""
subcategory = "user"
pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
r"(\d+)/?(?:$|[?&#])")
pattern = (rf"{BASE_PATTERN}/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
rf"(\d+)/?(?:$|[?&#])")
example = "https://poipiku.com/12345/"
def __init__(self, match):
PoipikuExtractor.__init__(self, match)
self._page, self.user_id = match.groups()
def posts(self):
url = self.root + "/IllustListPcV.jsp"
pnum, user_id = self.groups
url = f"{self.root}/IllustListPcV.jsp"
params = {
"PG" : text.parse_int(self._page, 0),
"ID" : self.user_id,
"PG" : text.parse_int(pnum, 0),
"ID" : user_id,
"KWD": "",
}
@@ -137,12 +171,9 @@ class PoipikuUserExtractor(PoipikuExtractor):
class PoipikuPostExtractor(PoipikuExtractor):
"""Extractor for a poipiku post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
pattern = rf"{BASE_PATTERN}/(\d+)/(\d+)"
example = "https://poipiku.com/12345/12345.html"
def __init__(self, match):
PoipikuExtractor.__init__(self, match)
self.user_id, self.post_id = match.groups()
def posts(self):
return (f"/{self.user_id}/{self.post_id}.html",)
user_id, post_id = self.groups
return (f"/{user_id}/{post_id}.html",)