[poipiku] fix original file downloads (#8356)

now requires 'POIPIKU_LK' cookie
2025-10-06 11:06:35 +02:00
parent d67bc12ea0
commit 0aac99673d
6 changed files with 125 additions and 78 deletions
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2022-2023 Mike Fährmann
+# Copyright 2022-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -21,102 +21,136 @@ class PoipikuExtractor(Extractor):
    directory_fmt = ("{category}", "{user_id} {user_name}")
    filename_fmt = "{post_id}_{num}.{extension}"
    archive_fmt = "{post_id}_{num}"
+    cookies_domain = "poipiku.com"
+    cookies_warning = True
    request_interval = (0.5, 1.5)

    def _init(self):
        self.cookies.set(
-            "LANG", "en", domain="poipiku.com")
+            "LANG", "en", domain=self.cookies_domain)
        self.cookies.set(
-            "POIPIKU_CONTENTS_VIEW_MODE", "1", domain="poipiku.com")
+            "POIPIKU_CONTENTS_VIEW_MODE", "1", domain=self.cookies_domain)
+        self.headers = {
+            "Accept" : "application/json, text/javascript, */*; q=0.01",
+            "X-Requested-With": "XMLHttpRequest",
+            "Origin" : self.root,
+            "Referer": None,
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-origin",
+        }
+        self.password = self.config("password", "")

    def items(self):
-        password = self.config("password", "")
+        if self.cookies_check(("POIPIKU_LK",)):
+            extract_files = self._extract_files_auth
+            original = True
+        else:
+            extract_files = self._extract_files_noauth
+            original = False
+            if self.cookies_warning:
+                self.log.warning("no 'POIPIKU_LK' cookie set")
+                PoipikuExtractor.cookies_warning = False

        for post_url in self.posts():
-            parts = post_url.split("/")
            if post_url[0] == "/":
-                post_url = self.root + post_url
+                post_url = f"{self.root}{post_url}"
+            self.headers["Referer"] = post_url
+
            page = self.request(post_url).text
            extr = text.extract_from(page)
-
+            parts = post_url.rsplit("/", 2)
            post = {
                "post_category": extr("<title>[", "]"),
                "count"      : text.parse_int(extr("(", " ")),
-                "post_id"    : parts[-1].partition(".")[0],
-                "user_id"    : parts[-2],
+                "post_id"    : parts[2].partition(".")[0],
+                "user_id"    : parts[1],
                "user_name"  : text.unescape(extr(
                    '<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
                "description": text.unescape(extr(
                    'class="IllustItemDesc" >', '</h1>')),
+                "original"   : original,
                "_http_headers": {"Referer": post_url},
            }

            yield Message.Directory, post
-            post["num"] = warning = 0
-
-            while True:
-                thumb = extr('class="IllustItemThumbImg" src="', '"')
-                if not thumb:
-                    break
-                elif thumb.startswith(("//img.poipiku.com/img/", "/img/")):
-                    if "/warning" in thumb:
-                        warning = True
-                    self.log.debug("%s: %s", post["post_id"], thumb)
-                    continue
-                post["num"] += 1
-                url = text.ensure_http_scheme(thumb[:-8]).replace(
-                    "//img.", "//img-org.", 1)
+            for post["num"], url in enumerate(extract_files(post, extr), 1):
                yield Message.Url, url, text.nameext_from_url(url, post)

-            if not warning and not extr('ShowAppendFile', '<'):
+    def _extract_files_auth(self, post, _):
+        url = f"{self.root}/f/ShowIllustDetailF.jsp"
+        data = {
+            "ID" : post["user_id"],
+            "TD" : post["post_id"],
+            "AD" : "-1",
+            "PAS": self.password,
+        }
+        resp = self.request_json(
+            url, method="POST", headers=self.headers, data=data)
+
+        html = resp["html"]
+        if (resp.get("result_num") or 0) < 0:
+            self.log.warning("%s: '%s'",
+                             post["post_id"], html.replace("<br/>", " "))
+        return text.extract_iter(html, 'src="', '"')
+
+    def _extract_files_noauth(self, post, extr):
+        files = []
+        warning = False
+
+        while True:
+            thumb = extr('class="IllustItemThumbImg" src="', '"')
+            if not thumb:
+                break
+            if thumb.startswith((
+                "https://img.poipiku.com/img/",
+                "//img.poipiku.com/img/",
+                "/img/",
+            )):
+                if "/warning" in thumb:
+                    warning = True
+                self.log.debug("%s: %s", post["post_id"], thumb)
                continue
+            files.append(thumb)

-            url = self.root + "/f/ShowAppendFileF.jsp"
-            headers = {
-                "Accept" : "application/json, text/javascript, */*; q=0.01",
-                "X-Requested-With": "XMLHttpRequest",
-                "Origin" : self.root,
-                "Referer": post_url,
-            }
-            data = {
-                "UID": post["user_id"],
-                "IID": post["post_id"],
-                "PAS": password,
-                "MD" : "0",
-                "TWF": "-1",
-            }
-            resp = self.request_json(
-                url, method="POST", headers=headers, data=data)
+        if not warning and not extr('ShowAppendFile', '<'):
+            return files

-            page = resp["html"]
-            if (resp.get("result_num") or 0) < 0:
-                self.log.warning("%s: '%s'",
-                                 post["post_id"], page.replace("<br/>", " "))
+        url = f"{self.root}/f/ShowAppendFileF.jsp"
+        data = {
+            "UID": post["user_id"],
+            "IID": post["post_id"],
+            "PAS": self.password,
+            "MD" : "0",
+            "TWF": "-1",
+        }
+        resp = self.request_json(
+            url, method="POST", headers=self.headers, data=data)

-            for thumb in text.extract_iter(
-                    page, 'class="IllustItemThumbImg" src="', '"'):
-                post["num"] += 1
-                url = text.ensure_http_scheme(thumb[:-8]).replace(
-                    "//img.", "//img-org.", 1)
-                yield Message.Url, url, text.nameext_from_url(url, post)
+        html = resp["html"]
+        if (resp.get("result_num") or 0) < 0:
+            self.log.warning("%s: '%s'",
+                             post["post_id"], html.replace("<br/>", " "))
+
+        files.extend(text.extract_iter(
+            html, 'class="IllustItemThumbImg" src="', '"'))
+        return files


 class PoipikuUserExtractor(PoipikuExtractor):
    """Extractor for posts from a poipiku user"""
    subcategory = "user"
-    pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
-               r"(\d+)/?(?:$|[?&#])")
+    pattern = (rf"{BASE_PATTERN}/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
+               rf"(\d+)/?(?:$|[?&#])")
    example = "https://poipiku.com/12345/"

-    def __init__(self, match):
-        PoipikuExtractor.__init__(self, match)
-        self._page, self.user_id = match.groups()
-
    def posts(self):
-        url = self.root + "/IllustListPcV.jsp"
+        pnum, user_id = self.groups
+
+        url = f"{self.root}/IllustListPcV.jsp"
        params = {
-            "PG" : text.parse_int(self._page, 0),
-            "ID" : self.user_id,
+            "PG" : text.parse_int(pnum, 0),
+            "ID" : user_id,
            "KWD": "",
        }

@@ -137,12 +171,9 @@ class PoipikuUserExtractor(PoipikuExtractor):
 class PoipikuPostExtractor(PoipikuExtractor):
    """Extractor for a poipiku post"""
    subcategory = "post"
-    pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
+    pattern = rf"{BASE_PATTERN}/(\d+)/(\d+)"
    example = "https://poipiku.com/12345/12345.html"

-    def __init__(self, match):
-        PoipikuExtractor.__init__(self, match)
-        self.user_id, self.post_id = match.groups()
-
    def posts(self):
-        return (f"/{self.user_id}/{self.post_id}.html",)
+        user_id, post_id = self.groups
+        return (f"/{user_id}/{post_id}.html",)