From 1b9e1ff9ff6016f341838c8d15126df6f2903efd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 16 Sep 2025 22:10:53 +0200 Subject: [PATCH] [schalenetwork] fix extraction (#6948 #7391 #7728) - remove 'cbz' option - require 'crt' query parameter token as well as a matching User-Agent --- docs/configuration.rst | 37 +++++-- docs/gallery-dl.conf | 5 +- gallery_dl/extractor/schalenetwork.py | 146 ++++++++++++-------------- test/results/schalenetwork.py | 24 +++++ 4 files changed, 125 insertions(+), 87 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index db5f5333..6f13388f 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3586,17 +3586,26 @@ Description the first in the list gets chosen (usually `mp3`). -extractor.schalenetwork.cbz +extractor.schalenetwork.crt --------------------------- Type - ``bool`` -Default - ``true`` + ``string`` +Example + * ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"`` + * ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"`` Description - Download each gallery as a single ``.cbz`` file. + The ``crt`` query parameter value + sent when fetching gallery data. - Disabling this option causes a gallery - to be downloaded as individual image files. + To get this value: + + * Open your browser's Developer Tools (F12) + * Select `Network` -> `XHR` + * Open a gallery page + * Select the last `Network` entry and copy its ``crt`` value + + Note: You will also need your browser's + `user-agent <extractor.*.user-agent_>`__ extractor.schalenetwork.format @@ -3627,6 +3636,20 @@ Description for example ``tags_artist`` or ``tags_character``. 
+extractor.schalenetwork.token +----------------------------- +Type + ``string`` +Example + * ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"`` + * ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"`` + * ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"`` +Description + ``Authorization`` header value + used for requests to ``https://api.schale.network`` + to access ``favorite`` galleries. + + extractor.lolisafe.domain ------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 823fcc01..21230afb 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -680,11 +680,10 @@ }, "schalenetwork": { - "username": "", - "password": "", + "crt" : "", + "token": "", "sleep-request": "0.5-1.5", - "cbz" : true, "format": ["0", "1600", "1280", "980", "780"], "tags" : false }, diff --git a/gallery_dl/extractor/schalenetwork.py b/gallery_dl/extractor/schalenetwork.py index d5172870..dad04cd4 100644 --- a/gallery_dl/extractor/schalenetwork.py +++ b/gallery_dl/extractor/schalenetwork.py @@ -10,7 +10,6 @@ from .common import GalleryExtractor, Extractor, Message from .. 
import text, exception -from ..cache import cache import collections BASE_PATTERN = ( @@ -27,6 +26,7 @@ class SchalenetworkExtractor(Extractor): category = "schalenetwork" root = "https://niyaniya.moe" root_api = "https://api.schale.network" + root_auth = "https://auth.schale.network" request_interval = (0.5, 1.5) def _init(self): @@ -49,7 +49,7 @@ class SchalenetworkExtractor(Extractor): return for entry in entries: - url = f"{self.root}/g/{entry['id']}/{entry['public_key']}" + url = f"{self.root}/g/{entry['id']}/{entry['key']}" entry["_extractor"] = SchalenetworkGalleryExtractor yield Message.Queue, url, entry @@ -60,6 +60,34 @@ class SchalenetworkExtractor(Extractor): pass params["page"] += 1 + def _token(self): + if token := self.config("token"): + return f"Bearer {token.rpartition(' ')[2]}" + raise exception.AuthRequired("'token'", "your favorites") + + def _crt(self): + crt = self.config("crt") + if not crt: + self._require_auth() + + if not text.re(r"^[0-9a-f-]+$").match(crt): + path, _, qs = crt.partition("?") + if not qs: + qs = path + crt = text.parse_query(qs).get("crt") + if not crt: + self._require_auth() + + return crt + + def _require_auth(self, exc=None): + if exc is None: + msg = None + else: + msg = f"{exc.status} {exc.response.reason}" + raise exception.AuthRequired( + "'crt' query parameter & matching '--user-agent'", None, msg) + class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): """Extractor for schale.network galleries""" @@ -67,7 +95,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): directory_fmt = ("{category}", "{id} {title}") archive_fmt = "{id}_{num}" request_interval = 0.0 - pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)" + pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)" example = "https://niyaniya.moe/g/12345/67890abcde/" TAG_TYPES = { @@ -86,27 +114,10 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): 12: "other", } - def 
__init__(self, match): - GalleryExtractor.__init__(self, match) - self.page_url = None - - def _init(self): - self.headers = { - "Accept" : "*/*", - "Referer": self.root + "/", - "Origin" : self.root, - } - - self.fmt = self.config("format") - self.cbz = self.config("cbz", True) - - if self.cbz: - self.filename_fmt = "{id} {title}.{extension}" - self.directory_fmt = ("{category}",) - def metadata(self, _): - url = f"{self.root_api}/books/detail/{self.groups[1]}/{self.groups[2]}" - self.data = data = self.request_json(url, headers=self.headers) + _, gid, gkey = self.groups + url = f"{self.root_api}/books/detail/{gid}/{gkey}" + data = self.request_json(url, headers=self.headers) data["date"] = text.parse_timestamp(data["created_at"] // 1000) tags = [] @@ -127,53 +138,42 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): data["tags_" + types[type]] = values try: - if self.cbz: - data["count"] = len(data["thumbnails"]["entries"]) + data["count"] = len(data["thumbnails"]["entries"]) del data["thumbnails"] - del data["rels"] except Exception: pass return data def images(self, _): - data = self.data + crt = self._crt() + _, gid, gkey = self.groups + url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={crt}" + try: + data = self.request_json(url, method="POST", headers=self.headers) + except exception.HttpError as exc: + self._require_auth(exc) + fmt = self._select_format(data["data"]) - url = (f"{self.root_api}/books/data/{data['id']}/" - f"{data['public_key']}/{fmt['id']}/{fmt['public_key']}") - params = { - "v": data["updated_at"], - "w": fmt["w"], - } - - if self.cbz: - params["action"] = "dl" - base = self.request_json( - url, method="POST", params=params, headers=self.headers, - )["base"] - url = f"{base}?v={data['updated_at']}&w={fmt['w']}" - info = text.nameext_from_url(base) - if not info["extension"]: - info["extension"] = "cbz" - return ((url, info),) - - data = self.request_json(url, params=params, headers=self.headers) + url = 
(f"{self.root_api}/books/data/{gid}/{gkey}" + f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={crt}") + data = self.request_json(url, headers=self.headers) base = data["base"] results = [] for entry in data["entries"]: dimensions = entry["dimensions"] info = { - "w": dimensions[0], - "h": dimensions[1], + "width" : dimensions[0], + "height": dimensions[1], "_http_headers": self.headers, } results.append((base + entry["path"], info)) return results def _select_format(self, formats): - fmt = self.fmt + fmt = self.config("format") if not fmt or fmt == "best": fmtids = ("0", "1600", "1280", "980", "780") @@ -182,7 +182,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): elif isinstance(fmt, list): fmtids = fmt else: - fmtids = (str(self.fmt),) + fmtids = (str(fmt),) for fmtid in fmtids: try: @@ -203,44 +203,36 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): class SchalenetworkSearchExtractor(SchalenetworkExtractor): """Extractor for schale.network search results""" subcategory = "search" - pattern = BASE_PATTERN + r"/\?([^#]*)" - example = "https://niyaniya.moe/?s=QUERY" + pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$" + example = "https://niyaniya.moe/browse?s=QUERY" def items(self): - params = text.parse_query(self.groups[1]) + _, tag, qs = self.groups + + params = text.parse_query(qs) params["page"] = text.parse_int(params.get("page"), 1) + + if tag is not None: + ns, sep, tag = text.unquote(tag).partition(":") + if "+" in tag: + tag = tag.replace("+", " ") + q = '"' + else: + q = "" + q = '"' if " " in tag else "" + params["s"] = f"{ns}{sep}{q}^{tag}${q}" + return self._pagination("/books", params) class SchalenetworkFavoriteExtractor(SchalenetworkExtractor): """Extractor for schale.network favorites""" subcategory = "favorite" - pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" + pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?" 
example = "https://niyaniya.moe/favorites" def items(self): - self.login() - params = text.parse_query(self.groups[1]) params["page"] = text.parse_int(params.get("page"), 1) - return self._pagination("/favorites", params) - - def login(self): - username, password = self._get_auth_info() - if username: - self.headers["Authorization"] = \ - "Bearer " + self._login_impl(username, password) - return - - raise exception.AuthenticationError("Username and password required") - - @cache(maxage=86400, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - - url = "https://auth.schale.network/login" - data = {"uname": username, "passwd": password} - response = self.request( - url, method="POST", headers=self.headers, data=data) - - return response.json()["session"] + self.headers["Authorization"] = self._token() + return self._pagination(f"/books/favorites?crt={self._crt()}", params) diff --git a/test/results/schalenetwork.py b/test/results/schalenetwork.py index 0996474d..9fd412c6 100644 --- a/test/results/schalenetwork.py +++ b/test/results/schalenetwork.py @@ -126,6 +126,30 @@ __tests__ = ( "#count" : ">= 50", }, +{ + "#url" : "https://niyaniya.moe/browse?s=beach", + "#class" : schalenetwork.SchalenetworkSearchExtractor, +}, + +{ + "#url" : "https://niyaniya.moe/tag/tag:beach", + "#class" : schalenetwork.SchalenetworkSearchExtractor, +}, + +{ + "#url" : "https://niyaniya.moe/tag/circle:tentou+mushi+no+sanba", + "#class" : schalenetwork.SchalenetworkSearchExtractor, + "#results" : ( + "https://niyaniya.moe/g/26044/9b7ecf9bcf00", + "https://niyaniya.moe/g/24342/c723a7fe9191", + "https://niyaniya.moe/g/23787/7a51f4258481", + "https://niyaniya.moe/g/23784/d81779e07505", + "https://niyaniya.moe/g/23764/cb867963cfcb", + "https://niyaniya.moe/g/23760/a667d4a7f447", + "https://niyaniya.moe/g/23669/9ec3ff4c6737", + ), +}, + { "#url" : "https://niyaniya.moe/favorites", "#class" : schalenetwork.SchalenetworkFavoriteExtractor,