[schalenetwork] fix extraction (#6948 #7391 #7728)

- remove 'cbz' option
- require 'crt' query parameter token as well as a matching User-Agent
This commit is contained in:
Mike Fährmann
2025-09-16 22:10:53 +02:00
parent a0b3e08f64
commit 1b9e1ff9ff
4 changed files with 125 additions and 87 deletions

View File

@@ -3586,17 +3586,26 @@ Description
the first in the list gets chosen (usually `mp3`). the first in the list gets chosen (usually `mp3`).
extractor.schalenetwork.cbz extractor.schalenetwork.crt
--------------------------- ---------------------------
Type Type
``bool`` ``string``
Default Example
``true`` * ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"``
* ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"``
Description Description
Download each gallery as a single ``.cbz`` file. The ``crt`` query parameter value
sent when fetching gallery data.
Disabling this option causes a gallery To get this value:
to be downloaded as individual image files.
* Open your browser's Developer Tools (F12)
* Select `Network` -> `XHR`
* Open a gallery page
* Select the last `Network` entry and copy its ``crt`` value
Note: The ``crt`` value is only accepted together with a matching
`user-agent <extractor.*.user-agent_>`__, i.e. the one of the browser it was copied from.
extractor.schalenetwork.format extractor.schalenetwork.format
@@ -3627,6 +3636,20 @@ Description
for example ``tags_artist`` or ``tags_character``. for example ``tags_artist`` or ``tags_character``.
extractor.schalenetwork.token
-----------------------------
Type
``string``
Example
* ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
Description
``Authorization`` header value
used for requests to ``https://api.schale.network``
to access ``favorite`` galleries.
extractor.lolisafe.domain extractor.lolisafe.domain
------------------------- -------------------------
Type Type

View File

@@ -680,11 +680,10 @@
}, },
"schalenetwork": "schalenetwork":
{ {
"username": "", "crt" : "",
"password": "", "token": "",
"sleep-request": "0.5-1.5", "sleep-request": "0.5-1.5",
"cbz" : true,
"format": ["0", "1600", "1280", "980", "780"], "format": ["0", "1600", "1280", "980", "780"],
"tags" : false "tags" : false
}, },

View File

@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text, exception from .. import text, exception
from ..cache import cache
import collections import collections
BASE_PATTERN = ( BASE_PATTERN = (
@@ -27,6 +26,7 @@ class SchalenetworkExtractor(Extractor):
category = "schalenetwork" category = "schalenetwork"
root = "https://niyaniya.moe" root = "https://niyaniya.moe"
root_api = "https://api.schale.network" root_api = "https://api.schale.network"
root_auth = "https://auth.schale.network"
request_interval = (0.5, 1.5) request_interval = (0.5, 1.5)
def _init(self): def _init(self):
@@ -49,7 +49,7 @@ class SchalenetworkExtractor(Extractor):
return return
for entry in entries: for entry in entries:
url = f"{self.root}/g/{entry['id']}/{entry['public_key']}" url = f"{self.root}/g/{entry['id']}/{entry['key']}"
entry["_extractor"] = SchalenetworkGalleryExtractor entry["_extractor"] = SchalenetworkGalleryExtractor
yield Message.Queue, url, entry yield Message.Queue, url, entry
@@ -60,6 +60,34 @@ class SchalenetworkExtractor(Extractor):
pass pass
params["page"] += 1 params["page"] += 1
def _token(self):
if token := self.config("token"):
return f"Bearer {token.rpartition(' ')[2]}"
raise exception.AuthRequired("'token'", "your favorites")
def _crt(self):
    """Return the 'crt' token derived from the 'crt' config option.

    The option may hold the bare token, a query string, or a path/URL
    whose query string has a 'crt' parameter.

    Raises AuthRequired (via _require_auth) when no token can be found.
    """
    crt = self.config("crt")
    if not crt:
        self._require_auth()

    # values copied from a browser tend to carry stray whitespace or a
    # trailing newline; without stripping, such a value either fails the
    # check below and is rejected, or is passed on with the newline attached
    crt = crt.strip()

    if not text.re(r"^[0-9a-f-]+$").match(crt):
        # not a bare token: interpret as '<path>?<query>' or a plain query
        path, _, qs = crt.partition("?")
        if not qs:
            qs = path
        crt = text.parse_query(qs).get("crt")
        if not crt:
            self._require_auth()

    return crt
def _require_auth(self, exc=None):
    """Raise AuthRequired for a missing or rejected 'crt' / user-agent pair.

    When 'exc' is given, its status and response reason are passed along
    as the message argument; otherwise that argument is None.
    """
    reason = None if exc is None else f"{exc.status} {exc.response.reason}"
    required = "'crt' query parameter & matching '--user-agent'"
    raise exception.AuthRequired(required, None, reason)
class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor): class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
"""Extractor for schale.network galleries""" """Extractor for schale.network galleries"""
@@ -67,7 +95,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
directory_fmt = ("{category}", "{id} {title}") directory_fmt = ("{category}", "{id} {title}")
archive_fmt = "{id}_{num}" archive_fmt = "{id}_{num}"
request_interval = 0.0 request_interval = 0.0
pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)" pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)"
example = "https://niyaniya.moe/g/12345/67890abcde/" example = "https://niyaniya.moe/g/12345/67890abcde/"
TAG_TYPES = { TAG_TYPES = {
@@ -86,27 +114,10 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
12: "other", 12: "other",
} }
def __init__(self, match):
GalleryExtractor.__init__(self, match)
self.page_url = None
def _init(self):
self.headers = {
"Accept" : "*/*",
"Referer": self.root + "/",
"Origin" : self.root,
}
self.fmt = self.config("format")
self.cbz = self.config("cbz", True)
if self.cbz:
self.filename_fmt = "{id} {title}.{extension}"
self.directory_fmt = ("{category}",)
def metadata(self, _): def metadata(self, _):
url = f"{self.root_api}/books/detail/{self.groups[1]}/{self.groups[2]}" _, gid, gkey = self.groups
self.data = data = self.request_json(url, headers=self.headers) url = f"{self.root_api}/books/detail/{gid}/{gkey}"
data = self.request_json(url, headers=self.headers)
data["date"] = text.parse_timestamp(data["created_at"] // 1000) data["date"] = text.parse_timestamp(data["created_at"] // 1000)
tags = [] tags = []
@@ -127,53 +138,42 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
data["tags_" + types[type]] = values data["tags_" + types[type]] = values
try: try:
if self.cbz: data["count"] = len(data["thumbnails"]["entries"])
data["count"] = len(data["thumbnails"]["entries"])
del data["thumbnails"] del data["thumbnails"]
del data["rels"]
except Exception: except Exception:
pass pass
return data return data
def images(self, _): def images(self, _):
data = self.data crt = self._crt()
_, gid, gkey = self.groups
url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={crt}"
try:
data = self.request_json(url, method="POST", headers=self.headers)
except exception.HttpError as exc:
self._require_auth(exc)
fmt = self._select_format(data["data"]) fmt = self._select_format(data["data"])
url = (f"{self.root_api}/books/data/{data['id']}/" url = (f"{self.root_api}/books/data/{gid}/{gkey}"
f"{data['public_key']}/{fmt['id']}/{fmt['public_key']}") f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={crt}")
params = { data = self.request_json(url, headers=self.headers)
"v": data["updated_at"],
"w": fmt["w"],
}
if self.cbz:
params["action"] = "dl"
base = self.request_json(
url, method="POST", params=params, headers=self.headers,
)["base"]
url = f"{base}?v={data['updated_at']}&w={fmt['w']}"
info = text.nameext_from_url(base)
if not info["extension"]:
info["extension"] = "cbz"
return ((url, info),)
data = self.request_json(url, params=params, headers=self.headers)
base = data["base"] base = data["base"]
results = [] results = []
for entry in data["entries"]: for entry in data["entries"]:
dimensions = entry["dimensions"] dimensions = entry["dimensions"]
info = { info = {
"w": dimensions[0], "width" : dimensions[0],
"h": dimensions[1], "height": dimensions[1],
"_http_headers": self.headers, "_http_headers": self.headers,
} }
results.append((base + entry["path"], info)) results.append((base + entry["path"], info))
return results return results
def _select_format(self, formats): def _select_format(self, formats):
fmt = self.fmt fmt = self.config("format")
if not fmt or fmt == "best": if not fmt or fmt == "best":
fmtids = ("0", "1600", "1280", "980", "780") fmtids = ("0", "1600", "1280", "980", "780")
@@ -182,7 +182,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
elif isinstance(fmt, list): elif isinstance(fmt, list):
fmtids = fmt fmtids = fmt
else: else:
fmtids = (str(self.fmt),) fmtids = (str(fmt),)
for fmtid in fmtids: for fmtid in fmtids:
try: try:
@@ -203,44 +203,36 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
class SchalenetworkSearchExtractor(SchalenetworkExtractor): class SchalenetworkSearchExtractor(SchalenetworkExtractor):
"""Extractor for schale.network search results""" """Extractor for schale.network search results"""
subcategory = "search" subcategory = "search"
pattern = BASE_PATTERN + r"/\?([^#]*)" pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
example = "https://niyaniya.moe/?s=QUERY" example = "https://niyaniya.moe/browse?s=QUERY"
def items(self): def items(self):
params = text.parse_query(self.groups[1]) _, tag, qs = self.groups
params = text.parse_query(qs)
params["page"] = text.parse_int(params.get("page"), 1) params["page"] = text.parse_int(params.get("page"), 1)
if tag is not None:
ns, sep, tag = text.unquote(tag).partition(":")
if "+" in tag:
tag = tag.replace("+", " ")
q = '"'
else:
q = ""
q = '"' if " " in tag else ""
params["s"] = f"{ns}{sep}{q}^{tag}${q}"
return self._pagination("/books", params) return self._pagination("/books", params)
class SchalenetworkFavoriteExtractor(SchalenetworkExtractor): class SchalenetworkFavoriteExtractor(SchalenetworkExtractor):
"""Extractor for schale.network favorites""" """Extractor for schale.network favorites"""
subcategory = "favorite" subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
example = "https://niyaniya.moe/favorites" example = "https://niyaniya.moe/favorites"
def items(self): def items(self):
self.login()
params = text.parse_query(self.groups[1]) params = text.parse_query(self.groups[1])
params["page"] = text.parse_int(params.get("page"), 1) params["page"] = text.parse_int(params.get("page"), 1)
return self._pagination("/favorites", params) self.headers["Authorization"] = self._token()
return self._pagination(f"/books/favorites?crt={self._crt()}", params)
def login(self):
username, password = self._get_auth_info()
if username:
self.headers["Authorization"] = \
"Bearer " + self._login_impl(username, password)
return
raise exception.AuthenticationError("Username and password required")
@cache(maxage=86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
url = "https://auth.schale.network/login"
data = {"uname": username, "passwd": password}
response = self.request(
url, method="POST", headers=self.headers, data=data)
return response.json()["session"]

View File

@@ -126,6 +126,30 @@ __tests__ = (
"#count" : ">= 50", "#count" : ">= 50",
}, },
{
"#url" : "https://niyaniya.moe/browse?s=beach",
"#class" : schalenetwork.SchalenetworkSearchExtractor,
},
{
"#url" : "https://niyaniya.moe/tag/tag:beach",
"#class" : schalenetwork.SchalenetworkSearchExtractor,
},
{
"#url" : "https://niyaniya.moe/tag/circle:tentou+mushi+no+sanba",
"#class" : schalenetwork.SchalenetworkSearchExtractor,
"#results" : (
"https://niyaniya.moe/g/26044/9b7ecf9bcf00",
"https://niyaniya.moe/g/24342/c723a7fe9191",
"https://niyaniya.moe/g/23787/7a51f4258481",
"https://niyaniya.moe/g/23784/d81779e07505",
"https://niyaniya.moe/g/23764/cb867963cfcb",
"https://niyaniya.moe/g/23760/a667d4a7f447",
"https://niyaniya.moe/g/23669/9ec3ff4c6737",
),
},
{ {
"#url" : "https://niyaniya.moe/favorites", "#url" : "https://niyaniya.moe/favorites",
"#class" : schalenetwork.SchalenetworkFavoriteExtractor, "#class" : schalenetwork.SchalenetworkFavoriteExtractor,