[schalenetwork] fix extraction (#6948 #7391 #7728)

- remove 'cbz' option
- require 'crt' query parameter token as well as a matching User-Agent
This commit is contained in:
Mike Fährmann
2025-09-16 22:10:53 +02:00
parent a0b3e08f64
commit 1b9e1ff9ff
4 changed files with 125 additions and 87 deletions

View File

@@ -3586,17 +3586,26 @@ Description
the first in the list gets chosen (usually `mp3`).
extractor.schalenetwork.cbz
extractor.schalenetwork.crt
---------------------------
Type
``bool``
Default
``true``
``string``
Example
* ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"``
* ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"``
Description
Download each gallery as a single ``.cbz`` file.
The ``crt`` query parameter value
sent when fetching gallery data.
Disabling this option causes a gallery
to be downloaded as individual image files.
To get this value:
* Open your browser's Developer Tools (F12)
* Select `Network` -> `XHR`
* Open a gallery page
* Select the last `Network` entry and copy its ``crt`` value
Note: The ``crt`` value is only accepted together with a matching
``User-Agent`` header, so you also need to set your browser's
`user-agent <extractor.*.user-agent_>`__
extractor.schalenetwork.format
@@ -3627,6 +3636,20 @@ Description
for example ``tags_artist`` or ``tags_character``.
extractor.schalenetwork.token
-----------------------------
Type
``string``
Example
* ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
Description
``Authorization`` header value
used for requests to ``https://api.schale.network``
to access ``favorite`` galleries.
extractor.lolisafe.domain
-------------------------
Type

View File

@@ -680,11 +680,10 @@
},
"schalenetwork":
{
"username": "",
"password": "",
"crt" : "",
"token": "",
"sleep-request": "0.5-1.5",
"cbz" : true,
"format": ["0", "1600", "1280", "980", "780"],
"tags" : false
},

View File

@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, exception
from ..cache import cache
import collections
BASE_PATTERN = (
@@ -27,6 +26,7 @@ class SchalenetworkExtractor(Extractor):
category = "schalenetwork"
root = "https://niyaniya.moe"
root_api = "https://api.schale.network"
root_auth = "https://auth.schale.network"
request_interval = (0.5, 1.5)
def _init(self):
@@ -49,7 +49,7 @@ class SchalenetworkExtractor(Extractor):
return
for entry in entries:
url = f"{self.root}/g/{entry['id']}/{entry['public_key']}"
url = f"{self.root}/g/{entry['id']}/{entry['key']}"
entry["_extractor"] = SchalenetworkGalleryExtractor
yield Message.Queue, url, entry
@@ -60,6 +60,34 @@ class SchalenetworkExtractor(Extractor):
pass
params["page"] += 1
def _token(self):
    """Return the ``Authorization`` header value built from the 'token' option.

    The option may hold a bare token, ``Bearer <token>``, or a full
    ``Authorization: Bearer <token>`` line; only the last space-separated
    word is kept and re-prefixed with ``Bearer``.

    Raises exception.AuthRequired when no usable token is configured.
    """
    # Strip before splitting: with trailing whitespace, rpartition(' ')[2]
    # would be empty and yield a bare "Bearer " header instead of a clear
    # "token required" error.
    if token := (self.config("token") or "").strip():
        return f"Bearer {token.rpartition(' ')[2]}"
    raise exception.AuthRequired("'token'", "your favorites")
def _crt(self):
    """Return the ``crt`` query parameter value from the 'crt' option.

    The option is used as-is when it consists only of characters from
    ``[0-9a-f-]``. Otherwise it is treated as a URL/path or query string
    and the value of its ``crt`` parameter is extracted.

    Calls self._require_auth() when the option is unset or no ``crt``
    value can be found in it.
    """
    value = self.config("crt")
    if not value:
        self._require_auth()

    # Bare value: nothing to extract
    if text.re(r"^[0-9a-f-]+$").match(value):
        return value

    # Use the part after '?'; when there is none, parse the whole
    # string as a query string
    head, _, query = value.partition("?")
    value = text.parse_query(query or head).get("crt")
    if not value:
        self._require_auth()
    return value
def _require_auth(self, exc=None):
    """Raise exception.AuthRequired for a missing or rejected 'crt' value.

    exc: optional HTTP error; when given, its status and response reason
    are passed along as '<status> <reason>' in the raised exception's
    third argument.
    """
    msg = None if exc is None else f"{exc.status} {exc.response.reason}"
    raise exception.AuthRequired(
        "'crt' query parameter & matching '--user-agent'", None, msg)
class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
"""Extractor for schale.network galleries"""
@@ -67,7 +95,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
directory_fmt = ("{category}", "{id} {title}")
archive_fmt = "{id}_{num}"
request_interval = 0.0
pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)"
example = "https://niyaniya.moe/g/12345/67890abcde/"
TAG_TYPES = {
@@ -86,27 +114,10 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
12: "other",
}
def __init__(self, match):
GalleryExtractor.__init__(self, match)
self.page_url = None
def _init(self):
self.headers = {
"Accept" : "*/*",
"Referer": self.root + "/",
"Origin" : self.root,
}
self.fmt = self.config("format")
self.cbz = self.config("cbz", True)
if self.cbz:
self.filename_fmt = "{id} {title}.{extension}"
self.directory_fmt = ("{category}",)
def metadata(self, _):
url = f"{self.root_api}/books/detail/{self.groups[1]}/{self.groups[2]}"
self.data = data = self.request_json(url, headers=self.headers)
_, gid, gkey = self.groups
url = f"{self.root_api}/books/detail/{gid}/{gkey}"
data = self.request_json(url, headers=self.headers)
data["date"] = text.parse_timestamp(data["created_at"] // 1000)
tags = []
@@ -127,53 +138,42 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
data["tags_" + types[type]] = values
try:
if self.cbz:
data["count"] = len(data["thumbnails"]["entries"])
data["count"] = len(data["thumbnails"]["entries"])
del data["thumbnails"]
del data["rels"]
except Exception:
pass
return data
def images(self, _):
data = self.data
crt = self._crt()
_, gid, gkey = self.groups
url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={crt}"
try:
data = self.request_json(url, method="POST", headers=self.headers)
except exception.HttpError as exc:
self._require_auth(exc)
fmt = self._select_format(data["data"])
url = (f"{self.root_api}/books/data/{data['id']}/"
f"{data['public_key']}/{fmt['id']}/{fmt['public_key']}")
params = {
"v": data["updated_at"],
"w": fmt["w"],
}
if self.cbz:
params["action"] = "dl"
base = self.request_json(
url, method="POST", params=params, headers=self.headers,
)["base"]
url = f"{base}?v={data['updated_at']}&w={fmt['w']}"
info = text.nameext_from_url(base)
if not info["extension"]:
info["extension"] = "cbz"
return ((url, info),)
data = self.request_json(url, params=params, headers=self.headers)
url = (f"{self.root_api}/books/data/{gid}/{gkey}"
f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={crt}")
data = self.request_json(url, headers=self.headers)
base = data["base"]
results = []
for entry in data["entries"]:
dimensions = entry["dimensions"]
info = {
"w": dimensions[0],
"h": dimensions[1],
"width" : dimensions[0],
"height": dimensions[1],
"_http_headers": self.headers,
}
results.append((base + entry["path"], info))
return results
def _select_format(self, formats):
fmt = self.fmt
fmt = self.config("format")
if not fmt or fmt == "best":
fmtids = ("0", "1600", "1280", "980", "780")
@@ -182,7 +182,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
elif isinstance(fmt, list):
fmtids = fmt
else:
fmtids = (str(self.fmt),)
fmtids = (str(fmt),)
for fmtid in fmtids:
try:
@@ -203,44 +203,36 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
class SchalenetworkSearchExtractor(SchalenetworkExtractor):
"""Extractor for schale.network search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/\?([^#]*)"
example = "https://niyaniya.moe/?s=QUERY"
pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
example = "https://niyaniya.moe/browse?s=QUERY"
def items(self):
params = text.parse_query(self.groups[1])
_, tag, qs = self.groups
params = text.parse_query(qs)
params["page"] = text.parse_int(params.get("page"), 1)
if tag is not None:
ns, sep, tag = text.unquote(tag).partition(":")
if "+" in tag:
tag = tag.replace("+", " ")
q = '"'
else:
q = ""
q = '"' if " " in tag else ""
params["s"] = f"{ns}{sep}{q}^{tag}${q}"
return self._pagination("/books", params)
class SchalenetworkFavoriteExtractor(SchalenetworkExtractor):
"""Extractor for schale.network favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
example = "https://niyaniya.moe/favorites"
def items(self):
self.login()
params = text.parse_query(self.groups[1])
params["page"] = text.parse_int(params.get("page"), 1)
return self._pagination("/favorites", params)
def login(self):
username, password = self._get_auth_info()
if username:
self.headers["Authorization"] = \
"Bearer " + self._login_impl(username, password)
return
raise exception.AuthenticationError("Username and password required")
@cache(maxage=86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
url = "https://auth.schale.network/login"
data = {"uname": username, "passwd": password}
response = self.request(
url, method="POST", headers=self.headers, data=data)
return response.json()["session"]
self.headers["Authorization"] = self._token()
return self._pagination(f"/books/favorites?crt={self._crt()}", params)

View File

@@ -126,6 +126,30 @@ __tests__ = (
"#count" : ">= 50",
},
{
"#url" : "https://niyaniya.moe/browse?s=beach",
"#class" : schalenetwork.SchalenetworkSearchExtractor,
},
{
"#url" : "https://niyaniya.moe/tag/tag:beach",
"#class" : schalenetwork.SchalenetworkSearchExtractor,
},
{
"#url" : "https://niyaniya.moe/tag/circle:tentou+mushi+no+sanba",
"#class" : schalenetwork.SchalenetworkSearchExtractor,
"#results" : (
"https://niyaniya.moe/g/26044/9b7ecf9bcf00",
"https://niyaniya.moe/g/24342/c723a7fe9191",
"https://niyaniya.moe/g/23787/7a51f4258481",
"https://niyaniya.moe/g/23784/d81779e07505",
"https://niyaniya.moe/g/23764/cb867963cfcb",
"https://niyaniya.moe/g/23760/a667d4a7f447",
"https://niyaniya.moe/g/23669/9ec3ff4c6737",
),
},
{
"#url" : "https://niyaniya.moe/favorites",
"#class" : schalenetwork.SchalenetworkFavoriteExtractor,