[schalenetwork] fix extraction (#6948 #7391 #7728)

- remove 'cbz' option
- require 'crt' query parameter token as well as a matching User-Agent
This commit is contained in:
Mike Fährmann
2025-09-16 22:10:53 +02:00
parent a0b3e08f64
commit 1b9e1ff9ff
4 changed files with 125 additions and 87 deletions

View File

@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, exception
from ..cache import cache
import collections
BASE_PATTERN = (
@@ -27,6 +26,7 @@ class SchalenetworkExtractor(Extractor):
category = "schalenetwork"
root = "https://niyaniya.moe"
root_api = "https://api.schale.network"
root_auth = "https://auth.schale.network"
request_interval = (0.5, 1.5)
def _init(self):
@@ -49,7 +49,7 @@ class SchalenetworkExtractor(Extractor):
return
for entry in entries:
url = f"{self.root}/g/{entry['id']}/{entry['public_key']}"
url = f"{self.root}/g/{entry['id']}/{entry['key']}"
entry["_extractor"] = SchalenetworkGalleryExtractor
yield Message.Queue, url, entry
@@ -60,6 +60,34 @@ class SchalenetworkExtractor(Extractor):
pass
params["page"] += 1
def _token(self):
if token := self.config("token"):
return f"Bearer {token.rpartition(' ')[2]}"
raise exception.AuthRequired("'token'", "your favorites")
def _crt(self):
crt = self.config("crt")
if not crt:
self._require_auth()
if not text.re(r"^[0-9a-f-]+$").match(crt):
path, _, qs = crt.partition("?")
if not qs:
qs = path
crt = text.parse_query(qs).get("crt")
if not crt:
self._require_auth()
return crt
def _require_auth(self, exc=None):
if exc is None:
msg = None
else:
msg = f"{exc.status} {exc.response.reason}"
raise exception.AuthRequired(
"'crt' query parameter & matching '--user-agent'", None, msg)
class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
"""Extractor for schale.network galleries"""
@@ -67,7 +95,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
directory_fmt = ("{category}", "{id} {title}")
archive_fmt = "{id}_{num}"
request_interval = 0.0
pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)"
example = "https://niyaniya.moe/g/12345/67890abcde/"
TAG_TYPES = {
@@ -86,27 +114,10 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
12: "other",
}
def __init__(self, match):
GalleryExtractor.__init__(self, match)
self.page_url = None
def _init(self):
self.headers = {
"Accept" : "*/*",
"Referer": self.root + "/",
"Origin" : self.root,
}
self.fmt = self.config("format")
self.cbz = self.config("cbz", True)
if self.cbz:
self.filename_fmt = "{id} {title}.{extension}"
self.directory_fmt = ("{category}",)
def metadata(self, _):
url = f"{self.root_api}/books/detail/{self.groups[1]}/{self.groups[2]}"
self.data = data = self.request_json(url, headers=self.headers)
_, gid, gkey = self.groups
url = f"{self.root_api}/books/detail/{gid}/{gkey}"
data = self.request_json(url, headers=self.headers)
data["date"] = text.parse_timestamp(data["created_at"] // 1000)
tags = []
@@ -127,53 +138,42 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
data["tags_" + types[type]] = values
try:
if self.cbz:
data["count"] = len(data["thumbnails"]["entries"])
data["count"] = len(data["thumbnails"]["entries"])
del data["thumbnails"]
del data["rels"]
except Exception:
pass
return data
def images(self, _):
data = self.data
crt = self._crt()
_, gid, gkey = self.groups
url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={crt}"
try:
data = self.request_json(url, method="POST", headers=self.headers)
except exception.HttpError as exc:
self._require_auth(exc)
fmt = self._select_format(data["data"])
url = (f"{self.root_api}/books/data/{data['id']}/"
f"{data['public_key']}/{fmt['id']}/{fmt['public_key']}")
params = {
"v": data["updated_at"],
"w": fmt["w"],
}
if self.cbz:
params["action"] = "dl"
base = self.request_json(
url, method="POST", params=params, headers=self.headers,
)["base"]
url = f"{base}?v={data['updated_at']}&w={fmt['w']}"
info = text.nameext_from_url(base)
if not info["extension"]:
info["extension"] = "cbz"
return ((url, info),)
data = self.request_json(url, params=params, headers=self.headers)
url = (f"{self.root_api}/books/data/{gid}/{gkey}"
f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={crt}")
data = self.request_json(url, headers=self.headers)
base = data["base"]
results = []
for entry in data["entries"]:
dimensions = entry["dimensions"]
info = {
"w": dimensions[0],
"h": dimensions[1],
"width" : dimensions[0],
"height": dimensions[1],
"_http_headers": self.headers,
}
results.append((base + entry["path"], info))
return results
def _select_format(self, formats):
fmt = self.fmt
fmt = self.config("format")
if not fmt or fmt == "best":
fmtids = ("0", "1600", "1280", "980", "780")
@@ -182,7 +182,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
elif isinstance(fmt, list):
fmtids = fmt
else:
fmtids = (str(self.fmt),)
fmtids = (str(fmt),)
for fmtid in fmtids:
try:
@@ -203,44 +203,36 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
class SchalenetworkSearchExtractor(SchalenetworkExtractor):
"""Extractor for schale.network search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/\?([^#]*)"
example = "https://niyaniya.moe/?s=QUERY"
pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
example = "https://niyaniya.moe/browse?s=QUERY"
def items(self):
params = text.parse_query(self.groups[1])
_, tag, qs = self.groups
params = text.parse_query(qs)
params["page"] = text.parse_int(params.get("page"), 1)
if tag is not None:
ns, sep, tag = text.unquote(tag).partition(":")
if "+" in tag:
tag = tag.replace("+", " ")
q = '"'
else:
q = ""
q = '"' if " " in tag else ""
params["s"] = f"{ns}{sep}{q}^{tag}${q}"
return self._pagination("/books", params)
class SchalenetworkFavoriteExtractor(SchalenetworkExtractor):
"""Extractor for schale.network favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
example = "https://niyaniya.moe/favorites"
def items(self):
self.login()
params = text.parse_query(self.groups[1])
params["page"] = text.parse_int(params.get("page"), 1)
return self._pagination("/favorites", params)
def login(self):
username, password = self._get_auth_info()
if username:
self.headers["Authorization"] = \
"Bearer " + self._login_impl(username, password)
return
raise exception.AuthenticationError("Username and password required")
@cache(maxage=86400, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
url = "https://auth.schale.network/login"
data = {"uname": username, "passwd": password}
response = self.request(
url, method="POST", headers=self.headers, data=data)
return response.json()["session"]
self.headers["Authorization"] = self._token()
return self._pagination(f"/books/favorites?crt={self._crt()}", params)