[mangadex] update to API v5 (#1535)

This commit is contained in:
Mike Fährmann
2021-06-07 01:29:42 +02:00
parent ff8d0d7280
commit 3e332eaf53

View File

@@ -11,13 +11,19 @@
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import memcache from ..cache import memcache
from collections import defaultdict
class MangadexExtractor(Extractor): class MangadexExtractor(Extractor):
"""Base class for mangadex extractors""" """Base class for mangadex extractors"""
category = "mangadex" category = "mangadex"
directory_fmt = (
"{category}", "{manga}",
"{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}")
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{chapter_id}_{page}"
root = "https://mangadex.org" root = "https://mangadex.org"
api_root = "https://api.mangadex.org"
# mangadex-to-iso639-1 codes # mangadex-to-iso639-1 codes
iso639_map = { iso639_map = {
@@ -29,183 +35,183 @@ class MangadexExtractor(Extractor):
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.api = MangadexAPI(self)
self.uuid = match.group(1)
server = self.config("api-server") def items(self):
if server is not None: for chapter in self.chapters():
self.api_root = server.rstrip("/")
def chapter_data(self, chapter_id): relationships = defaultdict(list)
"""Request API results for 'chapter_id'""" for item in chapter["relationships"]:
url = "{}/v2/chapter/{}".format(self.api_root, chapter_id) relationships[item["type"]].append(item["id"])
return self.request(url).json()["data"] manga = self.api.manga(relationships["manga"][0])
for item in manga["relationships"]:
relationships[item["type"]].append(item["id"])
@memcache(keyarg=1) cattributes = chapter["data"]["attributes"]
def manga_data(self, manga_id): mattributes = manga["data"]["attributes"]
"""Request API results for 'manga_id'""" lang = cattributes["translatedLanguage"].partition("-")[0]
url = "{}/v2/manga/{}".format(self.api_root, manga_id)
return self.request(url).json()["data"]
def manga_chapters(self, manga_id): if cattributes["chapter"]:
"""Request chapter list for 'manga_id'""" chnum, sep, minor = cattributes["chapter"].partition(".")
url = "{}/v2/manga/{}/chapters".format(self.api_root, manga_id) else:
data = self.request(url).json()["data"] chnum, sep, minor = 0, "", ""
groups = { data = {
group["id"]: group["name"] "manga" : mattributes["title"]["en"],
for group in data["groups"] "manga_id": manga["data"]["id"],
} "title" : cattributes["title"],
"volume" : text.parse_int(cattributes["volume"]),
"chapter" : text.parse_int(chnum),
"chapter_minor": sep + minor,
"chapter_id": chapter["data"]["id"],
"date" : text.parse_datetime(cattributes["publishAt"]),
"lang" : lang,
"language": util.code_to_language(lang),
"count" : len(cattributes["data"]),
}
for chapter in data["chapters"]: if self.config("metadata"):
cgroups = chapter["groups"] data["artist"] = [
for idx, group_id in enumerate(cgroups): self.api.author(uuid)["data"]["attributes"]["name"]
cgroups[idx] = groups[group_id] for uuid in relationships["artist"]]
yield chapter data["author"] = [
self.api.author(uuid)["data"]["attributes"]["name"]
for uuid in relationships["author"]]
data["group"] = [
self.api.group(uuid)["data"]["attributes"]["name"]
for uuid in relationships["scanlation_group"]]
base = "{}/data/{}/".format(
self.api.athome_server(chapter["data"]["id"])["baseUrl"],
cattributes["hash"])
yield Message.Directory, data
for data["page"], page in enumerate(cattributes["data"], 1):
text.nameext_from_url(page, data)
yield Message.Url, base + page, data
class MangadexChapterExtractor(MangadexExtractor): class MangadexChapterExtractor(MangadexExtractor):
"""Extractor for manga-chapters from mangadex.org""" """Extractor for manga-chapters from mangadex.org"""
subcategory = "chapter" subcategory = "chapter"
directory_fmt = ( pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
"{category}", "{manga}", r"/chapter/([0-9a-f-]+)")
"{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}")
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{chapter_id}_{page}"
pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)"
test = ( test = (
("https://mangadex.org/chapter/122094", { ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", {
"keyword": "89d1b24b4baa1fb737d32711d9f2ade6ea426987", "keyword": "f6c2b908df06eb834d56193dfe1fa1f7c2c4dccd",
# "content": "50383a4c15124682057b197d40261641a98db514", # "content": "50383a4c15124682057b197d40261641a98db514",
}), }),
# oneshot # oneshot
("https://mangadex.cc/chapter/138086", { ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", {
"options": (("metadata", True),),
"count": 64, "count": 64,
"keyword": "c53a0e4c12250578a4e630281085875e59532c03", "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb",
}), }),
# MANGA Plus (#1154) # MANGA Plus (#1154)
("https://mangadex.org/chapter/1122815", { ("https://mangadex.org/chapter/8d50ed68-8298-4ac9-b63d-cb2aea143dd0", {
"exception": exception.HttpError, "exception": exception.StopExtraction,
}), }),
) )
def __init__(self, match): def chapters(self):
MangadexExtractor.__init__(self, match) return (self.api.chapter(self.uuid),)
self.chapter_id = match.group(1)
def items(self):
cdata = self.chapter_data(self.chapter_id)
if "server" not in cdata:
if cdata["status"] == "external":
raise exception.StopExtraction(
"Chapter is not available on MangaDex and can be read on "
"the official publisher's website at %s.", cdata["pages"])
raise exception.StopExtraction("No download server available.")
mdata = self.manga_data(cdata["mangaId"])
chapter, sep, minor = cdata["chapter"].partition(".")
lang = self.iso639_map.get(cdata["language"], cdata["language"])
base = cdata["server"] + cdata["hash"] + "/"
if base[0] == "/":
base = text.urljoin(self.root, base)
if "serverFallback" in cdata:
fallback = cdata["serverFallback"] + cdata["hash"] + "/"
else:
fallback = None
data = {
"manga" : text.unescape(mdata["title"]),
"manga_id": mdata["id"],
"artist" : mdata["artist"],
"author" : mdata["author"],
"title" : text.unescape(cdata["title"]),
"volume" : text.parse_int(cdata["volume"]),
"chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
"chapter_id": cdata["id"],
"group" : [group["name"] for group in cdata["groups"]],
"date" : text.parse_timestamp(cdata["timestamp"]),
"lang" : lang,
"language": util.code_to_language(lang),
"count" : len(cdata["pages"]),
}
yield Message.Directory, data
for data["page"], page in enumerate(cdata["pages"], 1):
if fallback:
data["_fallback"] = (fallback + page,)
yield Message.Url, base + page, text.nameext_from_url(page, data)
class MangadexMangaExtractor(MangadexExtractor): class MangadexMangaExtractor(MangadexExtractor):
"""Extractor for manga from mangadex.org""" """Extractor for manga from mangadex.org"""
subcategory = "manga" subcategory = "manga"
categorytransfer = True
pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)" pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
r"/(?:title|manga)/(\d+)") r"/(?:title|manga)/([0-9a-f-]+)")
test = ( test = (
("https://mangadex.org/manga/2946/souten-no-koumori", { ("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", {
"pattern": r"https://mangadex.org/chapter/\d+",
"keyword": { "keyword": {
"manga" : "Souten no Koumori", "manga" : "Souten no Koumori",
"manga_id": 2946, "manga_id": "f90c4398-8aad-4f51-8a1f-024ca09fdcbc",
"title" : "re:One[Ss]hot", "title" : "re:One[Ss]hot",
"volume" : 0, "volume" : 0,
"chapter" : 0, "chapter" : 0,
"chapter_minor": "", "chapter_minor": "",
"chapter_id": int, "chapter_id": str,
"group" : list,
"date" : "type:datetime", "date" : "type:datetime",
"lang" : str, "lang" : str,
"language": str, "language": str,
}, },
}), }),
("https://mangadex.cc/manga/13318/dagashi-kashi/chapters/2/", { ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/"),
"count": ">= 100", ("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
}), "count": 16,
("https://mangadex.org/title/13004/yorumori-no-kuni-no-sora-ni", {
"count": 0,
}), }),
) )
def __init__(self, match):
MangadexExtractor.__init__(self, match)
self.manga_id = match.group(1)
def items(self):
yield Message.Version, 1
for data in self.chapters():
url = "{}/chapter/{}".format(self.root, data["chapter_id"])
yield Message.Queue, url, data
def chapters(self): def chapters(self):
"""Return a sorted list of chapter-metadata dicts""" return self.api.manga_feed(self.uuid)
manga = self.manga_data(int(self.manga_id))
results = []
for cdata in self.manga_chapters(self.manga_id):
chapter, sep, minor = cdata["chapter"].partition(".")
lang = self.iso639_map.get(cdata["language"], cdata["language"])
results.append({
"manga" : text.unescape(manga["title"]),
"manga_id": text.parse_int(self.manga_id),
"artist" : manga["artist"],
"author" : manga["author"],
"title" : text.unescape(cdata["title"]),
"volume" : text.parse_int(cdata["volume"]),
"chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
"chapter_id": text.parse_int(cdata["id"]),
"group" : cdata["groups"],
"date" : text.parse_timestamp(cdata["timestamp"]),
"lang" : lang,
"language": util.code_to_language(lang),
"_extractor": MangadexChapterExtractor,
})
results.sort( class MangadexAPI():
key=lambda x: (x["chapter"], x["chapter_minor"]), """Interface for the MangaDex API v5"""
reverse=self.config("chapter-reverse", False),
) def __init__(self, extr):
return results self.extractor = extr
server = extr.config("api-server")
self.root = ("https://api.mangadex.org" if server is None
else text.ensure_http_scheme(server).rstrip("/"))
def athome_server(self, uuid):
    """Request API results from '/at-home/server/<uuid>'"""
    endpoint = "/at-home/server/" + uuid
    return self._call(endpoint)
# presumably caches results keyed by argument 1 ('uuid') --
# confirm against ..cache.memcache
@memcache(keyarg=1)
def author(self, uuid):
    """Request API results from '/author/<uuid>'"""
    endpoint = "/author/" + uuid
    return self._call(endpoint)
def chapter(self, uuid):
    """Request API results from '/chapter/<uuid>'"""
    endpoint = "/chapter/" + uuid
    return self._call(endpoint)
# presumably caches results keyed by argument 1 ('uuid') --
# confirm against ..cache.memcache
@memcache(keyarg=1)
def group(self, uuid):
    """Request API results from '/group/<uuid>'"""
    endpoint = "/group/" + uuid
    return self._call(endpoint)
# presumably caches results keyed by argument 1 ('uuid') --
# confirm against ..cache.memcache
@memcache(keyarg=1)
def manga(self, uuid):
    """Request API results from '/manga/<uuid>'"""
    endpoint = "/manga/" + uuid
    return self._call(endpoint)
def manga_feed(self, uuid):
    """Return the paginated results of '/manga/<uuid>/feed'

    Volume and chapter sort order follow the 'chapter-reverse' option;
    the value of the 'lang' option is sent as 'translatedLanguage[]'.
    """
    option = self.extractor.config

    if option("chapter-reverse"):
        direction = "desc"
    else:
        direction = "asc"

    query = {}
    query["order[volume]"] = direction
    query["order[chapter]"] = direction
    query["translatedLanguage[]"] = option("lang")

    endpoint = "".join(("/manga/", uuid, "/feed"))
    return self._pagination(endpoint, query)
def _call(self, endpoint, params=None):
    """Send a request to 'endpoint' and return its decoded JSON body

    A response with a status code below 400 is returned as parsed JSON.
    A 429 response triggers a wait based on its
    'X-RateLimit-Retry-After' header, after which the request is
    repeated.  Every other status raises exception.StopExtraction with
    the status code, the reason phrase, and the error details reported
    by the API (or a placeholder if those cannot be read).
    """
    url = self.root + endpoint

    while True:
        response = self.extractor.request(url, params=params, fatal=None)

        if response.status_code < 400:
            return response.json()

        if response.status_code == 429:
            until = response.headers.get("X-RateLimit-Retry-After")
            self.extractor.wait(until=until)
            continue

        # An error body is not guaranteed to be JSON or to carry a
        # well-formed 'errors' list; a failure to read it must not
        # replace the actual HTTP error with an unrelated exception.
        try:
            msg = ", ".join('{title}: {detail}'.format_map(error)
                            for error in response.json()["errors"])
        except (ValueError, KeyError, TypeError):
            msg = "no error details available"

        raise exception.StopExtraction(
            "%s %s (%s)", response.status_code, response.reason, msg)
def _pagination(self, endpoint, params=None):
    """Yield every entry of 'results' from all pages of 'endpoint'

    Requests start at offset 0.  After each page the next offset is
    computed from the 'offset' and 'limit' values of the response, and
    iteration stops once that offset reaches the reported 'total'.
    The 'params' dict passed by the caller is left unmodified.
    """
    # Work on a private copy: 'offset' is rewritten for every page and
    # must not leak into a dict the caller may still be using.
    params = dict(params) if params else {}
    params["offset"] = 0

    while True:
        data = self._call(endpoint, params)
        yield from data["results"]

        params["offset"] = data["offset"] + data["limit"]
        if params["offset"] >= data["total"]:
            return