[mangadex] extract more manga-related metadata (#8325)

This commit is contained in:
Mike Fährmann
2025-10-01 19:35:16 +02:00
parent 020a44245f
commit 0a76cbc8c7
2 changed files with 140 additions and 64 deletions

View File

@@ -39,7 +39,7 @@ class MangadexExtractor(Extractor):
data = self._transform(chapter)
data["_extractor"] = MangadexChapterExtractor
self._cache[uuid] = data
yield Message.Queue, self.root + "/chapter/" + uuid, data
yield Message.Queue, f"{self.root}/chapter/{uuid}", data
def _items_manga(self):
data = {"_extractor": MangadexMangaExtractor}
@@ -51,13 +51,8 @@ class MangadexExtractor(Extractor):
relationships = defaultdict(list)
for item in chapter["relationships"]:
relationships[item["type"]].append(item)
manga = self.api.manga(relationships["manga"][0]["id"])
for item in manga["relationships"]:
relationships[item["type"]].append(item)
cattributes = chapter["attributes"]
mattributes = manga["attributes"]
if lang := cattributes.get("translatedLanguage"):
lang = lang.partition("-")[0]
@@ -66,35 +61,21 @@ class MangadexExtractor(Extractor):
else:
chnum, sep, minor = 0, "", ""
data = {
"manga" : (mattributes["title"].get("en") or
next(iter(mattributes["title"].values()))),
"manga_id": manga["id"],
return {
**_manga_info(self, relationships["manga"][0]["id"]),
"title" : cattributes["title"],
"volume" : text.parse_int(cattributes["volume"]),
"chapter" : text.parse_int(chnum),
"chapter_minor": sep + minor,
"chapter_id": chapter["id"],
"date" : text.parse_datetime(cattributes["publishAt"]),
"group" : [group["attributes"]["name"]
for group in relationships["scanlation_group"]],
"lang" : lang,
"language": util.code_to_language(lang),
"count" : cattributes["pages"],
"_external_url": cattributes.get("externalUrl"),
}
data["artist"] = [artist["attributes"]["name"]
for artist in relationships["artist"]]
data["author"] = [author["attributes"]["name"]
for author in relationships["author"]]
data["group"] = [group["attributes"]["name"]
for group in relationships["scanlation_group"]]
data["status"] = mattributes["status"]
data["tags"] = [tag["attributes"]["name"]["en"]
for tag in mattributes["tags"]]
return data
class MangadexCoversExtractor(MangadexExtractor):
"""Extractor for mangadex manga covers"""
@@ -121,24 +102,10 @@ class MangadexCoversExtractor(MangadexExtractor):
relationships = defaultdict(list)
for item in cover["relationships"]:
relationships[item["type"]].append(item)
manga = self.api.manga(relationships["manga"][0]["id"])
for item in manga["relationships"]:
relationships[item["type"]].append(item)
cattributes = cover["attributes"]
mattributes = manga["attributes"]
return {
"manga" : (mattributes["title"].get("en") or
next(iter(mattributes["title"].values()))),
"manga_id": manga["id"],
"status" : mattributes["status"],
"author" : [author["attributes"]["name"]
for author in relationships["author"]],
"artist" : [artist["attributes"]["name"]
for artist in relationships["artist"]],
"tags" : [tag["attributes"]["name"]["en"]
for tag in mattributes["tags"]],
**_manga_info(self, relationships["manga"][0]["id"]),
"cover" : cattributes["fileName"],
"lang" : cattributes.get("locale"),
"volume" : text.parse_int(cattributes["volume"]),
@@ -150,7 +117,7 @@ class MangadexCoversExtractor(MangadexExtractor):
class MangadexChapterExtractor(MangadexExtractor):
"""Extractor for manga-chapters from mangadex.org"""
subcategory = "chapter"
pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
pattern = f"{BASE_PATTERN}/chapter/([0-9a-f-]+)"
example = ("https://mangadex.org/chapter"
"/01234567-89ab-cdef-0123-456789abcdef")
@@ -177,13 +144,13 @@ class MangadexChapterExtractor(MangadexExtractor):
"page-reverse") else enumerate
for data["page"], page in enum(chapter["data"], 1):
text.nameext_from_url(page, data)
yield Message.Url, base + page, data
yield Message.Url, f"{base}{page}", data
class MangadexMangaExtractor(MangadexExtractor):
"""Extractor for manga from mangadex.org"""
subcategory = "manga"
pattern = BASE_PATTERN + r"/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
pattern = f"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
example = ("https://mangadex.org/title"
"/01234567-89ab-cdef-0123-456789abcdef")
@@ -194,7 +161,7 @@ class MangadexMangaExtractor(MangadexExtractor):
class MangadexFeedExtractor(MangadexExtractor):
"""Extractor for chapters from your Updates Feed"""
subcategory = "feed"
pattern = BASE_PATTERN + r"/titles?/feed$()"
pattern = f"{BASE_PATTERN}/titles?/feed$()"
example = "https://mangadex.org/title/feed"
def chapters(self):
@@ -242,7 +209,7 @@ class MangadexListExtractor(MangadexExtractor):
class MangadexAuthorExtractor(MangadexExtractor):
"""Extractor for mangadex authors"""
subcategory = "author"
pattern = BASE_PATTERN + r"/author/([0-9a-f-]+)"
pattern = f"{BASE_PATTERN}/author/([0-9a-f-]+)"
example = ("https://mangadex.org/author"
"/01234567-89ab-cdef-0123-456789abcdef/NAME")
@@ -280,30 +247,30 @@ class MangadexAPI():
else text.ensure_http_scheme(server).rstrip("/"))
def athome_server(self, uuid):
    """Query the ``/at-home/server/<uuid>`` endpoint.

    Returns whatever ``self._call`` returns for that endpoint, unmodified.
    """
    # Only one return statement is kept; the string-concatenation variant
    # built the same endpoint and made the f-string line unreachable.
    return self._call(f"/at-home/server/{uuid}")
def author(self, uuid, manga=False):
    """Return the ``"data"`` entry of the ``/author/<uuid>`` response.

    When ``manga`` is truthy, ``includes[]=manga`` is sent as a query
    parameter; otherwise no parameters are sent.
    """
    params = {"includes[]": ("manga",)} if manga else None
    # Single return: the duplicated concatenation variant was equivalent
    # and left the second statement unreachable.
    return self._call(f"/author/{uuid}", params)["data"]
def chapter(self, uuid):
    """Return the ``"data"`` entry of the ``/chapter/<uuid>`` response.

    The request always asks for ``scanlation_group`` via ``includes[]``.
    """
    params = {"includes[]": ("scanlation_group",)}
    # Single return: the duplicated concatenation variant was equivalent
    # and left the second statement unreachable.
    return self._call(f"/chapter/{uuid}", params)["data"]
def covers_manga(self, uuid):
    """Paginate over ``/cover`` filtered by ``manga[]=<uuid>``.

    Returns whatever ``self._pagination_covers`` returns.
    """
    return self._pagination_covers("/cover", {"manga[]": uuid})
def list(self, uuid):
    """Return the ``"data"`` entry of the ``/list/<uuid>`` response.

    ``self._call`` is invoked with no query parameters and with its third
    positional argument set to ``True``.
    """
    # Single return: the duplicated concatenation variant was equivalent
    # and left the second statement unreachable.
    return self._call(f"/list/{uuid}", None, True)["data"]
def list_feed(self, uuid):
    """Paginate over the chapters of ``/list/<uuid>/feed``.

    Returns whatever ``self._pagination_chapters`` returns; it is called
    with no query parameters and ``True`` as its third positional argument
    (presumably the auth flag -- confirm against ``_pagination_chapters``).
    """
    # Single return: the duplicated concatenation variant was equivalent
    # and left the second statement unreachable.
    return self._pagination_chapters(f"/list/{uuid}/feed", None, True)
@memcache(keyarg=1)
def manga(self, uuid):
    """Return the ``"data"`` entry of the ``/manga/<uuid>`` response.

    The request asks for ``artist`` and ``author`` via ``includes[]``.
    Results go through ``memcache(keyarg=1)`` (presumably keyed on
    ``uuid`` -- confirm against the cache module), so callers may receive
    a shared object and should not mutate it.
    """
    params = {"includes[]": ("artist", "author")}
    # Single return: the duplicated concatenation variant was equivalent
    # and left the second statement unreachable.
    return self._call(f"/manga/{uuid}", params)["data"]
def manga_author(self, uuid_author):
params = {"authorOrArtist": uuid_author}
@@ -315,7 +282,7 @@ class MangadexAPI():
"order[volume]" : order,
"order[chapter]": order,
}
return self._pagination_chapters("/manga/" + uuid + "/feed", params)
return self._pagination_chapters(f"/manga/{uuid}/feed", params)
def user_follows_manga(self):
params = {"contentRating": None}
@@ -366,17 +333,17 @@ class MangadexAPI():
_refresh_token_cache.update(
(username, "personal"), data["refresh_token"])
return "Bearer " + access_token
return f"Bearer {access_token}"
@cache(maxage=900, keyarg=1)
def _authenticate_impl_legacy(self, username, password):
if refresh_token := _refresh_token_cache(username):
self.extractor.log.info("Refreshing access token")
url = self.root + "/auth/refresh"
url = f"{self.root}/auth/refresh"
json = {"token": refresh_token}
else:
self.extractor.log.info("Logging in as %s", username)
url = self.root + "/auth/login"
url = f"{self.root}/auth/login"
json = {"username": username, "password": password}
self.extractor.log.debug("Using legacy login method")
@@ -387,7 +354,7 @@ class MangadexAPI():
if refresh_token != data["token"]["refresh"]:
_refresh_token_cache.update(username, data["token"]["refresh"])
return "Bearer " + data["token"]["session"]
return f"Bearer {data['token']['session']}"
def _call(self, endpoint, params=None, auth=False):
url = self.root + endpoint
@@ -470,3 +437,33 @@ class MangadexAPI():
@cache(maxage=90 * 86400, keyarg=0)
def _refresh_token_cache(username):
    """Placeholder lookup for a stored refresh token.

    The function itself always yields ``None``; elsewhere in this module
    values are written through ``_refresh_token_cache.update(...)`` and
    read back by calling the function.
    """
    return None
@memcache(keyarg=1)
def _manga_info(self, uuid):
    """Collect manga-level metadata for the manga identified by ``uuid``.

    Fetches the manga object through ``self.api.manga(uuid)``, groups its
    relationships by ``type`` and returns a flat dict with the keys
    ``manga``, ``manga_id``, ``manga_titles``, ``manga_date``,
    ``description``, ``demographic``, ``origin``, ``status``, ``year``,
    ``rating``, ``links``, ``tags``, ``artist`` and ``author``.

    ``self`` is an object exposing an ``api`` attribute; the result goes
    through ``memcache(keyarg=1)`` (presumably keyed on ``uuid`` -- confirm
    against the cache module).
    """
    manga = self.api.manga(uuid)

    rel = defaultdict(list)
    for item in manga["relationships"]:
        rel[item["type"]].append(item)
    mattr = manga["attributes"]

    title = mattr["title"]
    # A missing or null description is treated like an empty mapping.
    description = mattr.get("description") or {}

    return {
        # Prefer the English entry; otherwise take the first available one.
        # The "" default avoids StopIteration on an empty mapping.
        "manga" : (title.get("en") or
                   next(iter(title.values()), "")),
        "manga_id": manga["id"],
        # Read the last value of each alt-title dict without popping it:
        # the manga object comes from a memoized API call, so mutating it
        # would empty 'altTitles' for every later consumer.
        "manga_titles": [next(reversed(t.values()))
                         for t in mattr.get("altTitles") or ()
                         if t],
        "manga_date"  : text.parse_datetime(mattr.get("createdAt")),
        "description" : (description.get("en") or
                         next(iter(description.values()), "")),
        "demographic": mattr.get("publicationDemographic"),
        "origin": mattr.get("originalLanguage"),
        "status": mattr.get("status"),
        "year"  : mattr.get("year"),
        "rating": mattr.get("contentRating"),
        "links" : mattr.get("links"),
        "tags"  : [tag["attributes"]["name"]["en"] for tag in mattr["tags"]],
        "artist": [artist["attributes"]["name"] for artist in rel["artist"]],
        "author": [author["attributes"]["name"] for author in rel["author"]],
    }

View File

@@ -10,18 +10,99 @@ import datetime
__tests__ = (
{
"#url" : "https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa",
"#class" : mangadex.MangadexChapterExtractor,
"#sha1_metadata": "e86128a79ebe7201b648f1caa828496a2878dc8f",
"#count" : 5,
"artist" : ["Oda Eiichiro"],
"author" : ["Oda Eiichiro"],
"chapter" : 6,
"chapter_id" : "f946ac53-0b71-4b5d-aeb2-7931b13c4aaa",
"chapter_minor": "",
"count" : 5,
"date" : "dt:2018-02-28 10:42:50",
"demographic" : "shounen",
"description" : "One Piece Omake are short manga chapters originally published in the One Piece Log Books & Databooks.",
"extension" : {"jpg", "png"},
"filename" : str,
"group" : ["KEFI"],
"lang" : "en",
"manga" : "One Piece Omake",
"manga_date" : "dt:2018-06-29 17:22:51",
"manga_id" : "487f1f04-75f3-4a2e-a4af-76e615e32585",
"origin" : "ja",
"page" : range(1, 5),
"rating" : "safe",
"status" : "ongoing",
"tags" : ["Comedy"],
"title" : "The 6th Log - Chopper Man",
"volume" : 0,
"year" : None,
"manga_titles" : [
"One Piece: Log Book Omake",
"One Piece: Mugiwara Theater",
"One Piece: Straw Hat Theater",
"One Piece: Strawhat Theater",
],
"links" : {
"al" : "44414",
"kt" : "24849",
"mal": "14414",
},
},
{
"#url" : "https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831",
"#comment" : "oneshot",
"#class" : mangadex.MangadexChapterExtractor,
"#count" : 64,
"#sha1_metadata": "d11ed057a919854696853362be35fc0ba7dded4c",
"#count" : 64,
"artist" : ["Arakawa Hiromu"],
"author" : ["Arakawa Hiromu"],
"chapter" : 0,
"chapter_id" : "61a88817-9c29-4281-bdf1-77b3c1be9831",
"chapter_minor": "",
"count" : 64,
"date" : "dt:2018-03-05 14:36:10",
"demographic" : "shounen",
"description" : "A kunoichi, Henpukumaru, awakens in the mansion of her enemy. She is introduced to the future lord of the mansion, Chiyozuru. Chiyozuru is able to get the unemotional Henpukumaru to smile and react differently than she normally would. But then Henpukumaru's former allies attack one night…",
"extension" : {"jpg", "png"},
"filename" : str,
"group" : ["Illuminati-Manga"],
"lang" : "en",
"manga" : "Souten no Koumori",
"manga_date" : "dt:2018-03-19 10:36:00",
"manga_id" : "f90c4398-8aad-4f51-8a1f-024ca09fdcbc",
"origin" : "ja",
"page" : range(1, 64),
"rating" : "safe",
"status" : "completed",
"title" : "Oneshot",
"volume" : 0,
"year" : 2006,
"manga_titles" : [
"A Bat in Blue Sky",
"Sôten no Kômori",
"Soten no Komori",
"蒼天の蝙蝠",
],
"tags" : [
"Oneshot",
"Historical",
"Action",
"Martial Arts",
"Drama",
"Tragedy",
],
"links" : {
"al" : "30948",
"ap" : "souten-no-koumori",
"kt" : "2065",
"mal": "948",
"mu" : "4786",
},
},
{
@@ -52,8 +133,7 @@ __tests__ = (
"chapter_minor": "",
"chapter_id" : str,
"date" : datetime.datetime,
"lang" : str,
"language" : str,
"lang" : "iso:lang",
"artist" : ["Arakawa Hiromu"],
"author" : ["Arakawa Hiromu"],
"status" : "completed",
@@ -76,7 +156,6 @@ __tests__ = (
"manga" : "Souten no Koumori",
"lang" : {"fr", "it"},
"language": {"French", "Italian"},
},
{
@@ -156,7 +235,7 @@ __tests__ = (
"#url" : "https://mangadex.org/author/7222d0d5-836c-4bf3-9174-72bceade8c87/kotoyama",
"#class" : mangadex.MangadexAuthorExtractor,
"#pattern" : mangadex.MangadexMangaExtractor.pattern,
"#count" : 8,
"#count" : 9,
},
{
@@ -178,7 +257,7 @@ __tests__ = (
"cover_id" : "af3c1690-1e06-4432-909e-3e0f9ee01f68",
"date" : "dt:2021-05-24 17:19:13",
"date_updated": "dt:2021-05-24 17:19:13",
"extension" : "jpg",
"extension" : {"jpg", "png"},
"filename" : "af3c1690-1e06-4432-909e-3e0f9ee01f68",
"lang" : "ja",
"manga" : "Souten no Koumori",
@@ -206,7 +285,7 @@ __tests__ = (
"cover_id" : "iso:uuid",
"date" : "type:datetime",
"date_updated": "type:datetime",
"extension" : "jpg",
"extension" : {"jpg", "png"},
"filename" : str,
"lang" : {"ja", "fa"},
"manga" : "Gachiakuta",