[mangadex] extract more manga-related metadata (#8325)

This commit is contained in:
Mike Fährmann
2025-10-01 19:35:16 +02:00
parent 020a44245f
commit 0a76cbc8c7
2 changed files with 140 additions and 64 deletions

View File

@@ -39,7 +39,7 @@ class MangadexExtractor(Extractor):
data = self._transform(chapter) data = self._transform(chapter)
data["_extractor"] = MangadexChapterExtractor data["_extractor"] = MangadexChapterExtractor
self._cache[uuid] = data self._cache[uuid] = data
yield Message.Queue, self.root + "/chapter/" + uuid, data yield Message.Queue, f"{self.root}/chapter/{uuid}", data
def _items_manga(self): def _items_manga(self):
data = {"_extractor": MangadexMangaExtractor} data = {"_extractor": MangadexMangaExtractor}
@@ -51,13 +51,8 @@ class MangadexExtractor(Extractor):
relationships = defaultdict(list) relationships = defaultdict(list)
for item in chapter["relationships"]: for item in chapter["relationships"]:
relationships[item["type"]].append(item) relationships[item["type"]].append(item)
manga = self.api.manga(relationships["manga"][0]["id"])
for item in manga["relationships"]:
relationships[item["type"]].append(item)
cattributes = chapter["attributes"] cattributes = chapter["attributes"]
mattributes = manga["attributes"]
if lang := cattributes.get("translatedLanguage"): if lang := cattributes.get("translatedLanguage"):
lang = lang.partition("-")[0] lang = lang.partition("-")[0]
@@ -66,35 +61,21 @@ class MangadexExtractor(Extractor):
else: else:
chnum, sep, minor = 0, "", "" chnum, sep, minor = 0, "", ""
data = { return {
"manga" : (mattributes["title"].get("en") or **_manga_info(self, relationships["manga"][0]["id"]),
next(iter(mattributes["title"].values()))),
"manga_id": manga["id"],
"title" : cattributes["title"], "title" : cattributes["title"],
"volume" : text.parse_int(cattributes["volume"]), "volume" : text.parse_int(cattributes["volume"]),
"chapter" : text.parse_int(chnum), "chapter" : text.parse_int(chnum),
"chapter_minor": sep + minor, "chapter_minor": sep + minor,
"chapter_id": chapter["id"], "chapter_id": chapter["id"],
"date" : text.parse_datetime(cattributes["publishAt"]), "date" : text.parse_datetime(cattributes["publishAt"]),
"group" : [group["attributes"]["name"]
for group in relationships["scanlation_group"]],
"lang" : lang, "lang" : lang,
"language": util.code_to_language(lang),
"count" : cattributes["pages"], "count" : cattributes["pages"],
"_external_url": cattributes.get("externalUrl"), "_external_url": cattributes.get("externalUrl"),
} }
data["artist"] = [artist["attributes"]["name"]
for artist in relationships["artist"]]
data["author"] = [author["attributes"]["name"]
for author in relationships["author"]]
data["group"] = [group["attributes"]["name"]
for group in relationships["scanlation_group"]]
data["status"] = mattributes["status"]
data["tags"] = [tag["attributes"]["name"]["en"]
for tag in mattributes["tags"]]
return data
class MangadexCoversExtractor(MangadexExtractor): class MangadexCoversExtractor(MangadexExtractor):
"""Extractor for mangadex manga covers""" """Extractor for mangadex manga covers"""
@@ -121,24 +102,10 @@ class MangadexCoversExtractor(MangadexExtractor):
relationships = defaultdict(list) relationships = defaultdict(list)
for item in cover["relationships"]: for item in cover["relationships"]:
relationships[item["type"]].append(item) relationships[item["type"]].append(item)
manga = self.api.manga(relationships["manga"][0]["id"])
for item in manga["relationships"]:
relationships[item["type"]].append(item)
cattributes = cover["attributes"] cattributes = cover["attributes"]
mattributes = manga["attributes"]
return { return {
"manga" : (mattributes["title"].get("en") or **_manga_info(self, relationships["manga"][0]["id"]),
next(iter(mattributes["title"].values()))),
"manga_id": manga["id"],
"status" : mattributes["status"],
"author" : [author["attributes"]["name"]
for author in relationships["author"]],
"artist" : [artist["attributes"]["name"]
for artist in relationships["artist"]],
"tags" : [tag["attributes"]["name"]["en"]
for tag in mattributes["tags"]],
"cover" : cattributes["fileName"], "cover" : cattributes["fileName"],
"lang" : cattributes.get("locale"), "lang" : cattributes.get("locale"),
"volume" : text.parse_int(cattributes["volume"]), "volume" : text.parse_int(cattributes["volume"]),
@@ -150,7 +117,7 @@ class MangadexCoversExtractor(MangadexExtractor):
class MangadexChapterExtractor(MangadexExtractor): class MangadexChapterExtractor(MangadexExtractor):
"""Extractor for manga-chapters from mangadex.org""" """Extractor for manga-chapters from mangadex.org"""
subcategory = "chapter" subcategory = "chapter"
pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)" pattern = f"{BASE_PATTERN}/chapter/([0-9a-f-]+)"
example = ("https://mangadex.org/chapter" example = ("https://mangadex.org/chapter"
"/01234567-89ab-cdef-0123-456789abcdef") "/01234567-89ab-cdef-0123-456789abcdef")
@@ -177,13 +144,13 @@ class MangadexChapterExtractor(MangadexExtractor):
"page-reverse") else enumerate "page-reverse") else enumerate
for data["page"], page in enum(chapter["data"], 1): for data["page"], page in enum(chapter["data"], 1):
text.nameext_from_url(page, data) text.nameext_from_url(page, data)
yield Message.Url, base + page, data yield Message.Url, f"{base}{page}", data
class MangadexMangaExtractor(MangadexExtractor): class MangadexMangaExtractor(MangadexExtractor):
"""Extractor for manga from mangadex.org""" """Extractor for manga from mangadex.org"""
subcategory = "manga" subcategory = "manga"
pattern = BASE_PATTERN + r"/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)" pattern = f"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
example = ("https://mangadex.org/title" example = ("https://mangadex.org/title"
"/01234567-89ab-cdef-0123-456789abcdef") "/01234567-89ab-cdef-0123-456789abcdef")
@@ -194,7 +161,7 @@ class MangadexMangaExtractor(MangadexExtractor):
class MangadexFeedExtractor(MangadexExtractor): class MangadexFeedExtractor(MangadexExtractor):
"""Extractor for chapters from your Updates Feed""" """Extractor for chapters from your Updates Feed"""
subcategory = "feed" subcategory = "feed"
pattern = BASE_PATTERN + r"/titles?/feed$()" pattern = f"{BASE_PATTERN}/titles?/feed$()"
example = "https://mangadex.org/title/feed" example = "https://mangadex.org/title/feed"
def chapters(self): def chapters(self):
@@ -242,7 +209,7 @@ class MangadexListExtractor(MangadexExtractor):
class MangadexAuthorExtractor(MangadexExtractor): class MangadexAuthorExtractor(MangadexExtractor):
"""Extractor for mangadex authors""" """Extractor for mangadex authors"""
subcategory = "author" subcategory = "author"
pattern = BASE_PATTERN + r"/author/([0-9a-f-]+)" pattern = f"{BASE_PATTERN}/author/([0-9a-f-]+)"
example = ("https://mangadex.org/author" example = ("https://mangadex.org/author"
"/01234567-89ab-cdef-0123-456789abcdef/NAME") "/01234567-89ab-cdef-0123-456789abcdef/NAME")
@@ -280,30 +247,30 @@ class MangadexAPI():
else text.ensure_http_scheme(server).rstrip("/")) else text.ensure_http_scheme(server).rstrip("/"))
def athome_server(self, uuid): def athome_server(self, uuid):
return self._call("/at-home/server/" + uuid) return self._call(f"/at-home/server/{uuid}")
def author(self, uuid, manga=False): def author(self, uuid, manga=False):
params = {"includes[]": ("manga",)} if manga else None params = {"includes[]": ("manga",)} if manga else None
return self._call("/author/" + uuid, params)["data"] return self._call(f"/author/{uuid}", params)["data"]
def chapter(self, uuid): def chapter(self, uuid):
params = {"includes[]": ("scanlation_group",)} params = {"includes[]": ("scanlation_group",)}
return self._call("/chapter/" + uuid, params)["data"] return self._call(f"/chapter/{uuid}", params)["data"]
def covers_manga(self, uuid): def covers_manga(self, uuid):
params = {"manga[]": uuid} params = {"manga[]": uuid}
return self._pagination_covers("/cover", params) return self._pagination_covers("/cover", params)
def list(self, uuid): def list(self, uuid):
return self._call("/list/" + uuid, None, True)["data"] return self._call(f"/list/{uuid}", None, True)["data"]
def list_feed(self, uuid): def list_feed(self, uuid):
return self._pagination_chapters("/list/" + uuid + "/feed", None, True) return self._pagination_chapters(f"/list/{uuid}/feed", None, True)
@memcache(keyarg=1) @memcache(keyarg=1)
def manga(self, uuid): def manga(self, uuid):
params = {"includes[]": ("artist", "author")} params = {"includes[]": ("artist", "author")}
return self._call("/manga/" + uuid, params)["data"] return self._call(f"/manga/{uuid}", params)["data"]
def manga_author(self, uuid_author): def manga_author(self, uuid_author):
params = {"authorOrArtist": uuid_author} params = {"authorOrArtist": uuid_author}
@@ -315,7 +282,7 @@ class MangadexAPI():
"order[volume]" : order, "order[volume]" : order,
"order[chapter]": order, "order[chapter]": order,
} }
return self._pagination_chapters("/manga/" + uuid + "/feed", params) return self._pagination_chapters(f"/manga/{uuid}/feed", params)
def user_follows_manga(self): def user_follows_manga(self):
params = {"contentRating": None} params = {"contentRating": None}
@@ -366,17 +333,17 @@ class MangadexAPI():
_refresh_token_cache.update( _refresh_token_cache.update(
(username, "personal"), data["refresh_token"]) (username, "personal"), data["refresh_token"])
return "Bearer " + access_token return f"Bearer {access_token}"
@cache(maxage=900, keyarg=1) @cache(maxage=900, keyarg=1)
def _authenticate_impl_legacy(self, username, password): def _authenticate_impl_legacy(self, username, password):
if refresh_token := _refresh_token_cache(username): if refresh_token := _refresh_token_cache(username):
self.extractor.log.info("Refreshing access token") self.extractor.log.info("Refreshing access token")
url = self.root + "/auth/refresh" url = f"{self.root}/auth/refresh"
json = {"token": refresh_token} json = {"token": refresh_token}
else: else:
self.extractor.log.info("Logging in as %s", username) self.extractor.log.info("Logging in as %s", username)
url = self.root + "/auth/login" url = f"{self.root}/auth/login"
json = {"username": username, "password": password} json = {"username": username, "password": password}
self.extractor.log.debug("Using legacy login method") self.extractor.log.debug("Using legacy login method")
@@ -387,7 +354,7 @@ class MangadexAPI():
if refresh_token != data["token"]["refresh"]: if refresh_token != data["token"]["refresh"]:
_refresh_token_cache.update(username, data["token"]["refresh"]) _refresh_token_cache.update(username, data["token"]["refresh"])
return "Bearer " + data["token"]["session"] return f"Bearer {data['token']['session']}"
def _call(self, endpoint, params=None, auth=False): def _call(self, endpoint, params=None, auth=False):
url = self.root + endpoint url = self.root + endpoint
@@ -470,3 +437,33 @@ class MangadexAPI():
@cache(maxage=90*86400, keyarg=0) @cache(maxage=90*86400, keyarg=0)
def _refresh_token_cache(username): def _refresh_token_cache(username):
return None return None
@memcache(keyarg=1)
def _manga_info(self, uuid):
    """Return manga-level metadata for the manga identified by 'uuid'

    'self' is the calling extractor; only its 'api' attribute is used
    to fetch the manga object. The returned dict is merged into chapter
    and cover metadata by the callers.
    """
    manga = self.api.manga(uuid)

    # group related objects (artist, author, ...) by their type
    rel = defaultdict(list)
    for item in manga["relationships"]:
        rel[item["type"]].append(item)
    mattr = manga["attributes"]

    title = mattr["title"]
    # tolerate a missing or empty description instead of failing on '.get'
    # NOTE(review): assumes a non-empty description is always a mapping
    description = mattr.get("description") or {}

    return {
        # prefer the English string, fall back to the first available one;
        # the "" default keeps next() from raising StopIteration
        # when there are no localized strings at all
        "manga"   : title.get("en") or next(iter(title.values()), ""),
        "manga_id": manga["id"],
        # take the last value of each entry without popitem(), which
        # would empty the dicts inside the object returned by api.manga()
        # (presumably shared through its memcache decorator);
        # empty entries are skipped
        "manga_titles": [next(reversed(t.values()))
                         for t in mattr.get("altTitles") or () if t],
        "manga_date"  : text.parse_datetime(mattr.get("createdAt")),
        "description" : (description.get("en") or
                         next(iter(description.values()), "")),
        "demographic": mattr.get("publicationDemographic"),
        "origin": mattr.get("originalLanguage"),
        "status": mattr.get("status"),
        "year"  : mattr.get("year"),
        "rating": mattr.get("contentRating"),
        "links" : mattr.get("links"),
        "tags"  : [tag["attributes"]["name"]["en"] for tag in mattr["tags"]],
        "artist": [artist["attributes"]["name"] for artist in rel["artist"]],
        "author": [author["attributes"]["name"] for author in rel["author"]],
    }

View File

@@ -10,18 +10,99 @@ import datetime
__tests__ = ( __tests__ = (
{ {
"#url" : "https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", "#url" : "https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa",
"#class" : mangadex.MangadexChapterExtractor, "#class" : mangadex.MangadexChapterExtractor,
"#sha1_metadata": "e86128a79ebe7201b648f1caa828496a2878dc8f", "#count" : 5,
"artist" : ["Oda Eiichiro"],
"author" : ["Oda Eiichiro"],
"chapter" : 6,
"chapter_id" : "f946ac53-0b71-4b5d-aeb2-7931b13c4aaa",
"chapter_minor": "",
"count" : 5,
"date" : "dt:2018-02-28 10:42:50",
"demographic" : "shounen",
"description" : "One Piece Omake are short manga chapters originally published in the One Piece Log Books & Databooks.",
"extension" : {"jpg", "png"},
"filename" : str,
"group" : ["KEFI"],
"lang" : "en",
"manga" : "One Piece Omake",
"manga_date" : "dt:2018-06-29 17:22:51",
"manga_id" : "487f1f04-75f3-4a2e-a4af-76e615e32585",
"origin" : "ja",
"page" : range(1, 5),
"rating" : "safe",
"status" : "ongoing",
"tags" : ["Comedy"],
"title" : "The 6th Log - Chopper Man",
"volume" : 0,
"year" : None,
"manga_titles" : [
"One Piece: Log Book Omake",
"One Piece: Mugiwara Theater",
"One Piece: Straw Hat Theater",
"One Piece: Strawhat Theater",
],
"links" : {
"al" : "44414",
"kt" : "24849",
"mal": "14414",
},
}, },
{ {
"#url" : "https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", "#url" : "https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831",
"#comment" : "oneshot", "#comment" : "oneshot",
"#class" : mangadex.MangadexChapterExtractor, "#class" : mangadex.MangadexChapterExtractor,
"#count" : 64, "#count" : 64,
"#sha1_metadata": "d11ed057a919854696853362be35fc0ba7dded4c",
"artist" : ["Arakawa Hiromu"],
"author" : ["Arakawa Hiromu"],
"chapter" : 0,
"chapter_id" : "61a88817-9c29-4281-bdf1-77b3c1be9831",
"chapter_minor": "",
"count" : 64,
"date" : "dt:2018-03-05 14:36:10",
"demographic" : "shounen",
"description" : "A kunoichi, Henpukumaru, awakens in the mansion of her enemy. She is introduced to the future lord of the mansion, Chiyozuru. Chiyozuru is able to get the unemotional Henpukumaru to smile and react differently than she normally would. But then Henpukumaru's former allies attack one night…",
"extension" : {"jpg", "png"},
"filename" : str,
"group" : ["Illuminati-Manga"],
"lang" : "en",
"manga" : "Souten no Koumori",
"manga_date" : "dt:2018-03-19 10:36:00",
"manga_id" : "f90c4398-8aad-4f51-8a1f-024ca09fdcbc",
"origin" : "ja",
"page" : range(1, 64),
"rating" : "safe",
"status" : "completed",
"title" : "Oneshot",
"volume" : 0,
"year" : 2006,
"manga_titles" : [
"A Bat in Blue Sky",
"Sôten no Kômori",
"Soten no Komori",
"蒼天の蝙蝠",
],
"tags" : [
"Oneshot",
"Historical",
"Action",
"Martial Arts",
"Drama",
"Tragedy",
],
"links" : {
"al" : "30948",
"ap" : "souten-no-koumori",
"kt" : "2065",
"mal": "948",
"mu" : "4786",
},
}, },
{ {
@@ -52,8 +133,7 @@ __tests__ = (
"chapter_minor": "", "chapter_minor": "",
"chapter_id" : str, "chapter_id" : str,
"date" : datetime.datetime, "date" : datetime.datetime,
"lang" : str, "lang" : "iso:lang",
"language" : str,
"artist" : ["Arakawa Hiromu"], "artist" : ["Arakawa Hiromu"],
"author" : ["Arakawa Hiromu"], "author" : ["Arakawa Hiromu"],
"status" : "completed", "status" : "completed",
@@ -76,7 +156,6 @@ __tests__ = (
"manga" : "Souten no Koumori", "manga" : "Souten no Koumori",
"lang" : {"fr", "it"}, "lang" : {"fr", "it"},
"language": {"French", "Italian"},
}, },
{ {
@@ -156,7 +235,7 @@ __tests__ = (
"#url" : "https://mangadex.org/author/7222d0d5-836c-4bf3-9174-72bceade8c87/kotoyama", "#url" : "https://mangadex.org/author/7222d0d5-836c-4bf3-9174-72bceade8c87/kotoyama",
"#class" : mangadex.MangadexAuthorExtractor, "#class" : mangadex.MangadexAuthorExtractor,
"#pattern" : mangadex.MangadexMangaExtractor.pattern, "#pattern" : mangadex.MangadexMangaExtractor.pattern,
"#count" : 8, "#count" : 9,
}, },
{ {
@@ -178,7 +257,7 @@ __tests__ = (
"cover_id" : "af3c1690-1e06-4432-909e-3e0f9ee01f68", "cover_id" : "af3c1690-1e06-4432-909e-3e0f9ee01f68",
"date" : "dt:2021-05-24 17:19:13", "date" : "dt:2021-05-24 17:19:13",
"date_updated": "dt:2021-05-24 17:19:13", "date_updated": "dt:2021-05-24 17:19:13",
"extension" : "jpg", "extension" : {"jpg", "png"},
"filename" : "af3c1690-1e06-4432-909e-3e0f9ee01f68", "filename" : "af3c1690-1e06-4432-909e-3e0f9ee01f68",
"lang" : "ja", "lang" : "ja",
"manga" : "Souten no Koumori", "manga" : "Souten no Koumori",
@@ -206,7 +285,7 @@ __tests__ = (
"cover_id" : "iso:uuid", "cover_id" : "iso:uuid",
"date" : "type:datetime", "date" : "type:datetime",
"date_updated": "type:datetime", "date_updated": "type:datetime",
"extension" : "jpg", "extension" : {"jpg", "png"},
"filename" : str, "filename" : str,
"lang" : {"ja", "fa"}, "lang" : {"ja", "fa"},
"manga" : "Gachiakuta", "manga" : "Gachiakuta",