[mangapark] use GraphQL more extensively and adjust functionality for the new site (#4999)

- undo formatting changes
- simplify code
- update and fix tests
This commit is contained in:
vonProteus
2025-03-23 19:19:19 +01:00
committed by Mike Fährmann
parent fd8f652490
commit 58e7808bbb
2 changed files with 124 additions and 169 deletions

View File

@@ -10,6 +10,7 @@
from .common import ChapterExtractor, Extractor, Message from .common import ChapterExtractor, Extractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import memcache
import re import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangapark\.(?:net|com|org|io|me)" BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangapark\.(?:net|com|org|io|me)"
@@ -31,6 +32,42 @@ class MangaparkBase():
match = self._match_title(title) match = self._match_title(title)
return match.groups() if match else (0, 0, "", "") return match.groups() if match else (0, 0, "", "")
@memcache(keyarg=1)
def _extract_manga(self, manga_id):
    """Return the "data" entry of the 'Get_comicNode' result for 'manga_id'"""
    # NOTE(review): memcache(keyarg=1) presumably caches the return value
    # per 'manga_id' (positional argument 1) - confirm against ..cache
    comic = self._request_graphql(
        "Get_comicNode", {"getComicNodeId": manga_id})
    return comic["data"]
def _extract_chapter(self, chapter_id):
    """Return the "data" entry of the 'Get_chapterNode' result for 'chapter_id'"""
    node = self._request_graphql(
        "Get_chapterNode", {"getChapterNodeId": chapter_id})
    return node["data"]
def _extract_chapters_all(self, manga_id):
    """Return the unmodified 'Get_comicChapterList' result for 'manga_id'"""
    return self._request_graphql(
        "Get_comicChapterList", {"comicId": manga_id})
def _extract_chapters_source(self, source_id):
    """Return the unmodified 'get_content_source_chapterList' result
    for 'source_id'"""
    opname = "get_content_source_chapterList"
    return self._request_graphql(opname, {"sourceId": source_id})
def _request_graphql(self, opname, variables):
    """Send GraphQL operation 'opname' and return its result value

    POSTs the query text stored in QUERIES under 'opname', together with
    'variables', as a JSON body to '<root>/apo/'.
    """
    endpoint = self.root + "/apo/"
    payload = {
        "query"        : QUERIES[opname],
        "variables"    : variables,
        "operationName": opname,
    }
    response = self.request(endpoint, method="POST", json=payload)
    # popitem() yields one (key, value) pair from the "data" object;
    # only the value is returned, so the key does not need to be known here.
    # NOTE(review): presumably "data" holds exactly one entry per
    # operation - confirm against the API responses
    _, result = response.json()["data"].popitem()
    return result
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.net""" """Extractor for manga-chapters from mangapark.net"""
@@ -39,43 +76,36 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
def __init__(self, match): def __init__(self, match):
self.root = text.root_from_url(match.group(0)) self.root = text.root_from_url(match.group(0))
url = "{}/title/_/{}".format(self.root, match.group(1)) ChapterExtractor.__init__(self, match, False)
ChapterExtractor.__init__(self, match, url)
def metadata(self, page): def metadata(self, _):
data = self._extract_nextdata(page) chapter = self._extract_chapter(self.groups[0])
chapter = (data["props"]["pageProps"]["dehydratedState"] manga = self._extract_manga(chapter["comicNode"]["id"])
["queries"][0]["state"]["data"]["data"])
manga = chapter["comicNode"]["data"]
source = chapter["sourceNode"]["data"]
self._urls = chapter["imageSet"]["httpLis"] self._urls = chapter["imageFile"]["urlList"]
self._params = chapter["imageSet"]["wordLis"]
vol, ch, minor, title = self._parse_chapter_title(chapter["dname"]) vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])
lang = chapter.get("lang") or "en"
return { return {
"manga" : manga["name"], "manga" : manga["name"],
"manga_id" : manga["id"], "manga_id" : text.parse_int(manga["id"]),
"artist" : source["artists"], "artist" : manga["artists"],
"author" : source["authors"], "author" : manga["authors"],
"genre" : source["genres"], "genre" : manga["genres"],
"volume" : text.parse_int(vol), "volume" : text.parse_int(vol),
"chapter" : text.parse_int(ch), "chapter" : text.parse_int(ch),
"chapter_minor": minor, "chapter_minor": minor,
"chapter_id": chapter["id"], "chapter_id": text.parse_int(chapter["id"]),
"title" : chapter["title"] or title or "", "title" : title or "",
"lang" : chapter["lang"], "lang" : lang,
"language" : util.code_to_language(chapter["lang"]), "language" : util.code_to_language(lang),
"source" : source["srcTitle"], "source" : chapter["srcTitle"],
"source_id" : source["id"], "source_id" : chapter["sourceId"],
"date" : text.parse_timestamp(chapter["dateCreate"] // 1000), "date" : text.parse_timestamp(chapter["dateCreate"] // 1000),
} }
def images(self, page): def images(self, _):
return [ return [(url, None) for url in self._urls]
(url + "?" + params, None)
for url, params in zip(self._urls, self._params)
]
class MangaparkMangaExtractor(MangaparkBase, Extractor): class MangaparkMangaExtractor(MangaparkBase, Extractor):
@@ -95,6 +125,8 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
url = self.root + chapter["urlPath"] url = self.root + chapter["urlPath"]
vol, ch, minor, title = self._parse_chapter_title(chapter["dname"]) vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])
lang = chapter.get("lang") or "en"
data = { data = {
"manga_id" : self.manga_id, "manga_id" : self.manga_id,
"volume" : text.parse_int(vol), "volume" : text.parse_int(vol),
@@ -102,8 +134,8 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
"chapter_minor": minor, "chapter_minor": minor,
"chapter_id": chapter["id"], "chapter_id": chapter["id"],
"title" : chapter["title"] or title or "", "title" : chapter["title"] or title or "",
"lang" : chapter["lang"], "lang" : lang,
"language" : util.code_to_language(chapter["lang"]), "language" : util.code_to_language(lang),
"source" : chapter["srcTitle"], "source" : chapter["srcTitle"],
"source_id" : chapter["sourceId"], "source_id" : chapter["sourceId"],
"date" : text.parse_timestamp( "date" : text.parse_timestamp(
@@ -114,45 +146,12 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
def chapters(self): def chapters(self):
source = self.config("source") source = self.config("source")
if not source: if source:
return self.chapters_all() source_id = self._select_source(source)
self.log.debug("Requesting chapters for source_id %s", source_id)
source_id = self._select_source(source) chapters = self._extract_chapters_source(source_id)
self.log.debug("Requesting chapters for source_id %s", source_id) else:
return self.chapters_source(source_id) chapters = self._extract_chapters_all(self.groups[0])
def chapters_all(self):
pnum = 0
variables = {
"select": {
"comicId": self.manga_id,
"range" : None,
"isAsc" : not self.config("chapter-reverse"),
}
}
while True:
data = self._request_graphql(
"get_content_comicChapterRangeList", variables)
for item in data["items"]:
yield from item["chapterNodes"]
if not pnum:
pager = data["pager"]
pnum += 1
try:
variables["select"]["range"] = pager[pnum]
except IndexError:
return
def chapters_source(self, source_id):
variables = {
"sourceId": source_id,
}
chapters = self._request_graphql(
"get_content_source_chapterList", variables)
if self.config("chapter-reverse"): if self.config("chapter-reverse"):
chapters.reverse() chapters.reverse()
@@ -180,101 +179,58 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
raise exception.StopExtraction( raise exception.StopExtraction(
"'%s' does not match any available source", source) "'%s' does not match any available source", source)
def _request_graphql(self, opname, variables):
url = self.root + "/apo/"
data = {
"query" : QUERIES[opname],
"variables" : util.json_dumps(variables),
"operationName": opname,
}
return self.request(
url, method="POST", json=data).json()["data"][opname]
QUERIES = { QUERIES = {
"get_content_comicChapterRangeList": """ "Get_comicChapterList": """
query get_content_comicChapterRangeList($select: Content_ComicChapterRangeList_Select) { query Get_comicChapterList($comicId: ID!) {
get_content_comicChapterRangeList( get_comicChapterList(comicId: $comicId) {
select: $select data {
) { id
reqRange{x y} dname
missing title
pager {x y} lang
items{ urlPath
serial srcTitle
chapterNodes { sourceId
dateCreate
id
data {
id
sourceId
dbStatus
isNormal
isHidden
isDeleted
isFinal
dateCreate
datePublic
dateModify
lang
volume
serial
dname
title
urlPath
srcTitle srcColor
count_images
stat_count_post_child
stat_count_post_reply
stat_count_views_login
stat_count_views_guest
userId
userNode {
id
data {
id
name
uniq
avatarUrl
urlPath
verified
deleted
banned
dateCreate
dateOnline
stat_count_chapters_normal
stat_count_chapters_others
is_adm is_mod is_vip is_upr
}
}
disqusId
}
sser_read
} }
}
} }
} }
""",
"Get_chapterNode": """
query Get_chapterNode($getChapterNodeId: ID!) {
get_chapterNode(id: $getChapterNodeId) {
data {
id
dname
lang
sourceId
srcTitle
dateCreate
comicNode{
id
}
imageFile {
urlList
}
}
}
}
""",
"Get_comicNode": """
query Get_comicNode($getComicNodeId: ID!) {
get_comicNode(id: $getComicNodeId) {
data {
id
name
artists
authors
genres
}
}
}
""", """,
"get_content_source_chapterList": """ "get_content_source_chapterList": """

View File

@@ -13,11 +13,11 @@ __tests__ = (
"#url" : "https://mangapark.net/title/114972-aria/6710214-en-ch.60.2", "#url" : "https://mangapark.net/title/114972-aria/6710214-en-ch.60.2",
"#category": ("", "mangapark", "chapter"), "#category": ("", "mangapark", "chapter"),
"#class" : mangapark.MangaparkChapterExtractor, "#class" : mangapark.MangaparkChapterExtractor,
"#pattern" : r"https://[\w-]+\.mpcdn\.org/comic/2002/e67/61e29278a583b9227964076e/\d+_\d+_\d+_\d+\.jpeg\?acc=[^&#]+&exp=\d+", "#pattern" : r"https://[\w-]+\.mp\w+\.org/media/2002/e67/61e29278a583b9227964076e/\d+_\d+_\d+_\d+\.jpeg",
"#count" : 70, "#count" : 70,
"artist" : [], "artist" : ["amano kozue"],
"author" : ["Amano Kozue"], "author" : ["amano kozue"],
"chapter" : 60, "chapter" : 60,
"chapter_id" : 6710214, "chapter_id" : 6710214,
"chapter_minor": ".2", "chapter_minor": ".2",
@@ -29,7 +29,6 @@ __tests__ = (
"adventure", "adventure",
"comedy", "comedy",
"drama", "drama",
"sci_fi",
"shounen", "shounen",
"slice_of_life", "slice_of_life",
], ],
@@ -69,35 +68,35 @@ __tests__ = (
{ {
"#url" : "https://mangapark.net/title/114972-aria", "#url" : "https://mangapark.net/title/114972-aria",
"#comment" : "'source' option",
"#category": ("", "mangapark", "manga"), "#category": ("", "mangapark", "manga"),
"#class" : mangapark.MangaparkMangaExtractor, "#class" : mangapark.MangaparkMangaExtractor,
"#pattern" : mangapark.MangaparkChapterExtractor.pattern, "#pattern" : mangapark.MangaparkChapterExtractor.pattern,
"#count" : 141, "#count" : 71,
"chapter" : int, "chapter" : int,
"chapter_id" : int, "chapter_id" : r"re:\d+",
"chapter_minor": str, "chapter_minor": str,
"date" : datetime.datetime, "date" : datetime.datetime,
"lang" : "en", "lang" : "en",
"language" : "English", "language" : "English",
"manga_id" : 114972, "manga_id" : 114972,
"source" : r"re:Horse|Koala", "source" : "Horse",
"source_id" : int, "source_id" : "844",
"title" : str, "title" : str,
"volume" : int, "volume" : int,
}, },
{ {
"#url" : "https://mangapark.net/title/114972-aria", "#url" : "https://mangapark.net/title/10504-en-mushishi",
"#comment" : "'source' option", "#comment" : "'source' option",
"#skip" : "not functional",
"#category": ("", "mangapark", "manga"), "#category": ("", "mangapark", "manga"),
"#class" : mangapark.MangaparkMangaExtractor, "#class" : mangapark.MangaparkMangaExtractor,
"#options" : {"source": "koala"},
"#pattern" : mangapark.MangaparkChapterExtractor.pattern, "#pattern" : mangapark.MangaparkChapterExtractor.pattern,
"#options" : {"source": "panda"},
"#count" : 70, "#count" : 70,
"source" : "Koala", "source" : "Panda",
"source_id": 15150116, "source_id": 15150116,
}, },