[webtoons] update code

This commit is contained in:
Mike Fährmann
2025-06-26 15:24:37 +02:00
parent ebaf789ac7
commit 3c6a5657ea

View File

@@ -32,6 +32,8 @@ class WebtoonsBase():
"ageGatePass": "true", "ageGatePass": "true",
}) })
_init = setup_agegate_cookies
def request(self, url, **kwargs): def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs) response = Extractor.request(self, url, **kwargs)
if response.history and "/ageGate" in response.url: if response.history and "/ageGate" in response.url:
@@ -54,12 +56,11 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
def _init(self): def _init(self):
self.setup_agegate_cookies() self.setup_agegate_cookies()
path, self.lang, self.genre, self.comic, query = self.groups base, self.lang, self.genre, self.comic, query = self.groups
params = text.parse_query(query) params = text.parse_query(query)
self.title_no = params.get("title_no") self.title_no = params.get("title_no")
self.episode_no = params.get("episode_no") self.episode_no = params.get("episode_no")
self.gallery_url = "{}/{}/viewer?{}".format(self.root, path, query) self.gallery_url = f"{self.root}/{base}/viewer?{query}"
self.thumbnails = self.config("thumbnails", False)
def metadata(self, page): def metadata(self, page):
extr = text.extract_from(page) extr = text.extract_from(page)
@@ -67,19 +68,19 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
descr = extr('<meta property="og:description" content="', '"') descr = extr('<meta property="og:description" content="', '"')
if extr('<div class="subj_info"', '\n'): if extr('<div class="subj_info"', '\n'):
comic_name = extr('>', '<') comic_name = extr(">", "<")
episode_name = extr('<h1 class="subj_episode" title="', '"') episode_name = extr('<h1 class="subj_episode" title="', '"')
else: else:
comic_name = episode_name = "" comic_name = episode_name = ""
if extr('<span class="tx _btnOpenEpisodeList ', '"'): if extr('<span class="tx _btnOpenEpisodeList ', '"'):
episode = extr('>#', '<') episode = extr(">#", "<")
else: else:
episode = "" episode = ""
if extr('<span class="author"', '\n'): if extr('<span class="author"', "\n"):
username = extr('/u/', '"') username = extr("/u/", '"')
author_name = extr('<span>', '</span>') author_name = extr("<span>", "</span>")
else: else:
username = author_name = "" username = author_name = ""
@@ -123,14 +124,14 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
if type is False: if type is False:
url = path url = path
elif type: elif type:
url = "{}?type={}".format(path, type) url = f"{path}?type={type}"
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.") url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
results.append((url, None)) results.append((url, None))
return results return results
def assets(self, page): def assets(self, page):
if self.thumbnails: if self.config("thumbnails", False):
active = text.extr(page, 'class="on ', '</a>') active = text.extr(page, 'class="on ', '</a>')
url = text.extr(active, 'data-url="', '"') url = text.extr(active, 'data-url="', '"')
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.") url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
@@ -141,45 +142,39 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
"""Extractor for an entire comic on webtoons.com""" """Extractor for an entire comic on webtoons.com"""
subcategory = "comic" subcategory = "comic"
categorytransfer = True categorytransfer = True
pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)" pattern = LANG_PATTERN + r"/[^/?#]+/[^/?#]+)/list\?([^#]+)"
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123" example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
def _init(self):
self.setup_agegate_cookies()
self.path, self.lang, self.genre, self.comic, query = self.groups
params = text.parse_query(query)
self.title_no = params.get("title_no")
self.page_no = text.parse_int(params.get("page"), 1)
def items(self): def items(self):
page = None base, lang, query = self.groups
params = text.parse_query(query)
title_no = params.get("title_no")
page_no = text.parse_int(params.get("page"), 1)
data = { data = {
"_extractor": WebtoonsEpisodeExtractor, "_extractor": WebtoonsEpisodeExtractor,
"title_no" : text.parse_int(self.title_no), "title_no" : text.parse_int(title_no),
"page" : page_no,
} }
path = f"/{base}/list?title_no={title_no}&page={page_no}"
response = self.request(self.root + path)
if response.history:
parts = response.url.split("/")
base = "/".join(parts[3:-1])
page = response.text
while True: while True:
path = "/{}/list?title_no={}&page={}".format(
self.path, self.title_no, self.page_no)
if page is not None and path not in page:
return
response = self.request(self.root + path)
if response.history:
parts = response.url.split("/")
self.path = "/".join(parts[3:-1])
page = response.text
data["page"] = self.page_no
for url in self.get_episode_urls(page): for url in self.get_episode_urls(page):
params = text.parse_query(url.rpartition("?")[2]) params = text.parse_query(url.rpartition("?")[2])
data["episode_no"] = text.parse_int(params.get("episode_no")) data["episode_no"] = text.parse_int(params.get("episode_no"))
yield Message.Queue, url, data yield Message.Queue, url, data
self.page_no += 1 data["page"] = page_no = page_no + 1
path = f"/{base}/list?title_no={title_no}&page={page_no}"
if path not in page:
return
page = self.request(self.root + path).text
def get_episode_urls(self, page): def get_episode_urls(self, page):
"""Extract and return all episode urls in 'page'""" """Extract and return all episode urls in 'page'"""
@@ -197,8 +192,6 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
example = "https://www.webtoons.com/p/community/LANG/u/ARTIST" example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"
def items(self): def items(self):
self.setup_agegate_cookies()
for comic in self.comics(): for comic in self.comics():
comic["_extractor"] = WebtoonsComicExtractor comic["_extractor"] = WebtoonsComicExtractor
comic_url = self.root + comic["extra"]["episodeListPath"] comic_url = self.root + comic["extra"]["episodeListPath"]
@@ -208,13 +201,11 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
lang, artist = self.groups lang, artist = self.groups
language = util.code_to_language(lang).upper() language = util.code_to_language(lang).upper()
url = "{}/p/community/{}/u/{}".format( url = f"{self.root}/p/community/{lang}/u/{artist}"
self.root, lang, artist)
page = self.request(url).text page = self.request(url).text
creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\') creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\')
url = "{}/p/community/api/v1/creator/{}/titles".format( url = f"{self.root}/p/community/api/v1/creator/{creator_id}/titles"
self.root, creator_id)
params = { params = {
"language": language, "language": language,
"nextSize": "50", "nextSize": "50",
@@ -222,6 +213,6 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
headers = { headers = {
"language": language, "language": language,
} }
data = self.request(url, params=params, headers=headers).json() data = self.request_json(url, params=params, headers=headers)
return data["result"]["titles"] return data["result"]["titles"]