[webtoons] add 'banners' option (#6468)

This commit is contained in:
Mike Fährmann
2025-06-26 19:24:32 +02:00
parent 0334a7c48c
commit 8a93616a2d
4 changed files with 64 additions and 20 deletions

View File

@@ -5578,6 +5578,17 @@ Description
| - Set a value to ``false`` to completely remove these extension's ``type`` parameter
| - Omit an extension to leave its URLs unchanged
extractor.webtoons.banners
--------------------------
Type
``bool``
Default
``false``
Description
Download the active comic's ``banner``.
extractor.webtoons.thumbnails
-----------------------------
Type
@@ -5585,7 +5596,7 @@ Type
Default
``false``
Description
Download the active chapter's ``thumbnail``.
Download the active episode's ``thumbnail``.
Useful for creating CBZ archives with actual source thumbnails.

View File

@@ -792,6 +792,7 @@
"sleep-request": "0.5-1.5",
"quality" : "original",
"banners" : false,
"thumbnails": false
},
"weebcentral":

View File

@@ -19,6 +19,9 @@ LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
class WebtoonsBase():
category = "webtoons"
root = "https://www.webtoons.com"
directory_fmt = ("{category}", "{comic}")
filename_fmt = "{episode_no}-{num:>02}{type:?-//}.{extension}"
archive_fmt = "{title_no}_{episode_no}_{num}"
cookies_domain = ".webtoons.com"
request_interval = (0.5, 1.5)
@@ -45,9 +48,6 @@ class WebtoonsBase():
class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"""Extractor for an episode on webtoons.com"""
subcategory = "episode"
directory_fmt = ("{category}", "{comic}")
filename_fmt = "{episode_no}-{num:>02}{type:?-//}.{extension}"
archive_fmt = "{title_no}_{episode_no}_{num}"
pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
r"/viewer\?([^#'\"]+)")
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
@@ -126,15 +126,13 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
elif type:
url = f"{path}?type={type}"
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
results.append((url, None))
results.append((_url(url), None))
return results
def assets(self, page):
if self.config("thumbnails", False):
active = text.extr(page, 'class="on ', '</a>')
url = text.extr(active, 'data-url="', '"')
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
url = _url(text.extr(active, 'data-url="', '"'))
return ({"url": url, "type": "thumbnail"},)
@@ -142,20 +140,17 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
"""Extractor for an entire comic on webtoons.com"""
subcategory = "comic"
categorytransfer = True
pattern = LANG_PATTERN + r"/[^/?#]+/[^/?#]+)/list\?([^#]+)"
filename_fmt = "{type}.{extension}"
archive_fmt = "{title_no}_{type}"
pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
def items(self):
base, lang, query = self.groups
kw = self.kwdict
base, kw["lang"], kw["genre"], kw["comic"], query = self.groups
params = text.parse_query(query)
title_no = params.get("title_no")
page_no = text.parse_int(params.get("page"), 1)
data = {
"_extractor": WebtoonsEpisodeExtractor,
"title_no" : text.parse_int(title_no),
"page" : page_no,
}
kw["title_no"] = title_no = text.parse_int(params.get("title_no"))
kw["page"] = page_no = text.parse_int(params.get("page"), 1)
path = f"/{base}/list?title_no={title_no}&page={page_no}"
response = self.request(self.root + path)
@@ -164,13 +159,18 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
base = "/".join(parts[3:-1])
page = response.text
if self.config("banners") and (asset := self._asset_banner(page)):
yield Message.Directory, asset
yield Message.Url, asset["url"], asset
data = {"_extractor": WebtoonsEpisodeExtractor}
while True:
for url in self.get_episode_urls(page):
params = text.parse_query(url.rpartition("?")[2])
data["episode_no"] = text.parse_int(params.get("episode_no"))
yield Message.Queue, url, data
data["page"] = page_no = page_no + 1
kw["page"] = page_no = page_no + 1
path = f"/{base}/list?title_no={title_no}&page={page_no}"
if path not in page:
return
@@ -178,12 +178,21 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
def get_episode_urls(self, page):
"""Extract and return all episode urls in 'page'"""
page = text.extr(page, 'id="_listUl"', '</ul>')
page = text.extr(page, 'id="_listUl"', "</ul>")
return [
match[0]
for match in WebtoonsEpisodeExtractor.pattern.finditer(page)
]
def _asset_banner(self, page):
try:
pos = page.index('<span class="thmb')
except Exception:
return
url = _url(text.extract(page, 'src="', '"', pos)[0])
return text.nameext_from_url(url, {"url": url, "type": "banner"})
class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
"""Extractor for webtoons.com artists"""
@@ -216,3 +225,7 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
data = self.request_json(url, params=params, headers=headers)
return data["result"]["titles"]
def _url(url):
return url.replace("://webtoon-phinf.", "://swebtoon-phinf.")

View File

@@ -143,6 +143,25 @@ __tests__ = (
"episode_no": range(1, 14),
},
{
"#url" : "https://www.webtoons.com/en/comedy/live-with-yourself/list?title_no=919",
"#comment" : "banner (#6468)",
"#category": ("", "webtoons", "comic"),
"#class" : webtoons.WebtoonsComicExtractor,
"#options" : {"banners": True},
"#range" : "1-3",
"#results" : (
"https://swebtoon-phinf.pstatic.net/20190126_226/1548461599138G7THv_PNG/03_EC9E91ED9288EC8381EC84B8_PC_ECBA90EBA6ADED84B0.png",
"https://www.webtoons.com/en/comedy/live-with-yourself/ep-12-aint-gonna-face-no-defeat/viewer?title_no=919&episode_no=14",
"https://www.webtoons.com/en/comedy/live-with-yourself/interlude-2/viewer?title_no=919&episode_no=13",
"https://www.webtoons.com/en/comedy/live-with-yourself/ep-11-can-barely-stand-on-my-feet/viewer?title_no=919&episode_no=12",
),
"?type" : "banner",
"title_no" : 919,
"?episode_no": range(12, 14),
},
{
"#url" : "https://www.webtoons.com/fr/romance/subzero/list?title_no=1845&page=7",
"#comment" : "french",