[webtoons] add 'banners' option (#6468)
This commit is contained in:
@@ -5578,6 +5578,17 @@ Description
|
||||
| - Set a value to ``false`` to completely remove that extension's ``type`` parameter
|
||||
| - Omit an extension to leave its URLs unchanged
|
||||
|
||||
|
||||
extractor.webtoons.banners
|
||||
--------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Download the active comic's ``banner``.
|
||||
|
||||
|
||||
extractor.webtoons.thumbnails
|
||||
-----------------------------
|
||||
Type
|
||||
@@ -5585,7 +5596,7 @@ Type
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Download the active chapter's ``thumbnail``.
|
||||
Download the active episode's ``thumbnail``.
|
||||
|
||||
Useful for creating CBZ archives with actual source thumbnails.
|
||||
|
||||
|
||||
@@ -792,6 +792,7 @@
|
||||
"sleep-request": "0.5-1.5",
|
||||
|
||||
"quality" : "original",
|
||||
"banners" : false,
|
||||
"thumbnails": false
|
||||
},
|
||||
"weebcentral":
|
||||
|
||||
@@ -19,6 +19,9 @@ LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
|
||||
class WebtoonsBase():
|
||||
category = "webtoons"
|
||||
root = "https://www.webtoons.com"
|
||||
directory_fmt = ("{category}", "{comic}")
|
||||
filename_fmt = "{episode_no}-{num:>02}{type:?-//}.{extension}"
|
||||
archive_fmt = "{title_no}_{episode_no}_{num}"
|
||||
cookies_domain = ".webtoons.com"
|
||||
request_interval = (0.5, 1.5)
|
||||
|
||||
@@ -45,9 +48,6 @@ class WebtoonsBase():
|
||||
class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
||||
"""Extractor for an episode on webtoons.com"""
|
||||
subcategory = "episode"
|
||||
directory_fmt = ("{category}", "{comic}")
|
||||
filename_fmt = "{episode_no}-{num:>02}{type:?-//}.{extension}"
|
||||
archive_fmt = "{title_no}_{episode_no}_{num}"
|
||||
pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
|
||||
r"/viewer\?([^#'\"]+)")
|
||||
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
|
||||
@@ -126,15 +126,13 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
||||
elif type:
|
||||
url = f"{path}?type={type}"
|
||||
|
||||
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
||||
results.append((url, None))
|
||||
results.append((_url(url), None))
|
||||
return results
|
||||
|
||||
def assets(self, page):
    """Return additional downloadable assets found in 'page'

    When the 'thumbnails' option is enabled, returns a 1-tuple holding
    a dict with the active entry's thumbnail 'url' and a 'type' of
    "thumbnail". Returns None when the option is disabled.
    """
    if not self.config("thumbnails", False):
        return None

    # the currently selected entry is the element marked with class "on"
    active = text.extr(page, 'class="on ', '</a>')
    # extract the URL once and normalize its host through _url()
    url = _url(text.extr(active, 'data-url="', '"'))
    return ({"url": url, "type": "thumbnail"},)
|
||||
|
||||
|
||||
@@ -142,20 +140,17 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
||||
"""Extractor for an entire comic on webtoons.com"""
|
||||
subcategory = "comic"
|
||||
categorytransfer = True
|
||||
pattern = LANG_PATTERN + r"/[^/?#]+/[^/?#]+)/list\?([^#]+)"
|
||||
filename_fmt = "{type}.{extension}"
|
||||
archive_fmt = "{title_no}_{type}"
|
||||
pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
|
||||
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
|
||||
|
||||
def items(self):
|
||||
base, lang, query = self.groups
|
||||
kw = self.kwdict
|
||||
base, kw["lang"], kw["genre"], kw["comic"], query = self.groups
|
||||
params = text.parse_query(query)
|
||||
title_no = params.get("title_no")
|
||||
page_no = text.parse_int(params.get("page"), 1)
|
||||
|
||||
data = {
|
||||
"_extractor": WebtoonsEpisodeExtractor,
|
||||
"title_no" : text.parse_int(title_no),
|
||||
"page" : page_no,
|
||||
}
|
||||
kw["title_no"] = title_no = text.parse_int(params.get("title_no"))
|
||||
kw["page"] = page_no = text.parse_int(params.get("page"), 1)
|
||||
|
||||
path = f"/{base}/list?title_no={title_no}&page={page_no}"
|
||||
response = self.request(self.root + path)
|
||||
@@ -164,13 +159,18 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
||||
base = "/".join(parts[3:-1])
|
||||
page = response.text
|
||||
|
||||
if self.config("banners") and (asset := self._asset_banner(page)):
|
||||
yield Message.Directory, asset
|
||||
yield Message.Url, asset["url"], asset
|
||||
|
||||
data = {"_extractor": WebtoonsEpisodeExtractor}
|
||||
while True:
|
||||
for url in self.get_episode_urls(page):
|
||||
params = text.parse_query(url.rpartition("?")[2])
|
||||
data["episode_no"] = text.parse_int(params.get("episode_no"))
|
||||
yield Message.Queue, url, data
|
||||
|
||||
data["page"] = page_no = page_no + 1
|
||||
kw["page"] = page_no = page_no + 1
|
||||
path = f"/{base}/list?title_no={title_no}&page={page_no}"
|
||||
if path not in page:
|
||||
return
|
||||
@@ -178,12 +178,21 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
||||
|
||||
def get_episode_urls(self, page):
    """Extract and return all episode urls in 'page'"""
    # narrow the search to the episode list exactly once; repeating the
    # extraction on its own result would look for a marker that is no
    # longer part of the text
    listing = text.extr(page, 'id="_listUl"', "</ul>")
    return [
        match[0]
        for match in WebtoonsEpisodeExtractor.pattern.finditer(listing)
    ]
|
||||
|
||||
def _asset_banner(self, page):
    """Return a metadata dict for the banner image found in 'page'

    The banner is taken from the first 'src' attribute following the
    '<span class="thmb' marker. The result holds the normalized 'url',
    a 'type' of "banner", and whatever text.nameext_from_url() adds.
    Returns None when the marker or its image URL is missing.
    """
    pos = page.find('<span class="thmb')
    if pos < 0:
        return None

    src = text.extract(page, 'src="', '"', pos)[0]
    if not src:
        # NOTE(review): presumably text.extract() yields a falsy value
        # when no 'src' attribute follows the marker; _url() cannot
        # handle that, so bail out instead of raising
        return None

    url = _url(src)
    return text.nameext_from_url(url, {"url": url, "type": "banner"})
|
||||
|
||||
|
||||
class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
|
||||
"""Extractor for webtoons.com artists"""
|
||||
@@ -216,3 +225,7 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
|
||||
data = self.request_json(url, params=params, headers=headers)
|
||||
|
||||
return data["result"]["titles"]
|
||||
|
||||
|
||||
def _url(url):
|
||||
return url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
||||
|
||||
@@ -143,6 +143,25 @@ __tests__ = (
|
||||
"episode_no": range(1, 14),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.webtoons.com/en/comedy/live-with-yourself/list?title_no=919",
|
||||
"#comment" : "banner (#6468)",
|
||||
"#category": ("", "webtoons", "comic"),
|
||||
"#class" : webtoons.WebtoonsComicExtractor,
|
||||
"#options" : {"banners": True},
|
||||
"#range" : "1-3",
|
||||
"#results" : (
|
||||
"https://swebtoon-phinf.pstatic.net/20190126_226/1548461599138G7THv_PNG/03_EC9E91ED9288EC8381EC84B8_PC_ECBA90EBA6ADED84B0.png",
|
||||
"https://www.webtoons.com/en/comedy/live-with-yourself/ep-12-aint-gonna-face-no-defeat/viewer?title_no=919&episode_no=14",
|
||||
"https://www.webtoons.com/en/comedy/live-with-yourself/interlude-2/viewer?title_no=919&episode_no=13",
|
||||
"https://www.webtoons.com/en/comedy/live-with-yourself/ep-11-can-barely-stand-on-my-feet/viewer?title_no=919&episode_no=12",
|
||||
),
|
||||
|
||||
"?type" : "banner",
|
||||
"title_no" : 919,
|
||||
"?episode_no": range(12, 14),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.webtoons.com/fr/romance/subzero/list?title_no=1845&page=7",
|
||||
"#comment" : "french",
|
||||
|
||||
Reference in New Issue
Block a user