[webtoons] add 'banners' option (#6468)
This commit is contained in:
@@ -19,6 +19,9 @@ LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
|
||||
class WebtoonsBase():
|
||||
category = "webtoons"
|
||||
root = "https://www.webtoons.com"
|
||||
directory_fmt = ("{category}", "{comic}")
|
||||
filename_fmt = "{episode_no}-{num:>02}{type:?-//}.{extension}"
|
||||
archive_fmt = "{title_no}_{episode_no}_{num}"
|
||||
cookies_domain = ".webtoons.com"
|
||||
request_interval = (0.5, 1.5)
|
||||
|
||||
@@ -45,9 +48,6 @@ class WebtoonsBase():
|
||||
class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
||||
"""Extractor for an episode on webtoons.com"""
|
||||
subcategory = "episode"
|
||||
directory_fmt = ("{category}", "{comic}")
|
||||
filename_fmt = "{episode_no}-{num:>02}{type:?-//}.{extension}"
|
||||
archive_fmt = "{title_no}_{episode_no}_{num}"
|
||||
pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
|
||||
r"/viewer\?([^#'\"]+)")
|
||||
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
|
||||
@@ -126,15 +126,13 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
||||
elif type:
|
||||
url = f"{path}?type={type}"
|
||||
|
||||
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
||||
results.append((url, None))
|
||||
results.append((_url(url), None))
|
||||
return results
|
||||
|
||||
def assets(self, page):
|
||||
if self.config("thumbnails", False):
|
||||
active = text.extr(page, 'class="on ', '</a>')
|
||||
url = text.extr(active, 'data-url="', '"')
|
||||
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
||||
url = _url(text.extr(active, 'data-url="', '"'))
|
||||
return ({"url": url, "type": "thumbnail"},)
|
||||
|
||||
|
||||
@@ -142,20 +140,17 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
||||
"""Extractor for an entire comic on webtoons.com"""
|
||||
subcategory = "comic"
|
||||
categorytransfer = True
|
||||
pattern = LANG_PATTERN + r"/[^/?#]+/[^/?#]+)/list\?([^#]+)"
|
||||
filename_fmt = "{type}.{extension}"
|
||||
archive_fmt = "{title_no}_{type}"
|
||||
pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
|
||||
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
|
||||
|
||||
def items(self):
|
||||
base, lang, query = self.groups
|
||||
kw = self.kwdict
|
||||
base, kw["lang"], kw["genre"], kw["comic"], query = self.groups
|
||||
params = text.parse_query(query)
|
||||
title_no = params.get("title_no")
|
||||
page_no = text.parse_int(params.get("page"), 1)
|
||||
|
||||
data = {
|
||||
"_extractor": WebtoonsEpisodeExtractor,
|
||||
"title_no" : text.parse_int(title_no),
|
||||
"page" : page_no,
|
||||
}
|
||||
kw["title_no"] = title_no = text.parse_int(params.get("title_no"))
|
||||
kw["page"] = page_no = text.parse_int(params.get("page"), 1)
|
||||
|
||||
path = f"/{base}/list?title_no={title_no}&page={page_no}"
|
||||
response = self.request(self.root + path)
|
||||
@@ -164,13 +159,18 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
||||
base = "/".join(parts[3:-1])
|
||||
page = response.text
|
||||
|
||||
if self.config("banners") and (asset := self._asset_banner(page)):
|
||||
yield Message.Directory, asset
|
||||
yield Message.Url, asset["url"], asset
|
||||
|
||||
data = {"_extractor": WebtoonsEpisodeExtractor}
|
||||
while True:
|
||||
for url in self.get_episode_urls(page):
|
||||
params = text.parse_query(url.rpartition("?")[2])
|
||||
data["episode_no"] = text.parse_int(params.get("episode_no"))
|
||||
yield Message.Queue, url, data
|
||||
|
||||
data["page"] = page_no = page_no + 1
|
||||
kw["page"] = page_no = page_no + 1
|
||||
path = f"/{base}/list?title_no={title_no}&page={page_no}"
|
||||
if path not in page:
|
||||
return
|
||||
@@ -178,12 +178,21 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
||||
|
||||
def get_episode_urls(self, page):
|
||||
"""Extract and return all episode urls in 'page'"""
|
||||
page = text.extr(page, 'id="_listUl"', '</ul>')
|
||||
page = text.extr(page, 'id="_listUl"', "</ul>")
|
||||
return [
|
||||
match[0]
|
||||
for match in WebtoonsEpisodeExtractor.pattern.finditer(page)
|
||||
]
|
||||
|
||||
def _asset_banner(self, page):
|
||||
try:
|
||||
pos = page.index('<span class="thmb')
|
||||
except Exception:
|
||||
return
|
||||
|
||||
url = _url(text.extract(page, 'src="', '"', pos)[0])
|
||||
return text.nameext_from_url(url, {"url": url, "type": "banner"})
|
||||
|
||||
|
||||
class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
|
||||
"""Extractor for webtoons.com artists"""
|
||||
@@ -216,3 +225,7 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
|
||||
data = self.request_json(url, params=params, headers=headers)
|
||||
|
||||
return data["result"]["titles"]
|
||||
|
||||
|
||||
def _url(url):
|
||||
return url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
||||
|
||||
Reference in New Issue
Block a user