diff --git a/docs/configuration.rst b/docs/configuration.rst index f7cf782d..ba78d898 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -5578,6 +5578,17 @@ Description | - Set a value to ``false`` to completely remove these extension's ``type`` parameter | - Omit an extension to leave its URLs unchanged + +extractor.webtoons.banners +-------------------------- +Type + ``bool`` +Default + ``false`` +Description + Download the active comic's ``banner``. + + extractor.webtoons.thumbnails ----------------------------- Type @@ -5585,7 +5596,7 @@ Type Default ``false`` Description - Download the active chapter's ``thumbnail``. + Download the active episode's ``thumbnail``. Useful for creating CBZ archives with actual source thumbnails. diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 87ed0a7f..2c9bdbdf 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -792,6 +792,7 @@ "sleep-request": "0.5-1.5", "quality" : "original", + "banners" : false, "thumbnails": false }, "weebcentral": diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index a385f70d..5bc5cb29 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -19,6 +19,9 @@ LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)" class WebtoonsBase(): category = "webtoons" root = "https://www.webtoons.com" + directory_fmt = ("{category}", "{comic}") + filename_fmt = "{episode_no}-{num:>02}{type:?-//}.{extension}" + archive_fmt = "{title_no}_{episode_no}_{num}" cookies_domain = ".webtoons.com" request_interval = (0.5, 1.5) @@ -45,9 +48,6 @@ class WebtoonsBase(): class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): """Extractor for an episode on webtoons.com""" subcategory = "episode" - directory_fmt = ("{category}", "{comic}") - filename_fmt = "{episode_no}-{num:>02}{type:?-//}.{extension}" - archive_fmt = "{title_no}_{episode_no}_{num}" pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)" r"/viewer\?([^#'\"]+)") example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer" @@ -126,15 +126,13 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): elif type: url = f"{path}?type={type}" - url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.") - results.append((url, None)) + results.append((_url(url), None)) return results def assets(self, page): if self.config("thumbnails", False): active = text.extr(page, 'class="on ', '') - url = text.extr(active, 'data-url="', '"') - url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.") + url = _url(text.extr(active, 'data-url="', '"')) return ({"url": url, "type": "thumbnail"},) @@ -142,20 +140,17 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): """Extractor for an entire comic on webtoons.com""" subcategory = "comic" categorytransfer = True - pattern = LANG_PATTERN + r"/[^/?#]+/[^/?#]+)/list\?([^#]+)" + filename_fmt = "{type}.{extension}" + archive_fmt = "{title_no}_{type}" + pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)" example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123" def items(self): - base, lang, query = self.groups + kw = self.kwdict + base, kw["lang"], kw["genre"], kw["comic"], query = self.groups params = text.parse_query(query) - title_no = params.get("title_no") - page_no = text.parse_int(params.get("page"), 1) - - data = { - "_extractor": WebtoonsEpisodeExtractor, - "title_no" : text.parse_int(title_no), - "page" : page_no, - } + kw["title_no"] = title_no = text.parse_int(params.get("title_no")) + kw["page"] = page_no = text.parse_int(params.get("page"), 1) path = f"/{base}/list?title_no={title_no}&page={page_no}" response = self.request(self.root + path) @@ -164,13 +159,18 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): base = "/".join(parts[3:-1]) page = response.text + if self.config("banners") and (asset := self._asset_banner(page)): + yield Message.Directory, asset + yield Message.Url, asset["url"], asset + + data = {"_extractor": WebtoonsEpisodeExtractor} while True: for url in self.get_episode_urls(page): params = text.parse_query(url.rpartition("?")[2]) data["episode_no"] = text.parse_int(params.get("episode_no")) yield Message.Queue, url, data - data["page"] = page_no = page_no + 1 + kw["page"] = page_no = page_no + 1 path = f"/{base}/list?title_no={title_no}&page={page_no}" if path not in page: return @@ -178,12 +178,21 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): def get_episode_urls(self, page): """Extract and return all episode urls in 'page'""" - page = text.extr(page, 'id="_listUl"', '') + page = text.extr(page, 'id="_listUl"', "") return [ match[0] for match in WebtoonsEpisodeExtractor.pattern.finditer(page) ] + def _asset_banner(self, page): + try: + pos = page.index('