diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index c819e03f..a385f70d 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -32,6 +32,8 @@ class WebtoonsBase(): "ageGatePass": "true", }) + _init = setup_agegate_cookies + def request(self, url, **kwargs): response = Extractor.request(self, url, **kwargs) if response.history and "/ageGate" in response.url: @@ -54,12 +56,11 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): def _init(self): self.setup_agegate_cookies() - path, self.lang, self.genre, self.comic, query = self.groups + base, self.lang, self.genre, self.comic, query = self.groups params = text.parse_query(query) self.title_no = params.get("title_no") self.episode_no = params.get("episode_no") - self.gallery_url = "{}/{}/viewer?{}".format(self.root, path, query) - self.thumbnails = self.config("thumbnails", False) + self.gallery_url = f"{self.root}/{base}/viewer?{query}" def metadata(self, page): extr = text.extract_from(page) @@ -67,19 +68,19 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): descr = extr('', '<') + comic_name = extr(">", "<") episode_name = extr('

#', '<') + episode = extr(">#", "<") else: episode = "" - if extr('', '') + if extr('", "") else: username = author_name = "" @@ -123,14 +124,14 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): if type is False: url = path elif type: - url = "{}?type={}".format(path, type) + url = f"{path}?type={type}" url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.") results.append((url, None)) return results def assets(self, page): - if self.thumbnails: + if self.config("thumbnails", False): active = text.extr(page, 'class="on ', '') url = text.extr(active, 'data-url="', '"') url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.") @@ -141,45 +142,39 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): """Extractor for an entire comic on webtoons.com""" subcategory = "comic" categorytransfer = True - pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)" + pattern = LANG_PATTERN + r"/[^/?#]+/[^/?#]+)/list\?([^#]+)" example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123" - def _init(self): - self.setup_agegate_cookies() - - self.path, self.lang, self.genre, self.comic, query = self.groups - params = text.parse_query(query) - self.title_no = params.get("title_no") - self.page_no = text.parse_int(params.get("page"), 1) - def items(self): - page = None + base, lang, query = self.groups + params = text.parse_query(query) + title_no = params.get("title_no") + page_no = text.parse_int(params.get("page"), 1) + data = { "_extractor": WebtoonsEpisodeExtractor, - "title_no" : text.parse_int(self.title_no), + "title_no" : text.parse_int(title_no), + "page" : page_no, } + path = f"/{base}/list?title_no={title_no}&page={page_no}" + response = self.request(self.root + path) + if response.history: + parts = response.url.split("/") + base = "/".join(parts[3:-1]) + page = response.text + while True: - path = "/{}/list?title_no={}&page={}".format( - self.path, self.title_no, self.page_no) - - if page is not None and path not in page: - return - - response = self.request(self.root + path) - if response.history: - parts = response.url.split("/") - self.path = "/".join(parts[3:-1]) - - page = response.text - data["page"] = self.page_no - for url in self.get_episode_urls(page): params = text.parse_query(url.rpartition("?")[2]) data["episode_no"] = text.parse_int(params.get("episode_no")) yield Message.Queue, url, data - self.page_no += 1 + data["page"] = page_no = page_no + 1 + path = f"/{base}/list?title_no={title_no}&page={page_no}" + if path not in page: + return + page = self.request(self.root + path).text def get_episode_urls(self, page): """Extract and return all episode urls in 'page'""" @@ -197,8 +192,6 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor): example = "https://www.webtoons.com/p/community/LANG/u/ARTIST" def items(self): - self.setup_agegate_cookies() - for comic in self.comics(): comic["_extractor"] = WebtoonsComicExtractor comic_url = self.root + comic["extra"]["episodeListPath"] @@ -208,13 +201,11 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor): lang, artist = self.groups language = util.code_to_language(lang).upper() - url = "{}/p/community/{}/u/{}".format( - self.root, lang, artist) + url = f"{self.root}/p/community/{lang}/u/{artist}" page = self.request(url).text creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\') - url = "{}/p/community/api/v1/creator/{}/titles".format( - self.root, creator_id) + url = f"{self.root}/p/community/api/v1/creator/{creator_id}/titles" params = { "language": language, "nextSize": "50", @@ -222,6 +213,6 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor): headers = { "language": language, } - data = self.request(url, params=params, headers=headers).json() + data = self.request_json(url, params=params, headers=headers) return data["result"]["titles"]