diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index 1aa19199..cebb421f 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -6,23 +6,20 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.webtoons.com/""" +"""Extractors for https://www.webtoons.com/""" -from .common import Extractor, Message +from .common import GalleryExtractor, Extractor, Message from .. import exception, text, util BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/((en|fr)" -class WebtoonsExtractor(Extractor): +class WebtoonsBase(): category = "webtoons" root = "https://www.webtoons.com" - cookiedomain = "www.webtoons.com" + cookiedomain = ".webtoons.com" - def __init__(self, match): - Extractor.__init__(self, match) - self.path, self.lang, self.genre , self.comic, self.query = \ - match.groups() + def setup_agegate_cookies(self): self._update_cookies({ "atGDPR" : "AD_CONSENT", "needCCPA" : "false", @@ -34,13 +31,13 @@ class WebtoonsExtractor(Extractor): def request(self, url, **kwargs): response = Extractor.request(self, url, **kwargs) - if response.history and "/ageGate" in response.request.url: + if response.history and "/ageGate" in response.url: raise exception.StopExtraction( - "Redirected to age gate check ('%s')", response.request.url) + "HTTP redirect to age gate check ('%s')", response.request.url) return response -class WebtoonsEpisodeExtractor(WebtoonsExtractor): +class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): """Extractor for an episode on webtoons.com""" subcategory = "episode" directory_fmt = ("{category}", "{comic}") @@ -60,54 +57,44 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor): ) def __init__(self, match): - WebtoonsExtractor.__init__(self, match) - query = text.parse_query(self.query) - self.title_no = query.get("title_no") - if not self.title_no: - raise exception.NotFoundError("title_no") - self.episode = query.get("episode_no") - if not self.episode: - raise exception.NotFoundError("episode_no") + self.path, self.lang, self.genre, self.comic, query = match.groups() - def items(self): - url = "{}/{}/viewer?{}".format(self.root, self.path, self.query) + url = "{}/{}/viewer?{}".format(self.root, self.path, query) + GalleryExtractor.__init__(self, match, url) + self.setup_agegate_cookies() self.session.headers["Referer"] = url - page = self.request(url).text - data = self.get_job_metadata(page) - imgs = self.get_image_urls(page) - data["count"] = len(imgs) + query = text.parse_query(query) + self.title_no = query.get("title_no") + self.episode = query.get("episode_no") - yield Message.Version, 1 - yield Message.Directory, data - for data["num"], url in enumerate(imgs, 1): - yield Message.Url, url, text.nameext_from_url(url, data) - - def get_job_metadata(self, page): - """Collect metadata for extractor-job""" + def metadata(self, page): title, pos = text.extract( page, '