diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b70c7935..cfda2901 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1076,7 +1076,7 @@ Consider all listed sites to potentially be NSFW. Webtoon https://www.webtoons.com/ - Comics, Episodes + Artists, Comics, Episodes diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index 25935f66..4073cc98 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -12,7 +12,8 @@ from .common import GalleryExtractor, Extractor, Message from .. import exception, text, util -BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/(([^/?#]+)" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com" +LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)" class WebtoonsBase(): @@ -44,7 +45,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor): directory_fmt = ("{category}", "{comic}") filename_fmt = "{episode_no}-{num:>02}.{extension}" archive_fmt = "{title_no}_{episode_no}_{num}" - pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)" + pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)" r"/viewer\?([^#'\"]+)") example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer" "?title_no=123&episode_no=12345") @@ -108,7 +109,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): """Extractor for an entire comic on webtoons.com""" subcategory = "comic" categorytransfer = True - pattern = BASE_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)" + pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)" example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123" def _init(self): @@ -155,3 +156,40 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor): match.group(0) for match in WebtoonsEpisodeExtractor.pattern.finditer(page) ] + + +class WebtoonsArtistExtractor(WebtoonsBase, Extractor): + """Extractor for webtoons.com artists""" + subcategory = "artist" + pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)" + example = "https://www.webtoons.com/p/community/LANG/u/ARTIST" + + def items(self): + self.setup_agegate_cookies() + + for comic in self.comics(): + comic["_extractor"] = WebtoonsComicExtractor + comic_url = self.root + comic["extra"]["episodeListPath"] + yield Message.Queue, comic_url, comic + + def comics(self): + lang, artist = self.groups + language = util.code_to_language(lang).upper() + + url = "{}/p/community/{}/u/{}".format( + self.root, lang, artist) + page = self.request(url).text + creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\') + + url = "{}/p/community/api/v1/creator/{}/titles".format( + self.root, creator_id) + params = { + "language": language, + "nextSize": "50", + } + headers = { + "language": language, + } + data = self.request(url, params=params, headers=headers).json() + + return data["result"]["titles"] diff --git a/test/results/webtoons.py b/test/results/webtoons.py index 4574fd48..aa17ee6d 100644 --- a/test/results/webtoons.py +++ b/test/results/webtoons.py @@ -83,9 +83,9 @@ __tests__ = ( "#url" : "https://www.webtoons.com/en/canvas/us-over-here/1-the-wheel/viewer?title_no=919536&episode_no=1", "#category": ("", "webtoons", "episode"), "#class" : webtoons.WebtoonsEpisodeExtractor, - "#count" : 60, + "#count" : 3, - "comic_name" : "Us, over here", + "comic_name" : "(news soon)", "episode_name": "1. The Wheel", "episode" : "1", "username" : "i94q8", @@ -137,4 +137,22 @@ __tests__ = ( "#class" : webtoons.WebtoonsComicExtractor, }, +{ + "#url" : "https://www.webtoons.com/p/community/en/u/g6vj8", + "#class" : webtoons.WebtoonsArtistExtractor, + "#urls" : ( + "https://www.webtoons.com/en/canvas/scoob-and-shag/list?title_no=210827", + "https://www.webtoons.com/en/canvas/sparkle-kid/list?title_no=205304", + ), + + "id" : {"210827", "205304"}, + "subject": {"Scoob and Shag", "Sparkle Kid"}, + "authors": [ + { + "nickname": "Misterie Krew", + }, + ], + +}, + )