[webtoons] add 'artist' extractor (#7274)

This commit is contained in:
Mike Fährmann
2025-04-01 10:04:13 +02:00
parent 492ea46c25
commit 015ba76c9c
3 changed files with 62 additions and 6 deletions

View File

@@ -1076,7 +1076,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Webtoon</td>
<td>https://www.webtoons.com/</td>
<td>Comics, Episodes</td>
<td>Artists, Comics, Episodes</td>
<td></td>
</tr>
<tr>

View File

@@ -12,7 +12,8 @@
from .common import GalleryExtractor, Extractor, Message
from .. import exception, text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/(([^/?#]+)"
# scheme and host prefix shared by all webtoons.com URL patterns
BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com"
# BASE_PATTERN plus the first path segment; the outer group opened here
# is left unclosed and must be closed by the pattern that builds on it
LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
class WebtoonsBase():
@@ -44,7 +45,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
directory_fmt = ("{category}", "{comic}")
filename_fmt = "{episode_no}-{num:>02}.{extension}"
archive_fmt = "{title_no}_{episode_no}_{num}"
pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
r"/viewer\?([^#'\"]+)")
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
"?title_no=123&episode_no=12345")
@@ -108,7 +109,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
"""Extractor for an entire comic on webtoons.com"""
subcategory = "comic"
categorytransfer = True
pattern = BASE_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
def _init(self):
@@ -155,3 +156,40 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
match.group(0)
for match in WebtoonsEpisodeExtractor.pattern.finditer(page)
]
class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
    """Extractor for the comics of a webtoons.com artist"""
    subcategory = "artist"
    pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)"
    example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"

    def items(self):
        """Yield one Queue message per comic returned by comics()"""
        self.setup_agegate_cookies()

        for title in self.comics():
            # hand each comic over to the comic extractor
            title["_extractor"] = WebtoonsComicExtractor
            list_path = title["extra"]["episodeListPath"]
            yield Message.Queue, self.root + list_path, title

    def comics(self):
        """Return the 'titles' list from the creator API response"""
        lang, artist = self.groups
        language = util.code_to_language(lang).upper()

        # the creator ID is taken from the profile page text: everything
        # between '\"creatorId\":\"' and the next backslash
        profile_url = "{}/p/community/{}/u/{}".format(
            self.root, lang, artist)
        profile_page = self.request(profile_url).text
        creator_id = text.extr(
            profile_page, '\\"creatorId\\":\\"', '\\')

        api_url = "{}/p/community/api/v1/creator/{}/titles".format(
            self.root, creator_id)
        # the language name is sent both as query parameter and as header
        # NOTE(review): only a single request is made; presumably
        # 'nextSize' caps the number of returned titles -- confirm
        query = {
            "language": language,
            "nextSize": "50",
        }
        extra_headers = {
            "language": language,
        }

        response = self.request(
            api_url, params=query, headers=extra_headers)
        return response.json()["result"]["titles"]

View File

@@ -83,9 +83,9 @@ __tests__ = (
"#url" : "https://www.webtoons.com/en/canvas/us-over-here/1-the-wheel/viewer?title_no=919536&episode_no=1",
"#category": ("", "webtoons", "episode"),
"#class" : webtoons.WebtoonsEpisodeExtractor,
"#count" : 60,
"#count" : 3,
"comic_name" : "Us, over here",
"comic_name" : "(news soon)",
"episode_name": "1. The Wheel",
"episode" : "1",
"username" : "i94q8",
@@ -137,4 +137,22 @@ __tests__ = (
"#class" : webtoons.WebtoonsComicExtractor,
},
{
"#url" : "https://www.webtoons.com/p/community/en/u/g6vj8",
"#class" : webtoons.WebtoonsArtistExtractor,
"#urls" : (
"https://www.webtoons.com/en/canvas/scoob-and-shag/list?title_no=210827",
"https://www.webtoons.com/en/canvas/sparkle-kid/list?title_no=205304",
),
"id" : {"210827", "205304"},
"subject": {"Scoob and Shag", "Sparkle Kid"},
"authors": [
{
"nickname": "Misterie Krew",
},
],
},
)