[webtoons] add 'artist' extractor (#7274)
This commit is contained in:
@@ -1076,7 +1076,7 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<tr>
|
<tr>
|
||||||
<td>Webtoon</td>
|
<td>Webtoon</td>
|
||||||
<td>https://www.webtoons.com/</td>
|
<td>https://www.webtoons.com/</td>
|
||||||
<td>Comics, Episodes</td>
|
<td>Artists, Comics, Episodes</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
|
|||||||
@@ -12,7 +12,8 @@
|
|||||||
from .common import GalleryExtractor, Extractor, Message
|
from .common import GalleryExtractor, Extractor, Message
|
||||||
from .. import exception, text, util
|
from .. import exception, text, util
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/(([^/?#]+)"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com"
|
||||||
|
LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
|
||||||
|
|
||||||
|
|
||||||
class WebtoonsBase():
|
class WebtoonsBase():
|
||||||
@@ -44,7 +45,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
|||||||
directory_fmt = ("{category}", "{comic}")
|
directory_fmt = ("{category}", "{comic}")
|
||||||
filename_fmt = "{episode_no}-{num:>02}.{extension}"
|
filename_fmt = "{episode_no}-{num:>02}.{extension}"
|
||||||
archive_fmt = "{title_no}_{episode_no}_{num}"
|
archive_fmt = "{title_no}_{episode_no}_{num}"
|
||||||
pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
|
pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
|
||||||
r"/viewer\?([^#'\"]+)")
|
r"/viewer\?([^#'\"]+)")
|
||||||
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
|
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
|
||||||
"?title_no=123&episode_no=12345")
|
"?title_no=123&episode_no=12345")
|
||||||
@@ -108,7 +109,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
|||||||
"""Extractor for an entire comic on webtoons.com"""
|
"""Extractor for an entire comic on webtoons.com"""
|
||||||
subcategory = "comic"
|
subcategory = "comic"
|
||||||
categorytransfer = True
|
categorytransfer = True
|
||||||
pattern = BASE_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
|
pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
|
||||||
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
|
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
@@ -155,3 +156,40 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
|||||||
match.group(0)
|
match.group(0)
|
||||||
for match in WebtoonsEpisodeExtractor.pattern.finditer(page)
|
for match in WebtoonsEpisodeExtractor.pattern.finditer(page)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
    """Extractor for webtoons.com artists"""
    subcategory = "artist"
    # capture groups: (1) language code, (2) artist name from the profile URL
    pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)"
    example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"

    def items(self):
        """Yield one Message.Queue entry per comic returned by comics()

        Each comic dict is tagged with WebtoonsComicExtractor as its
        '_extractor' and queued under its episode list URL.
        """
        self.setup_agegate_cookies()

        for comic in self.comics():
            comic["_extractor"] = WebtoonsComicExtractor
            comic_url = self.root + comic["extra"]["episodeListPath"]
            yield Message.Queue, comic_url, comic

    def comics(self):
        """Return the 'titles' list of the artist's creator API response

        Fetches the artist's community page to find the internal creator
        ID, then requests that creator's titles.

        Raises exception.NotFoundError when no creator ID can be found
        in the community page.
        """
        lang, artist = self.groups

        # NOTE(review): code_to_language() appears to return None for codes
        # it does not know; fall back to the raw code instead of failing
        # with an AttributeError on None.upper() -- confirm the fallback
        # value is acceptable to the API.
        language = (util.code_to_language(lang) or lang).upper()

        url = "{}/p/community/{}/u/{}".format(
            self.root, lang, artist)
        page = self.request(url).text

        # the ID is searched for in backslash-escaped form: \"creatorId\":\"
        creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\')
        if not creator_id:
            # presumably text.extr() yields an empty value when the marker
            # is missing; stop here rather than request '/creator//titles'
            raise exception.NotFoundError("artist")

        url = "{}/p/community/api/v1/creator/{}/titles".format(
            self.root, creator_id)
        # NOTE(review): only a single request with nextSize=50 is made;
        # whether the API paginates beyond that is not visible here.
        params = {
            "language": language,
            "nextSize": "50",
        }
        headers = {
            "language": language,
        }
        data = self.request(url, params=params, headers=headers).json()

        return data["result"]["titles"]
|
||||||
|
|||||||
@@ -83,9 +83,9 @@ __tests__ = (
|
|||||||
"#url" : "https://www.webtoons.com/en/canvas/us-over-here/1-the-wheel/viewer?title_no=919536&episode_no=1",
|
"#url" : "https://www.webtoons.com/en/canvas/us-over-here/1-the-wheel/viewer?title_no=919536&episode_no=1",
|
||||||
"#category": ("", "webtoons", "episode"),
|
"#category": ("", "webtoons", "episode"),
|
||||||
"#class" : webtoons.WebtoonsEpisodeExtractor,
|
"#class" : webtoons.WebtoonsEpisodeExtractor,
|
||||||
"#count" : 60,
|
"#count" : 3,
|
||||||
|
|
||||||
"comic_name" : "Us, over here",
|
"comic_name" : "(news soon)",
|
||||||
"episode_name": "1. The Wheel",
|
"episode_name": "1. The Wheel",
|
||||||
"episode" : "1",
|
"episode" : "1",
|
||||||
"username" : "i94q8",
|
"username" : "i94q8",
|
||||||
@@ -137,4 +137,22 @@ __tests__ = (
|
|||||||
"#class" : webtoons.WebtoonsComicExtractor,
|
"#class" : webtoons.WebtoonsComicExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.webtoons.com/p/community/en/u/g6vj8",
|
||||||
|
"#class" : webtoons.WebtoonsArtistExtractor,
|
||||||
|
"#urls" : (
|
||||||
|
"https://www.webtoons.com/en/canvas/scoob-and-shag/list?title_no=210827",
|
||||||
|
"https://www.webtoons.com/en/canvas/sparkle-kid/list?title_no=205304",
|
||||||
|
),
|
||||||
|
|
||||||
|
"id" : {"210827", "205304"},
|
||||||
|
"subject": {"Scoob and Shag", "Sparkle Kid"},
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"nickname": "Misterie Krew",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user