[webtoons] update code
This commit is contained in:
@@ -32,6 +32,8 @@ class WebtoonsBase():
|
|||||||
"ageGatePass": "true",
|
"ageGatePass": "true",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
_init = setup_agegate_cookies
|
||||||
|
|
||||||
def request(self, url, **kwargs):
|
def request(self, url, **kwargs):
|
||||||
response = Extractor.request(self, url, **kwargs)
|
response = Extractor.request(self, url, **kwargs)
|
||||||
if response.history and "/ageGate" in response.url:
|
if response.history and "/ageGate" in response.url:
|
||||||
@@ -54,12 +56,11 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
|||||||
def _init(self):
|
def _init(self):
|
||||||
self.setup_agegate_cookies()
|
self.setup_agegate_cookies()
|
||||||
|
|
||||||
path, self.lang, self.genre, self.comic, query = self.groups
|
base, self.lang, self.genre, self.comic, query = self.groups
|
||||||
params = text.parse_query(query)
|
params = text.parse_query(query)
|
||||||
self.title_no = params.get("title_no")
|
self.title_no = params.get("title_no")
|
||||||
self.episode_no = params.get("episode_no")
|
self.episode_no = params.get("episode_no")
|
||||||
self.gallery_url = "{}/{}/viewer?{}".format(self.root, path, query)
|
self.gallery_url = f"{self.root}/{base}/viewer?{query}"
|
||||||
self.thumbnails = self.config("thumbnails", False)
|
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
extr = text.extract_from(page)
|
extr = text.extract_from(page)
|
||||||
@@ -67,19 +68,19 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
|||||||
descr = extr('<meta property="og:description" content="', '"')
|
descr = extr('<meta property="og:description" content="', '"')
|
||||||
|
|
||||||
if extr('<div class="subj_info"', '\n'):
|
if extr('<div class="subj_info"', '\n'):
|
||||||
comic_name = extr('>', '<')
|
comic_name = extr(">", "<")
|
||||||
episode_name = extr('<h1 class="subj_episode" title="', '"')
|
episode_name = extr('<h1 class="subj_episode" title="', '"')
|
||||||
else:
|
else:
|
||||||
comic_name = episode_name = ""
|
comic_name = episode_name = ""
|
||||||
|
|
||||||
if extr('<span class="tx _btnOpenEpisodeList ', '"'):
|
if extr('<span class="tx _btnOpenEpisodeList ', '"'):
|
||||||
episode = extr('>#', '<')
|
episode = extr(">#", "<")
|
||||||
else:
|
else:
|
||||||
episode = ""
|
episode = ""
|
||||||
|
|
||||||
if extr('<span class="author"', '\n'):
|
if extr('<span class="author"', "\n"):
|
||||||
username = extr('/u/', '"')
|
username = extr("/u/", '"')
|
||||||
author_name = extr('<span>', '</span>')
|
author_name = extr("<span>", "</span>")
|
||||||
else:
|
else:
|
||||||
username = author_name = ""
|
username = author_name = ""
|
||||||
|
|
||||||
@@ -123,14 +124,14 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
|||||||
if type is False:
|
if type is False:
|
||||||
url = path
|
url = path
|
||||||
elif type:
|
elif type:
|
||||||
url = "{}?type={}".format(path, type)
|
url = f"{path}?type={type}"
|
||||||
|
|
||||||
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
||||||
results.append((url, None))
|
results.append((url, None))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def assets(self, page):
|
def assets(self, page):
|
||||||
if self.thumbnails:
|
if self.config("thumbnails", False):
|
||||||
active = text.extr(page, 'class="on ', '</a>')
|
active = text.extr(page, 'class="on ', '</a>')
|
||||||
url = text.extr(active, 'data-url="', '"')
|
url = text.extr(active, 'data-url="', '"')
|
||||||
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
|
||||||
@@ -141,45 +142,39 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
|
|||||||
"""Extractor for an entire comic on webtoons.com"""
|
"""Extractor for an entire comic on webtoons.com"""
|
||||||
subcategory = "comic"
|
subcategory = "comic"
|
||||||
categorytransfer = True
|
categorytransfer = True
|
||||||
pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
|
pattern = LANG_PATTERN + r"/[^/?#]+/[^/?#]+)/list\?([^#]+)"
|
||||||
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
|
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
|
||||||
|
|
||||||
def _init(self):
|
|
||||||
self.setup_agegate_cookies()
|
|
||||||
|
|
||||||
self.path, self.lang, self.genre, self.comic, query = self.groups
|
|
||||||
params = text.parse_query(query)
|
|
||||||
self.title_no = params.get("title_no")
|
|
||||||
self.page_no = text.parse_int(params.get("page"), 1)
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
page = None
|
base, lang, query = self.groups
|
||||||
|
params = text.parse_query(query)
|
||||||
|
title_no = params.get("title_no")
|
||||||
|
page_no = text.parse_int(params.get("page"), 1)
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"_extractor": WebtoonsEpisodeExtractor,
|
"_extractor": WebtoonsEpisodeExtractor,
|
||||||
"title_no" : text.parse_int(self.title_no),
|
"title_no" : text.parse_int(title_no),
|
||||||
|
"page" : page_no,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
path = f"/{base}/list?title_no={title_no}&page={page_no}"
|
||||||
|
response = self.request(self.root + path)
|
||||||
|
if response.history:
|
||||||
|
parts = response.url.split("/")
|
||||||
|
base = "/".join(parts[3:-1])
|
||||||
|
page = response.text
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
path = "/{}/list?title_no={}&page={}".format(
|
|
||||||
self.path, self.title_no, self.page_no)
|
|
||||||
|
|
||||||
if page is not None and path not in page:
|
|
||||||
return
|
|
||||||
|
|
||||||
response = self.request(self.root + path)
|
|
||||||
if response.history:
|
|
||||||
parts = response.url.split("/")
|
|
||||||
self.path = "/".join(parts[3:-1])
|
|
||||||
|
|
||||||
page = response.text
|
|
||||||
data["page"] = self.page_no
|
|
||||||
|
|
||||||
for url in self.get_episode_urls(page):
|
for url in self.get_episode_urls(page):
|
||||||
params = text.parse_query(url.rpartition("?")[2])
|
params = text.parse_query(url.rpartition("?")[2])
|
||||||
data["episode_no"] = text.parse_int(params.get("episode_no"))
|
data["episode_no"] = text.parse_int(params.get("episode_no"))
|
||||||
yield Message.Queue, url, data
|
yield Message.Queue, url, data
|
||||||
|
|
||||||
self.page_no += 1
|
data["page"] = page_no = page_no + 1
|
||||||
|
path = f"/{base}/list?title_no={title_no}&page={page_no}"
|
||||||
|
if path not in page:
|
||||||
|
return
|
||||||
|
page = self.request(self.root + path).text
|
||||||
|
|
||||||
def get_episode_urls(self, page):
|
def get_episode_urls(self, page):
|
||||||
"""Extract and return all episode urls in 'page'"""
|
"""Extract and return all episode urls in 'page'"""
|
||||||
@@ -197,8 +192,6 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
|
|||||||
example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"
|
example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.setup_agegate_cookies()
|
|
||||||
|
|
||||||
for comic in self.comics():
|
for comic in self.comics():
|
||||||
comic["_extractor"] = WebtoonsComicExtractor
|
comic["_extractor"] = WebtoonsComicExtractor
|
||||||
comic_url = self.root + comic["extra"]["episodeListPath"]
|
comic_url = self.root + comic["extra"]["episodeListPath"]
|
||||||
@@ -208,13 +201,11 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
|
|||||||
lang, artist = self.groups
|
lang, artist = self.groups
|
||||||
language = util.code_to_language(lang).upper()
|
language = util.code_to_language(lang).upper()
|
||||||
|
|
||||||
url = "{}/p/community/{}/u/{}".format(
|
url = f"{self.root}/p/community/{lang}/u/{artist}"
|
||||||
self.root, lang, artist)
|
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\')
|
creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\')
|
||||||
|
|
||||||
url = "{}/p/community/api/v1/creator/{}/titles".format(
|
url = f"{self.root}/p/community/api/v1/creator/{creator_id}/titles"
|
||||||
self.root, creator_id)
|
|
||||||
params = {
|
params = {
|
||||||
"language": language,
|
"language": language,
|
||||||
"nextSize": "50",
|
"nextSize": "50",
|
||||||
@@ -222,6 +213,6 @@ class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
|
|||||||
headers = {
|
headers = {
|
||||||
"language": language,
|
"language": language,
|
||||||
}
|
}
|
||||||
data = self.request(url, params=params, headers=headers).json()
|
data = self.request_json(url, params=params, headers=headers)
|
||||||
|
|
||||||
return data["result"]["titles"]
|
return data["result"]["titles"]
|
||||||
|
|||||||
Reference in New Issue
Block a user