[webtoons] extract 'num_play' & 'num_stop' for background music (#8733 #8755)

* [webtoons] add play_image and stop_image to background music metadata
* [webtoons] add play_image and stop_image fields to test results
* [webtoons]: flake8
* [webtoons]: flake8 (2)

* update & improve
    - remove global 'images_urls'
    - move enumerate loop into images() extraction
* simplify extraction of 'play_image' etc.
* rename 'play_image' etc. to:
    - num_play
    - num_stop
    - filename_play
    - filename_stop

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
Oleguer Llopart
2025-12-27 09:09:10 +01:00
committed by GitHub
parent f2efccaadf
commit bea0e16970
2 changed files with 24 additions and 2 deletions

View File

@@ -52,6 +52,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
r"/viewer\?([^#'\"]+)")
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
"?title_no=123&episode_no=12345")
images_urls = []
def _init(self):
self.setup_agegate_cookies()
@@ -61,6 +62,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
self.title_no = params.get("title_no")
self.episode_no = params.get("episode_no")
self.page_url = f"{self.root}/{base}/viewer?{query}"
self.bgm = self.config("bgm", True)
def metadata(self, page):
extr = text.extract_from(page)
@@ -114,12 +116,21 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
elif not isinstance(quality, dict):
quality = None
if self.bgm:
num = 0
self.paths = paths = {}
else:
num = None
results = []
for url in text.extract_iter(
page, 'class="_images" data-url="', '"'):
path, _, query = url.rpartition("?")
if num is not None:
num += 1
paths[path[path.find("/", 8):]] = num
if quality is not None:
path, _, query = url.rpartition("?")
type = quality.get(path.rpartition(".")[2].lower())
if type is False:
url = path
@@ -137,7 +148,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
url = _url(text.extr(active, 'data-url="', '"'))
assets.append({"url": url, "type": "thumbnail"})
if self.config("bgm", True):
if self.bgm:
if bgm := text.extr(page, "episodeBgmList:", ",\n"):
self._asset_bgm(assets, util.json_loads(bgm))
@@ -159,6 +170,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "cross-site",
}
paths = self.paths
for bgm in bgm_list:
url = (f"https://apis.naver.com/audiocweb/audiocplayogwweb/play"
@@ -168,10 +180,16 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
token = data["result"]["playToken"]
data = util.json_loads(binascii.a2b_base64(token).decode())
audio = data["audioInfo"]
play = bgm.get("playImageUrl", "")
stop = bgm.get("stopImageUrl", "")
assets.append({
**bgm,
**audio,
"num_play": paths.get(play) or 0,
"num_stop": paths.get(stop) or 0,
"filename_play": play[play.rfind("/")+1:play.rfind(".")],
"filename_stop": stop[stop.rfind("/")+1:stop.rfind(".")],
"type": "bgm",
"url" : "ytdl:" + audio["url"],
"_ytdl_manifest": audio["type"].lower(),

View File

@@ -160,6 +160,10 @@ __tests__ = (
"lang" : "en",
"language" : "English",
"num" : 0,
"num_play" : 17,
"num_stop" : 0,
"filename_play" : "1475724249934679214",
"filename_stop" : "",
"objectType" : "mp4a.40.2",
"originalFileSize": 0,
"playImageUrl" : "/20161006_271/1475724249957QlGUF_JPEG/1475724249934679214.jpg",