merge #4682: [naver] support videos & fix recent 'date' bug (#7395)

This commit is contained in:
Mike Fährmann
2025-04-22 09:25:22 +02:00
4 changed files with 119 additions and 6 deletions

View File

@@ -3437,6 +3437,16 @@ Description
Note: Not supported by all ``moebooru`` instances.
extractor.naver.videos
----------------------
Type
``bool``
Default
``true``
Description
Download videos.
extractor.newgrounds.flash
--------------------------
Type

View File

@@ -420,6 +420,10 @@
"username": "",
"password": ""
},
"naver":
{
"videos": true
},
"newgrounds":
{
"username": "",

View File

@@ -9,7 +9,9 @@
"""Extractors for https://blog.naver.com/"""
from .common import GalleryExtractor, Extractor, Message
from .. import text
from .. import text, util
import datetime
import time
class NaverBase():
@@ -59,19 +61,66 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
"user" : extr("var nickName = '", "'"),
},
}
data["post"]["date"] = text.parse_datetime(
data["post"]["date"] = self._parse_datetime(
extr('se_publishDate pcol2">', '<') or
extr('_postAddDate">', '<'), "%Y. %m. %d. %H:%M")
extr('_postAddDate">', '<'))
return data
def _parse_datetime(self, date_string):
if "" in date_string:
ts = time.gmtime()
return datetime.datetime(ts.tm_year, ts.tm_mon, ts.tm_mday)
return text.parse_datetime(date_string, "%Y. %m. %d. %H:%M")
def images(self, page):
    """Collect all downloadable files referenced by a post page

    Returns the list that _extract_images() and, unless the 'videos'
    option is set to a false value, _extract_videos() have filled
    (images first, then videos).
    """
    files = []
    self._extract_images(files, page)
    # 'videos' defaults to True
    if self.config("videos", True):
        self._extract_videos(files, page)
    return files
def _extract_images(self, files, page):
    """Append a (url, None) tuple to 'files' for each image in 'page'

    Image URLs are taken from 'data-lazy-src' attributes. The first
    "://post" in a URL is rewritten to "://blog" and any query string
    is dropped.
    """
    for url in text.extract_iter(page, 'data-lazy-src="', '"'):
        url = url.replace("://post", "://blog", 1).partition("?")[0]
        # U+FFFD is the Unicode replacement character; presumably it
        # appears when the percent-escapes are not valid UTF-8, in which
        # case the URL is unquoted as EUC-KR instead
        if "\ufffd" in text.unquote(url):
            url = text.unquote(url, encoding="EUC-KR")
        files.append((url, None))
def _extract_videos(self, files, page):
    """Add the video of every 'v2_video' module in 'page' to 'files'

    Each matching 'data-module' attribute is parsed as JSON and its
    "data" object is passed to _extract_media(). A failure there is
    logged as a warning and does not stop the remaining modules.
    """
    modules = text.extract_iter(page, " data-module='", "'></")
    for raw in modules:
        # only modules mentioning "v2_video" are processed
        if '"v2_video"' in raw:
            info = util.json_loads(raw)["data"]
            try:
                self._extract_media(files, info)
            except Exception as err:
                self.log.warning(
                    "%s: Failed to extract video '%s' (%s: %s)",
                    self.post_id, info.get("vid"),
                    err.__class__.__name__, err)
def _extract_media(self, files, media):
    """Request the playback data of one video and add it to 'files'

    'media' must provide the "vid" and "inkey" values of the video.
    From the returned video list, the entry with the greatest "size"
    value (missing or empty sizes count as 0) is appended to 'files'
    as a (source-url, entry) tuple.
    """
    endpoint = "https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/"
    url = endpoint + media["vid"]

    # note: a "pid" parameter is deliberately not sent
    params = {
        "key": media["inkey"],
        "sid": "2",
        "nonce": int(time.time()),
        "devt": "html5_pc",
        "prv": "N",
        "aup": "N",
        "stpb": "N",
        "cpl": "ko_KR",
        "providerEnv": "real",
        "adt": "glad",
        "lc": "ko_KR",
    }

    response = self.request(url, params=params)
    candidates = response.json()["videos"]["list"]

    def size_of(variant):
        return variant.get("size") or 0

    best = max(candidates, key=size_of)
    files.append((best["source"], best))
class NaverBlogExtractor(NaverBase, Extractor):

View File

@@ -51,6 +51,56 @@ __tests__ = (
"extension": "png",
},
{
"#url" : "https://blog.naver.com/jws790103/223239681955",
"#comment" : "videos",
"#category": ("", "naver", "post"),
"#class" : naver.NaverPostExtractor,
"#pattern" : (
r"https://blogfiles.pstatic.net/MjAyMzA5MjVfMTMy/MDAxNjk1NjQ0MzI4OTE3.UxgvxTesk7Y88OWGvPMwQhbmCPp6mPA_C-5l5lJggyEg.B0DbxNEzz3DxRJtShiiBHDLzLQSCFDo_Bp6c-bcMDiog.JPEG.jws790103/20230925%EF%BC%BF080218.jpg",
r"https://blogfiles.pstatic.net/MjAyMzA5MjVfMjAz/MDAxNjk1NjQ0MzI4OTA5.Kd4VzqHhhrgby7rCA1iPdBX6f_k2DPEBnlRdOWD-kPgg.U0C1lmlKVMZMA4hhhs69nolZwCZ4Plme4KVbNfhezhkg.JPEG.jws790103/20230925%EF%BC%BF081103.jpg",
r"https://blogfiles.pstatic.net/MjAyMzA5MjVfMTg3/MDAxNjk1NjQ0MzI4OTk2.faiqny7Fl82Nnc3cJj85xa_MSBjYR3BStKeHw2bjYTwg.7Z8w0lDO9Uhjr8QTGwA0az_UZhN9haHocbYWgEyBO9gg.JPEG.jws790103/20230925%EF%BC%BF081141.jpg",
r"https://blogfiles.pstatic.net/MjAyMzA5MjVfMTIz/MDAxNjk1NjQ0MzI4OTIz.xkrCwJuYVtQID9td3XdEz8JHHrdN5UZzfOJ6nb1rW4Mg.d1FfbB8GONEej23X9Uc9uAP_oBwWnTbb9aFaBCrkfQEg.JPEG.jws790103/20230925%EF%BC%BF100506.jpg",
r"https://blogfiles.pstatic.net/MjAyMzA5MjVfMjI4/MDAxNjk1NjQ0MzI5Njg4.BHqs4eTTqOFfvYx7oZBCdeYXkQOkTFiTb8kWdC4JLeYg.8ytEDpmgyn79au0g1vGJhVxRPRVlKLF0gwQe4L0egFIg.JPEG.jws790103/20230925%EF%BC%BF100548.jpg",
r"https://a01-g-naver-vod.akamaized.net/blog/a/read/v2/VOD_ALPHA/blog_2023_10_18_2486/base_pathfinder_pf3448100_81cd756f-6cff-11ee-b67f-80615f0c46d6.mp4\?__gda__=\d+_\w+&in_out_flag=1",
r"https://a01-g-naver-vod.akamaized.net/blog/a/read/v2/VOD_ALPHA/blog_2023_10_18_162/base_pathfinder_pf3448100_810b0fc9-6cff-11ee-8895-a0369ffde1ec.mp4\?__gda__=\d+_\w+&in_out_flag=1",
),
"blog": {
"id" : "jws790103",
"num" : 25591202,
"user": "fm컴퍼니 짱",
},
"post": {
"date" : "dt:2023-10-18 06:50:00",
"description": "체육행사 기획행사는 fm컴퍼니에서 함께 하겠습니다. 어린이집 연합회 마라톤 대회에 무대렌탈 장비를 대여...",
"num" : 223239681955,
"title" : "마라톤대회 무대설치 기획행사 무대설치 체육행사 무대설치완료 fm컴퍼니에서 함께 하였습니다.",
},
"extension": {"jpg", "mp4"},
"count" : 7,
"num" : range(1, 7),
},
{
"#url" : "https://blog.naver.com/jws790103/223239681955",
"#comment" : "'videos' option",
"#category": ("", "naver", "post"),
"#class" : naver.NaverPostExtractor,
"#options" : {"videos": False},
"#urls": (
"https://blogfiles.pstatic.net/MjAyMzA5MjVfMTMy/MDAxNjk1NjQ0MzI4OTE3.UxgvxTesk7Y88OWGvPMwQhbmCPp6mPA_C-5l5lJggyEg.B0DbxNEzz3DxRJtShiiBHDLzLQSCFDo_Bp6c-bcMDiog.JPEG.jws790103/20230925%EF%BC%BF080218.jpg",
"https://blogfiles.pstatic.net/MjAyMzA5MjVfMjAz/MDAxNjk1NjQ0MzI4OTA5.Kd4VzqHhhrgby7rCA1iPdBX6f_k2DPEBnlRdOWD-kPgg.U0C1lmlKVMZMA4hhhs69nolZwCZ4Plme4KVbNfhezhkg.JPEG.jws790103/20230925%EF%BC%BF081103.jpg",
"https://blogfiles.pstatic.net/MjAyMzA5MjVfMTg3/MDAxNjk1NjQ0MzI4OTk2.faiqny7Fl82Nnc3cJj85xa_MSBjYR3BStKeHw2bjYTwg.7Z8w0lDO9Uhjr8QTGwA0az_UZhN9haHocbYWgEyBO9gg.JPEG.jws790103/20230925%EF%BC%BF081141.jpg",
"https://blogfiles.pstatic.net/MjAyMzA5MjVfMTIz/MDAxNjk1NjQ0MzI4OTIz.xkrCwJuYVtQID9td3XdEz8JHHrdN5UZzfOJ6nb1rW4Mg.d1FfbB8GONEej23X9Uc9uAP_oBwWnTbb9aFaBCrkfQEg.JPEG.jws790103/20230925%EF%BC%BF100506.jpg",
"https://blogfiles.pstatic.net/MjAyMzA5MjVfMjI4/MDAxNjk1NjQ0MzI5Njg4.BHqs4eTTqOFfvYx7oZBCdeYXkQOkTFiTb8kWdC4JLeYg.8ytEDpmgyn79au0g1vGJhVxRPRVlKLF0gwQe4L0egFIg.JPEG.jws790103/20230925%EF%BC%BF100548.jpg",
),
"extension": "jpg",
"count" : 5,
"num" : range(1, 5),
},
{
"#url" : "https://blog.naver.com/PostView.naver?blogId=rlfqjxm0&logNo=221430673006",
"#category": ("", "naver", "post"),