[chzzk] add 'comment' and 'community' extractors (#7735 #7741)

* [chzzk] add 'comment' and 'community' extractors
* [chzzk] update
* [chzzk] add tests
* [chzzk] update docs/supportedsites
* [chzzk] add 'offset' option
* [docs] add 'offset' option to gallery-dl.conf
This commit is contained in:
enduser420
2025-06-28 18:57:19 +05:30
committed by GitHub
parent c8e4a2f8d1
commit f77e98b57d
6 changed files with 184 additions and 1 deletions

View File

@@ -1778,6 +1778,16 @@ Description
* ``false``: Match only URLs with known TLDs
extractor.chzzk.offset
----------------------
Type
``integer``
Default
``0``
Description
Custom ``offset`` starting value when paginating over comments.
extractor.cien.files
--------------------
Type

View File

@@ -1,4 +1,4 @@
{
{
"#": "gallery-dl default configuration file",
"#": "full documentation at",
@@ -182,6 +182,10 @@
"endpoint": "/api/_001",
"tlds": false
},
"chzzk":
{
"offset": 0
},
"cien":
{
"sleep-request": "1.0-2.0",

View File

@@ -169,6 +169,12 @@ Consider all listed sites to potentially be NSFW.
<td>Albums, Files</td>
<td></td>
</tr>
<tr>
<td>Chzzk</td>
<td>https://chzzk.naver.com</td>
<td>Comments, Communities</td>
<td></td>
</tr>
<tr>
<td>Ci-en</td>
<td>https://ci-en.net/</td>

View File

@@ -38,6 +38,7 @@ modules = [
"bunkr",
"catbox",
"chevereto",
"chzzk",
"cien",
"civitai",
"comick",

View File

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://chzzk.naver.com"""
from .common import Extractor, Message
from .. import text, util
class ChzzkExtractor(Extractor):
    """Base class for chzzk extractors"""
    category = "chzzk"
    filename_fmt = "{uid}_{id}_{num}.{extension}"
    directory_fmt = ("{category}", "{user[userNickname]}")
    archive_fmt = "{uid}_{id}_{num}"

    def request_api(self, uid, id=None, params=None):
        """Call the comment API and return the "content" of its response

        'uid' is placed in the URL path and 'id' becomes the last path
        segment (left empty when 'id' is falsy).
        'params' is passed through to request_json() unchanged.
        """
        endpoint = (
            f"https://apis.naver.com/nng_main/nng_comment_api/v1/type"
            f"/CHANNEL_POST/id/{uid}/comments/{id or ''}")
        response = self.request_json(endpoint, params=params)
        return response["content"]

    def items(self):
        """Yield one Directory message per comment and one Url per file

        Iterates over self.comments(), which the subclasses provide;
        every item is expected to carry a "comment" and a "user" entry.
        The comment dict itself is extended and reused as metadata.
        """
        # the comment timestamp format has no UTC offset,
        # the per-file timestamp format does
        comment_date_fmt = "%Y%m%d%H%M%S"
        file_date_fmt = "%Y-%m-%dT%H:%M:%S.%f%z"

        for entry in self.comments():
            post = entry["comment"]

            # a falsy "attaches" value is treated as "no files"
            attachments = post.pop("attaches") or ()

            post["id"] = post["commentId"]
            post["uid"] = post["objectId"]
            post["user"] = entry["user"]
            post["count"] = len(attachments)
            post["date"] = text.parse_datetime(
                post["createdDate"], comment_date_fmt)

            yield Message.Directory, post

            for num, attachment in enumerate(attachments, 1):
                post["num"] = num

                # merge the JSON-encoded extra fields into the file dict
                extra = attachment.get("extraJson")
                if extra:
                    attachment.update(util.json_loads(extra))

                attachment["date"] = text.parse_datetime(
                    attachment["createdDate"], file_date_fmt)
                attachment["date_updated"] = text.parse_datetime(
                    attachment["updatedDate"], file_date_fmt)

                post["file"] = attachment
                url = attachment["attachValue"]
                metadata = text.nameext_from_url(url, post)
                yield Message.Url, url, metadata
class ChzzkCommentExtractor(ChzzkExtractor):
    """Extractor for individual comment from chzzk.naver.com"""
    subcategory = "comment"
    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community/detail/(\d+)"
    example = "https://chzzk.naver.com/0123456789abcdef/community/detail/12345"

    def comments(self):
        """Return a 1-tuple holding the comment addressed by the URL

        The two pattern groups are passed to request_api() and only the
        "comment" and "user" entries of its result are kept.
        """
        channel_id, comment_id = self.groups
        content = self.request_api(channel_id, comment_id)
        entry = {key: content[key] for key in ("comment", "user")}
        return (entry,)
class ChzzkCommunityExtractor(ChzzkExtractor):
    """Extractor for comments from chzzk.naver.com"""
    subcategory = "community"
    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community"
    example = "https://chzzk.naver.com/0123456789abcdef/community"
    request_interval = (0.5, 1.5)

    def comments(self):
        """Yield every entry of the paginated comment listing

        Starts at the 'offset' config value (parsed with
        text.parse_int()) and advances by the page size until the
        response's page["next"] value is falsy.
        """
        channel_id = self.match[1]
        page_size = 10
        start = text.parse_int(self.config("offset"))

        query = {
            "limit": page_size,
            "offset": start,
            "pagingType": "PAGE",
        }

        while True:
            content = self.request_api(channel_id, params=query)
            listing = content["comments"]
            yield from listing["data"]

            if listing["page"]["next"]:
                query["offset"] += page_size
            else:
                return

81
test/results/chzzk.py Normal file
View File

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import chzzk
# Expected results for the chzzk extractors: one dict per test case.
# "#"-prefixed keys name the URL, the extractor class and result
# expectations; the remaining keys are expected metadata values.
__tests__ = (
# Single comment with one PNG attachment.
# NOTE(review): the comment-level "date" is 9 hours ahead of the
# file-level dates; presumably the comment timestamp is a local time
# without zone information - confirm.
{
"#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/13393754",
"#class" : chzzk.ChzzkCommentExtractor,
"#results": (
"https://nng-phinf.pstatic.net/MjAyNDA3MDlfNDgg/MDAxNzIwNTMzNzg2MDUx.0K9XrEW9CCSd2b7VdQHf8RGWkHAUsqEhNnLlleA11SUg.ZLx2V3gJPZR-kzrMY3E17wbu1ZmzYjitrEKmM_ykeWkg.PNG/tftyt.png",
),
"#count" : 1,
"id" : 13393754,
"uid" : "f30b95fc9af53a75b781d7d3dd933892",
"date" : "dt:2024-07-09 23:03:07",
"num" : int,
"user" : {
"userNickname": "memoji",
"userRoleCode": "streamer",
},
"file" : {
"attachType": "PHOTO",
"date" : "dt:2024-07-09 14:03:07",
"order": int,
"date_updated": "dt:2024-07-09 14:03:07",
},
},
# Single comment with three GIF attachments; the file metadata is
# expected to hold "width" and "height" next to the raw "extraJson"
# string.
{
"#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/20273040",
"#class" : chzzk.ChzzkCommentExtractor,
"#results": (
"https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTUw/MDAxNzQ5ODI1NjkyMzgx.8bsZ9moAfpuK3dqhHBxdd_CQdSuP5-MRrFgyJGDfdtEg.cs9HcI9BxBVXGUqJQhsUSGyOYvB3vj2itDB-arpvmokg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4a.gif",
"https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTAg/MDAxNzQ5ODI1NzA2NDk4.8PHxVU-4N8UE6mnDoDRhTMYoao9p0niz08DPQEqm2pog.C4KZL_RiK-jGlfKgoXJS5LdO3BDZUuPDCSsaqttE6Jwg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4ab.gif",
"https://nng-phinf.pstatic.net/MjAyNTA2MTNfMjUz/MDAxNzQ5ODI1NzAzNTIw.ZODg1ok9tj0e9jQYgdAouwb_4MPX938QPWwNyhPdGs8g.wB3uMXpHObpljfoBcUTuemJfiYHTYuUT629BDIL18cog.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4b.gif",
),
"#count" : 3,
"id" : 20273040,
"uid" : "f30b95fc9af53a75b781d7d3dd933892",
"date" : "dt:2025-06-13 23:42:18",
"content" : "https://mega.nz/file/DfoFgBAC#r5F_lbI4DUc2l5uuSlTMctMpk1I-qHC575ifLhYOWLI\nhttps://mega.nz/file/LWAmkCwR#BML88rd6vRu2rKg3UwKIJzdreU86w0StAmw_7h0Nueo\n\n",
"num" : int,
"user" : {
"userNickname": "memoji",
"userRoleCode": "streamer",
},
"file" : {
"attachType": "PHOTO",
"date" : "dt:2025-06-13 14:42:18",
"width" : int,
"order" : int,
"height": int,
"extraJson": "{\"width\":900,\"height\":800}",
"date_updated": "dt:2025-06-13 14:42:18",
},
},
# Community listing restricted to the first 50 results.
{
"#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
"#class": chzzk.ChzzkCommunityExtractor,
"#range": "1-50",
"#count": 50,
},
# Same listing with the "offset" option set to 50.
{
"#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
"#class" : chzzk.ChzzkCommunityExtractor,
"#options": {"offset": 50},
"#range" : "1-50",
"#count" : 50,
},
)