From f77e98b57dd92ea78d7745e1bff96cfc899a1d17 Mon Sep 17 00:00:00 2001
From: enduser420 <91022934+enduser420@users.noreply.github.com>
Date: Sat, 28 Jun 2025 18:57:19 +0530
Subject: [PATCH] [chzzk] add 'comment' and 'community' extractors (#7735
#7741)
* [chzzk] add 'comment' and 'community' extractors
* [chzzk] update
* [chzzk] add tests
* [chzzk] update docs/supportedsites
* [chzzk] add 'offset' option
* [docs] add 'offset' option to gallery-dl.conf
---
docs/configuration.rst | 10 ++++
docs/gallery-dl.conf | 6 ++-
docs/supportedsites.md | 6 +++
gallery_dl/extractor/__init__.py | 1 +
gallery_dl/extractor/chzzk.py | 81 ++++++++++++++++++++++++++++++++
test/results/chzzk.py | 81 ++++++++++++++++++++++++++++++++
6 files changed, 184 insertions(+), 1 deletion(-)
create mode 100644 gallery_dl/extractor/chzzk.py
create mode 100644 test/results/chzzk.py
diff --git a/docs/configuration.rst b/docs/configuration.rst
index f86c06a9..2aed705e 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -1778,6 +1778,16 @@ Description
* ``false``: Match only URLs with known TLDs
+extractor.chzzk.offset
+----------------------
+Type
+ ``integer``
+Default
+ ``0``
+Description
+ Custom ``offset`` starting value when paginating over comments.
+
+
extractor.cien.files
--------------------
Type
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 5e476167..3f7a4df2 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -1,4 +1,4 @@
-{
+ {
"#": "gallery-dl default configuration file",
"#": "full documentation at",
@@ -182,6 +182,10 @@
"endpoint": "/api/_001",
"tlds": false
},
+ "chzzk":
+ {
+ "offset": 0
+ },
"cien":
{
"sleep-request": "1.0-2.0",
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index aac36641..ff0992d2 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -169,6 +169,12 @@ Consider all listed sites to potentially be NSFW.
Albums, Files |
|
+
+ | Chzzk |
+ https://chzzk.naver.com |
+ Comments, Communities |
+ |
+
| Ci-en |
https://ci-en.net/ |
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 83e8ced7..9bafef03 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -38,6 +38,7 @@ modules = [
"bunkr",
"catbox",
"chevereto",
+ "chzzk",
"cien",
"civitai",
"comick",
diff --git a/gallery_dl/extractor/chzzk.py b/gallery_dl/extractor/chzzk.py
new file mode 100644
index 00000000..ed2e0f48
--- /dev/null
+++ b/gallery_dl/extractor/chzzk.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://chzzk.naver.com"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+class ChzzkExtractor(Extractor):
+ """Base class for chzzk extractors"""
+ category = "chzzk"
+ filename_fmt = "{uid}_{id}_{num}.{extension}"
+ directory_fmt = ("{category}", "{user[userNickname]}")
+ archive_fmt = "{uid}_{id}_{num}"
+
+ def request_api(self, uid, id=None, params=None):
+ return self.request_json(
+ f"https://apis.naver.com/nng_main/nng_comment_api/v1/type"
+ f"/CHANNEL_POST/id/{uid}/comments/{id or ''}",
+ params=params)["content"]
+
+ def items(self):
+ for comment in self.comments():
+ data = comment["comment"]
+ files = data.pop("attaches") or ()
+ data["id"] = data["commentId"]
+ data["uid"] = data["objectId"]
+ data["user"] = comment["user"]
+ data["count"] = len(files)
+ data["date"] = text.parse_datetime(
+ data["createdDate"], "%Y%m%d%H%M%S")
+
+ yield Message.Directory, data
+ for data["num"], file in enumerate(files, 1):
+ if extra := file.get("extraJson"):
+ file.update(util.json_loads(extra))
+ file["date"] = text.parse_datetime(
+ file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ file["date_updated"] = text.parse_datetime(
+ file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ data["file"] = file
+ url = file["attachValue"]
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class ChzzkCommentExtractor(ChzzkExtractor):
+ """Extractor for individual comment from chzzk.naver.com"""
+ subcategory = "comment"
+ pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community/detail/(\d+)"
+ example = "https://chzzk.naver.com/0123456789abcdef/community/detail/12345"
+
+ def comments(self):
+ uid, id = self.groups
+ res = self.request_api(uid, id)
+ return ({"comment": res["comment"], "user": res["user"]},)
+
+
+class ChzzkCommunityExtractor(ChzzkExtractor):
+ """Extractor for comments from chzzk.naver.com"""
+ subcategory = "community"
+ pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community"
+ example = "https://chzzk.naver.com/0123456789abcdef/community"
+ request_interval = (0.5, 1.5)
+
+ def comments(self):
+ uid = self.match[1]
+ params = {
+ "limit": 10,
+ "offset": text.parse_int(self.config("offset")),
+ "pagingType": "PAGE",
+ }
+ while True:
+ comments = self.request_api(uid, params=params)["comments"]
+ yield from comments["data"]
+ if not comments["page"]["next"]:
+ return
+ params["offset"] += params["limit"]
diff --git a/test/results/chzzk.py b/test/results/chzzk.py
new file mode 100644
index 00000000..25da3ce2
--- /dev/null
+++ b/test/results/chzzk.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import chzzk
+
+
+# Expected results for the chzzk extractors: each entry names a URL, the
+# extractor class that should handle it, and the values to check.
+__tests__ = (
+
+# single comment: exactly one file URL is expected
+# NOTE(review): the comment-level "date" is 9 hours ahead of the file-level
+# "date" - presumably a timezone difference; confirm
+{
+ "#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/13393754",
+ "#class" : chzzk.ChzzkCommentExtractor,
+ "#results": (
+ "https://nng-phinf.pstatic.net/MjAyNDA3MDlfNDgg/MDAxNzIwNTMzNzg2MDUx.0K9XrEW9CCSd2b7VdQHf8RGWkHAUsqEhNnLlleA11SUg.ZLx2V3gJPZR-kzrMY3E17wbu1ZmzYjitrEKmM_ykeWkg.PNG/tftyt.png",
+ ),
+ "#count" : 1,
+
+ "id" : 13393754,
+ "uid" : "f30b95fc9af53a75b781d7d3dd933892",
+ "date" : "dt:2024-07-09 23:03:07",
+ "num" : int,
+ "user" : {
+ "userNickname": "memoji",
+ "userRoleCode": "streamer",
+ },
+ "file" : {
+ "attachType": "PHOTO",
+ "date" : "dt:2024-07-09 14:03:07",
+ "order": int,
+ "date_updated": "dt:2024-07-09 14:03:07",
+ },
+},
+
+# single comment: three file URLs are expected; the "file" metadata holds
+# "width"/"height" values alongside the raw "extraJson" string
+{
+ "#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/20273040",
+ "#class" : chzzk.ChzzkCommentExtractor,
+ "#results": (
+ "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTUw/MDAxNzQ5ODI1NjkyMzgx.8bsZ9moAfpuK3dqhHBxdd_CQdSuP5-MRrFgyJGDfdtEg.cs9HcI9BxBVXGUqJQhsUSGyOYvB3vj2itDB-arpvmokg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4a.gif",
+ "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTAg/MDAxNzQ5ODI1NzA2NDk4.8PHxVU-4N8UE6mnDoDRhTMYoao9p0niz08DPQEqm2pog.C4KZL_RiK-jGlfKgoXJS5LdO3BDZUuPDCSsaqttE6Jwg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4ab.gif",
+ "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMjUz/MDAxNzQ5ODI1NzAzNTIw.ZODg1ok9tj0e9jQYgdAouwb_4MPX938QPWwNyhPdGs8g.wB3uMXpHObpljfoBcUTuemJfiYHTYuUT629BDIL18cog.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4b.gif",
+ ),
+ "#count" : 3,
+
+ "id" : 20273040,
+ "uid" : "f30b95fc9af53a75b781d7d3dd933892",
+ "date" : "dt:2025-06-13 23:42:18",
+ "content" : "https://mega.nz/file/DfoFgBAC#r5F_lbI4DUc2l5uuSlTMctMpk1I-qHC575ifLhYOWLI\nhttps://mega.nz/file/LWAmkCwR#BML88rd6vRu2rKg3UwKIJzdreU86w0StAmw_7h0Nueo\n\n",
+ "num" : int,
+ "user" : {
+ "userNickname": "memoji",
+ "userRoleCode": "streamer",
+ },
+ "file" : {
+ "attachType": "PHOTO",
+ "date" : "dt:2025-06-13 14:42:18",
+ "width" : int,
+ "order" : int,
+ "height": int,
+ "extraJson": "{\"width\":900,\"height\":800}",
+ "date_updated": "dt:2025-06-13 14:42:18",
+ },
+},
+
+# community listing with "#range" 1-50: 50 results are expected
+{
+ "#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
+ "#class": chzzk.ChzzkCommunityExtractor,
+ "#range": "1-50",
+ "#count": 50,
+},
+
+# same listing with the "offset" option set to 50: 50 results are expected
+{
+ "#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
+ "#class" : chzzk.ChzzkCommunityExtractor,
+ "#options": {"offset": 50},
+ "#range" : "1-50",
+ "#count" : 50,
+},
+
+)