From f77e98b57dd92ea78d7745e1bff96cfc899a1d17 Mon Sep 17 00:00:00 2001
From: enduser420 <91022934+enduser420@users.noreply.github.com>
Date: Sat, 28 Jun 2025 18:57:19 +0530
Subject: [PATCH] [chzzk] add 'comment' and 'community' extractors (#7735
 #7741)

* [chzzk] add 'comment' and 'community' extractors
* [chzzk] update
* [chzzk] add tests
* [chzzk] update docs/supportedsites
* [chzzk] add 'offset' option
* [docs] add 'offset' option to gallery-dl.conf
---
 docs/configuration.rst           | 10 ++++
 docs/gallery-dl.conf             |  6 ++-
 docs/supportedsites.md           |  6 +++
 gallery_dl/extractor/__init__.py |  1 +
 gallery_dl/extractor/chzzk.py    | 81 ++++++++++++++++++++++++++++++++
 test/results/chzzk.py            | 81 ++++++++++++++++++++++++++++++++
 6 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 gallery_dl/extractor/chzzk.py
 create mode 100644 test/results/chzzk.py
diff --git a/docs/configuration.rst b/docs/configuration.rst
index f86c06a9..2aed705e 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -1778,6 +1778,16 @@ Description
     * ``false``: Match only URLs with known TLDs
 
 
+extractor.chzzk.offset
+----------------------
+Type
+    ``integer``
+Default
+    ``0``
+Description
+    Custom ``offset`` starting value when paginating over comments.
+
+
 extractor.cien.files
 --------------------
 Type
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 5e476167..3f7a4df2 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -1,4 +1,4 @@
-{
+    {
     "#": "gallery-dl default configuration file",
 
     "#": "full documentation at",
@@ -182,6 +182,10 @@
             "endpoint": "/api/_001",
             "tlds": false
         },
+        "chzzk":
+        {
+            "offset": 0
+        },
         "cien":
         {
             "sleep-request": "1.0-2.0",
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index aac36641..ff0992d2 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -169,6 +169,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Albums, Files</td>
     <td></td>
 </tr>
+<tr>
+    <td>Chzzk</td>
+    <td>https://chzzk.naver.com</td>
+    <td>Comments, Communities</td>
+    <td></td>
+</tr>
 <tr>
     <td>Ci-en</td>
     <td>https://ci-en.net/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 83e8ced7..9bafef03 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -38,6 +38,7 @@ modules = [
     "bunkr",
     "catbox",
     "chevereto",
+    "chzzk",
     "cien",
     "civitai",
     "comick",
diff --git a/gallery_dl/extractor/chzzk.py b/gallery_dl/extractor/chzzk.py
new file mode 100644
index 00000000..ed2e0f48
--- /dev/null
+++ b/gallery_dl/extractor/chzzk.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://chzzk.naver.com"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+class ChzzkExtractor(Extractor):
+    """Base class for chzzk extractors; subclasses provide comments()"""
+    category = "chzzk"
+    filename_fmt = "{uid}_{id}_{num}.{extension}"
+    directory_fmt = ("{category}", "{user[userNickname]}")
+    archive_fmt = "{uid}_{id}_{num}"
+
+    def request_api(self, uid, id=None, params=None):
+        """Return the 'content' object of a comment API response"""
+        url = ("https://apis.naver.com/nng_main/nng_comment_api/v1/type"
+               f"/CHANNEL_POST/id/{uid}/comments/{id or ''}")
+        return self.request_json(url, params=params)["content"]
+
+    def items(self):
+        """Yield a directory message per comment and a URL per attachment"""
+        parse = text.parse_datetime
+        fmt = "%Y-%m-%dT%H:%M:%S.%f%z"
+        for comment in self.comments():
+            data = comment["comment"]
+            files = data.pop("attaches", None) or ()  # may be null/absent
+            data["id"] = data["commentId"]
+            data["uid"] = data["objectId"]
+            data["user"] = comment["user"]
+            data["count"] = len(files)
+            data["date"] = parse(data["createdDate"], "%Y%m%d%H%M%S")
+
+            yield Message.Directory, data
+            for data["num"], file in enumerate(files, 1):
+                if extra := file.get("extraJson"):
+                    file.update(util.json_loads(extra))  # merge JSON keys
+                file["date"] = parse(file["createdDate"], fmt)
+                file["date_updated"] = parse(file["updatedDate"], fmt)
+                data["file"] = file
+                url = file["attachValue"]
+                yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class ChzzkCommentExtractor(ChzzkExtractor):
+    """Extractor for an individual comment from chzzk.naver.com"""
+    subcategory = "comment"
+    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community/detail/(\d+)"
+    example = "https://chzzk.naver.com/0123456789abcdef/community/detail/12345"
+
+    def comments(self):
+        """Return a 1-tuple with the comment's 'comment' and 'user' objects"""
+        content = self.request_api(*self.groups)  # (uid, comment id)
+        return ({key: content[key] for key in ("comment", "user")},)
+
+
+class ChzzkCommunityExtractor(ChzzkExtractor):
+    """Extractor for the comments of a chzzk.naver.com community page"""
+    subcategory = "community"
+    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community"
+    example = "https://chzzk.naver.com/0123456789abcdef/community"
+    request_interval = (0.5, 1.5)
+
+    def comments(self):
+        """Yield comment entries, requesting 10 per page via 'offset'"""
+        uid = self.groups[0]
+        # the 'offset' option selects the first requested entry
+        offset = text.parse_int(self.config("offset"))
+        params = {"limit": 10, "offset": offset, "pagingType": "PAGE"}
+        while True:
+            comments = self.request_api(uid, params=params)["comments"]
+            yield from comments["data"]
+            # stop as soon as the API reports no 'next' page
+            if not comments["page"]["next"]:
+                return
+            params["offset"] += params["limit"]
diff --git a/test/results/chzzk.py b/test/results/chzzk.py
new file mode 100644
index 00000000..25da3ce2
--- /dev/null
+++ b/test/results/chzzk.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import chzzk
+
+
+__tests__ = (
+
+{  # single comment with one PNG attachment
+    "#url"    : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/13393754",
+    "#class"  : chzzk.ChzzkCommentExtractor,
+    "#results": (
+        "https://nng-phinf.pstatic.net/MjAyNDA3MDlfNDgg/MDAxNzIwNTMzNzg2MDUx.0K9XrEW9CCSd2b7VdQHf8RGWkHAUsqEhNnLlleA11SUg.ZLx2V3gJPZR-kzrMY3E17wbu1ZmzYjitrEKmM_ykeWkg.PNG/tftyt.png",
+    ),
+    "#count"  : 1,
+
+    "id"      : 13393754,
+    "uid"     : "f30b95fc9af53a75b781d7d3dd933892",
+    "date"    : "dt:2024-07-09 23:03:07",  # NOTE(review): 9 hours ahead of file["date"] below; timezone-naive local time? confirm
+    "num"     : int,
+    "user"    : {
+        "userNickname": "memoji",
+        "userRoleCode": "streamer",
+    },
+    "file"     : {
+        "attachType": "PHOTO",
+        "date" : "dt:2024-07-09 14:03:07",
+        "order": int,
+        "date_updated": "dt:2024-07-09 14:03:07",
+    },
+},
+
+{  # single comment with three GIF attachments and 'extraJson' metadata
+    "#url"    : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/20273040",
+    "#class"  : chzzk.ChzzkCommentExtractor,
+    "#results": (
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTUw/MDAxNzQ5ODI1NjkyMzgx.8bsZ9moAfpuK3dqhHBxdd_CQdSuP5-MRrFgyJGDfdtEg.cs9HcI9BxBVXGUqJQhsUSGyOYvB3vj2itDB-arpvmokg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4a.gif",
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTAg/MDAxNzQ5ODI1NzA2NDk4.8PHxVU-4N8UE6mnDoDRhTMYoao9p0niz08DPQEqm2pog.C4KZL_RiK-jGlfKgoXJS5LdO3BDZUuPDCSsaqttE6Jwg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4ab.gif",
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMjUz/MDAxNzQ5ODI1NzAzNTIw.ZODg1ok9tj0e9jQYgdAouwb_4MPX938QPWwNyhPdGs8g.wB3uMXpHObpljfoBcUTuemJfiYHTYuUT629BDIL18cog.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4b.gif",
+    ),
+    "#count"  : 3,
+
+    "id"      : 20273040,
+    "uid"     : "f30b95fc9af53a75b781d7d3dd933892",
+    "date"    : "dt:2025-06-13 23:42:18",  # NOTE(review): again 9 hours ahead of file["date"] below
+    "content" : "https://mega.nz/file/DfoFgBAC#r5F_lbI4DUc2l5uuSlTMctMpk1I-qHC575ifLhYOWLI\nhttps://mega.nz/file/LWAmkCwR#BML88rd6vRu2rKg3UwKIJzdreU86w0StAmw_7h0Nueo\n\n",
+    "num"     : int,
+    "user"    : {
+        "userNickname": "memoji",
+        "userRoleCode": "streamer",
+    },
+    "file"      : {
+        "attachType": "PHOTO",
+        "date"  : "dt:2025-06-13 14:42:18",
+        "width" : int,
+        "order" : int,
+        "height": int,
+        "extraJson": "{\"width\":900,\"height\":800}",  # raw JSON text that the extractor also merges into this dict
+        "date_updated": "dt:2025-06-13 14:42:18",
+    },
+},
+
+{  # community listing, limited to the first 50 results
+    "#url"  : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
+    "#class": chzzk.ChzzkCommunityExtractor,
+    "#range": "1-50",
+    "#count": 50,
+},
+
+{  # same listing with the 'offset' option set to 50
+    "#url"    : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
+    "#class"  : chzzk.ChzzkCommunityExtractor,
+    "#options": {"offset": 50},
+    "#range"  : "1-50",
+    "#count"  : 50,
+},
+
+)