[chzzk] add 'comment' and 'community' extractors (#7735 #7741)

* [chzzk] add 'comment' and 'community' extractors
* [chzzk] update
* [chzzk] add tests
* [chzzk] update docs/supportedsites
* [chzzk] add 'offset' option
* [docs] add 'offset' option to gallery-dl.conf
2025-06-28 18:57:19 +05:30
parent c8e4a2f8d1
commit f77e98b57d
6 changed files with 184 additions and 1 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -1778,6 +1778,16 @@ Description
    * ``false``: Match only URLs with known TLDs


+extractor.chzzk.offset
+----------------------
+Type
+    ``integer``
+Default
+    ``0``
+Description
+    Custom ``offset`` starting value when paginating over comments.
+
+
 extractor.cien.files
 --------------------
 Type
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -1,4 +1,4 @@
-{
+    {
    "#": "gallery-dl default configuration file",

    "#": "full documentation at",
@@ -182,6 +182,10 @@
            "endpoint": "/api/_001",
            "tlds": false
        },
+        "chzzk":
+        {
+            "offset": 0
+        },
        "cien":
        {
            "sleep-request": "1.0-2.0",
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -169,6 +169,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Albums, Files</td>
    <td></td>
 </tr>
+<tr>
+    <td>Chzzk</td>
+    <td>https://chzzk.naver.com</td>
+    <td>Comments, Communities</td>
+    <td></td>
+</tr>
 <tr>
    <td>Ci-en</td>
    <td>https://ci-en.net/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -38,6 +38,7 @@ modules = [
    "bunkr",
    "catbox",
    "chevereto",
+    "chzzk",
    "cien",
    "civitai",
    "comick",
--- a/gallery_dl/extractor/chzzk.py
+++ b/gallery_dl/extractor/chzzk.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://chzzk.naver.com"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+class ChzzkExtractor(Extractor):
+    """Base class for chzzk extractors; subclasses provide comments()"""
+    category = "chzzk"
+    filename_fmt = "{uid}_{id}_{num}.{extension}"
+    directory_fmt = ("{category}", "{user[userNickname]}")
+    archive_fmt = "{uid}_{id}_{num}"
+
+    def request_api(self, uid, id=None, params=None):
+        return self.request_json(
+            f"https://apis.naver.com/nng_main/nng_comment_api/v1/type"
+            f"/CHANNEL_POST/id/{uid}/comments/{id or ''}",  # '' if no id
+            params=params)["content"]  # only the "content" member is returned
+
+    def items(self):  # yields one Directory and N Url messages per comment
+        for comment in self.comments():
+            data = comment["comment"]
+            files = data.pop("attaches") or ()  # falsy value -> no files
+            data["id"] = data["commentId"]
+            data["uid"] = data["objectId"]
+            data["user"] = comment["user"]
+            data["count"] = len(files)
+            data["date"] = text.parse_datetime(
+                data["createdDate"], "%Y%m%d%H%M%S")  # no UTC offset parsed
+
+            yield Message.Directory, data
+            for data["num"], file in enumerate(files, 1):  # 'num' is 1-based
+                if extra := file.get("extraJson"):
+                    file.update(util.json_loads(extra))  # merge decoded keys
+                file["date"] = text.parse_datetime(
+                    file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
+                file["date_updated"] = text.parse_datetime(
+                    file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
+                data["file"] = file  # 'data' dict is shared by all files
+                url = file["attachValue"]
+                yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class ChzzkCommentExtractor(ChzzkExtractor):
+    """Extractor for an individual comment from chzzk.naver.com"""
+    subcategory = "comment"
+    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community/detail/(\d+)"
+    example = "https://chzzk.naver.com/0123456789abcdef/community/detail/12345"
+
+    def comments(self):  # a 1-tuple, so items() can iterate over it
+        uid, id = self.groups  # presumably the two groups of 'pattern'
+        res = self.request_api(uid, id)
+        return ({"comment": res["comment"], "user": res["user"]},)
+
+
+class ChzzkCommunityExtractor(ChzzkExtractor):
+    """Extractor for all comments of a chzzk.naver.com community page"""
+    subcategory = "community"
+    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community"
+    example = "https://chzzk.naver.com/0123456789abcdef/community"
+    request_interval = (0.5, 1.5)
+
+    def comments(self):  # generator; requests pages until 'next' is falsy
+        uid = self.match[1]  # presumably group 1 of 'pattern' - confirm
+        params = {
+            "limit": 10,  # page size, also the 'offset' step below
+            "offset": text.parse_int(self.config("offset")),  # start value
+            "pagingType": "PAGE",
+        }
+        while True:
+            comments = self.request_api(uid, params=params)["comments"]
+            yield from comments["data"]
+            if not comments["page"]["next"]:  # no further page reported
+                return
+            params["offset"] += params["limit"]  # advance by one page
--- a/test/results/chzzk.py
+++ b/test/results/chzzk.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import chzzk
+
+
+__tests__ = (
+
+{
+    "#url"    : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/13393754",
+    "#class"  : chzzk.ChzzkCommentExtractor,
+    "#results": (
+        "https://nng-phinf.pstatic.net/MjAyNDA3MDlfNDgg/MDAxNzIwNTMzNzg2MDUx.0K9XrEW9CCSd2b7VdQHf8RGWkHAUsqEhNnLlleA11SUg.ZLx2V3gJPZR-kzrMY3E17wbu1ZmzYjitrEKmM_ykeWkg.PNG/tftyt.png",
+    ),
+    "#count"  : 1,
+
+    "id"      : 13393754,
+    "uid"     : "f30b95fc9af53a75b781d7d3dd933892",
+    "date"    : "dt:2024-07-09 23:03:07",  # comment 'createdDate', parsed without offset
+    "num"     : int,
+    "user"    : {
+        "userNickname": "memoji",
+        "userRoleCode": "streamer",
+    },
+    "file"     : {
+        "attachType": "PHOTO",
+        "date" : "dt:2024-07-09 14:03:07",  # attachment 'createdDate', parsed with %z
+        "order": int,
+        "date_updated": "dt:2024-07-09 14:03:07",
+    },
+},
+
+{
+    "#url"    : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/20273040",
+    "#class"  : chzzk.ChzzkCommentExtractor,
+    "#results": (
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTUw/MDAxNzQ5ODI1NjkyMzgx.8bsZ9moAfpuK3dqhHBxdd_CQdSuP5-MRrFgyJGDfdtEg.cs9HcI9BxBVXGUqJQhsUSGyOYvB3vj2itDB-arpvmokg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4a.gif",
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTAg/MDAxNzQ5ODI1NzA2NDk4.8PHxVU-4N8UE6mnDoDRhTMYoao9p0niz08DPQEqm2pog.C4KZL_RiK-jGlfKgoXJS5LdO3BDZUuPDCSsaqttE6Jwg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4ab.gif",
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMjUz/MDAxNzQ5ODI1NzAzNTIw.ZODg1ok9tj0e9jQYgdAouwb_4MPX938QPWwNyhPdGs8g.wB3uMXpHObpljfoBcUTuemJfiYHTYuUT629BDIL18cog.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4b.gif",
+    ),
+    "#count"  : 3,
+
+    "id"      : 20273040,
+    "uid"     : "f30b95fc9af53a75b781d7d3dd933892",
+    "date"    : "dt:2025-06-13 23:42:18",
+    "content" : "https://mega.nz/file/DfoFgBAC#r5F_lbI4DUc2l5uuSlTMctMpk1I-qHC575ifLhYOWLI\nhttps://mega.nz/file/LWAmkCwR#BML88rd6vRu2rKg3UwKIJzdreU86w0StAmw_7h0Nueo\n\n",
+    "num"     : int,
+    "user"    : {
+        "userNickname": "memoji",
+        "userRoleCode": "streamer",
+    },
+    "file"      : {
+        "attachType": "PHOTO",
+        "date"  : "dt:2025-06-13 14:42:18",
+        "width" : int,
+        "order" : int,
+        "height": int,
+        "extraJson": "{\"width\":900,\"height\":800}",
+        "date_updated": "dt:2025-06-13 14:42:18",
+    },
+},
+
+{
+    "#url"  : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
+    "#class": chzzk.ChzzkCommunityExtractor,
+    "#range": "1-50",
+    "#count": 50,
+},
+
+{
+    "#url"    : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
+    "#class"  : chzzk.ChzzkCommunityExtractor,
+    "#options": {"offset": 50},  # initial 'offset' request parameter
+    "#range"  : "1-50",
+    "#count"  : 50,
+},
+
+)