From f77e98b57dd92ea78d7745e1bff96cfc899a1d17 Mon Sep 17 00:00:00 2001 From: enduser420 <91022934+enduser420@users.noreply.github.com> Date: Sat, 28 Jun 2025 18:57:19 +0530 Subject: [PATCH] [chzzk] add 'comment' and 'community' extractors (#7735 #7741) * [chzzk] add 'comment' and 'community' extractors * [chzzk] update * [chzzk] add tests * [chzzk] update docs/supportedsites * [chzzk] add 'offset' option * [docs] add 'offset' option to gallery-dl.conf --- docs/configuration.rst | 10 ++++ docs/gallery-dl.conf | 6 ++- docs/supportedsites.md | 6 +++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/chzzk.py | 81 ++++++++++++++++++++++++++++++++ test/results/chzzk.py | 81 ++++++++++++++++++++++++++++++++ 6 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 gallery_dl/extractor/chzzk.py create mode 100644 test/results/chzzk.py diff --git a/docs/configuration.rst b/docs/configuration.rst index f86c06a9..2aed705e 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1778,6 +1778,16 @@ Description * ``false``: Match only URLs with known TLDs +extractor.chzzk.offset +---------------------- +Type + ``integer`` +Default + ``0`` +Description + Custom ``offset`` starting value when paginating over comments. + + extractor.cien.files -------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 5e476167..3f7a4df2 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -1,4 +1,4 @@ -{ + { "#": "gallery-dl default configuration file", "#": "full documentation at", @@ -182,6 +182,10 @@ "endpoint": "/api/_001", "tlds": false }, + "chzzk": + { + "offset": 0 + }, "cien": { "sleep-request": "1.0-2.0", diff --git a/docs/supportedsites.md b/docs/supportedsites.md index aac36641..ff0992d2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -169,6 +169,12 @@ Consider all listed sites to potentially be NSFW. 
Albums, Files + + Chzzk + https://chzzk.naver.com + Comments, Communities + + Ci-en https://ci-en.net/
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 83e8ced7..9bafef03 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -38,6 +38,7 @@ modules = [
     "bunkr",
     "catbox",
     "chevereto",
+    "chzzk",
     "cien",
     "civitai",
     "comick",
diff --git a/gallery_dl/extractor/chzzk.py b/gallery_dl/extractor/chzzk.py
new file mode 100644
index 00000000..ed2e0f48
--- /dev/null
+++ b/gallery_dl/extractor/chzzk.py
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://chzzk.naver.com"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+class ChzzkExtractor(Extractor):
+    """Base class for chzzk extractors
+
+    Subclasses implement comments(), which provides dicts with a
+    "comment" and a "user" entry. items() emits one Directory message
+    per comment and one Url message per attached file.
+    """
+    category = "chzzk"
+    filename_fmt = "{uid}_{id}_{num}.{extension}"
+    directory_fmt = ("{category}", "{user[userNickname]}")
+    archive_fmt = "{uid}_{id}_{num}"
+
+    def request_api(self, uid, id=None, params=None):
+        """Return the "content" value of a comment API response
+
+        The request path ends in 'comments/{id}', or in 'comments/'
+        when no 'id' is given.
+        """
+        return self.request_json(
+            f"https://apis.naver.com/nng_main/nng_comment_api/v1/type"
+            f"/CHANNEL_POST/id/{uid}/comments/{id or ''}",
+            params=params)["content"]
+
+    def items(self):
+        """Yield a Directory message per comment and a Url per attachment"""
+        for comment in self.comments():
+            data = comment["comment"]
+            # use an empty tuple when "attaches" is missing, null, or empty
+            files = data.pop("attaches", None) or ()
+            data["id"] = data["commentId"]
+            data["uid"] = data["objectId"]
+            data["user"] = comment["user"]
+            data["count"] = len(files)
+            # NOTE(review): 'createdDate' is parsed without a UTC offset; the
+            # test results show it 9h ahead of the file 'date' - confirm tz
+            data["date"] = text.parse_datetime(
+                data["createdDate"], "%Y%m%d%H%M%S")
+
+            yield Message.Directory, data
+            # 'num' is written into 'data' itself: 1-based attachment index
+            for data["num"], file in enumerate(files, 1):
+                if extra := file.get("extraJson"):
+                    # merge the JSON-encoded extra fields into 'file'
+                    file.update(util.json_loads(extra))
+                file["date"] = text.parse_datetime(
+                    file["createdDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
+                file["date_updated"] = text.parse_datetime(
+                    file["updatedDate"], "%Y-%m-%dT%H:%M:%S.%f%z")
+                data["file"] = file
+                url = file["attachValue"]
+                yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class ChzzkCommentExtractor(ChzzkExtractor):
+    """Extractor for an individual comment from chzzk.naver.com"""
+    subcategory = "comment"
+    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community/detail/(\d+)"
+    example = "https://chzzk.naver.com/0123456789abcdef/community/detail/12345"
+
+    def comments(self):
+        """Return a 1-tuple holding the comment referenced by the URL"""
+        uid, comment_id = self.groups
+        res = self.request_api(uid, comment_id)
+        return ({"comment": res["comment"], "user": res["user"]},)
+
+
+class ChzzkCommunityExtractor(ChzzkExtractor):
+    """Extractor for comments from chzzk.naver.com"""
+    subcategory = "community"
+    pattern = r"(?:https?://)?chzzk\.naver\.com/(\w+)/community"
+    example = "https://chzzk.naver.com/0123456789abcdef/community"
+    request_interval = (0.5, 1.5)
+
+    def comments(self):
+        """Yield comments page by page until the API reports no next page
+
+        Pagination starts at the value of the 'offset' option.
+        """
+        uid = self.groups[0]
+        params = {
+            "limit": 10,
+            "offset": text.parse_int(self.config("offset")),
+            "pagingType": "PAGE",
+        }
+        while True:
+            comments = self.request_api(uid, params=params)["comments"]
+            yield from comments["data"]
+            if not comments["page"]["next"]:
+                return
+            params["offset"] += params["limit"]
diff --git a/test/results/chzzk.py b/test/results/chzzk.py
new file mode 100644
index 00000000..25da3ce2
--- /dev/null
+++ b/test/results/chzzk.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import chzzk
+
+
+__tests__ = (  # result definitions for the chzzk extractors
+
+{  # single comment URL with one attachment
+    "#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/13393754",
+    "#class" : chzzk.ChzzkCommentExtractor,
+    "#results": (
+        "https://nng-phinf.pstatic.net/MjAyNDA3MDlfNDgg/MDAxNzIwNTMzNzg2MDUx.0K9XrEW9CCSd2b7VdQHf8RGWkHAUsqEhNnLlleA11SUg.ZLx2V3gJPZR-kzrMY3E17wbu1ZmzYjitrEKmM_ykeWkg.PNG/tftyt.png",
+    ),
+    "#count" : 1,
+
+    "id" : 13393754,
+    "uid" : "f30b95fc9af53a75b781d7d3dd933892",
+    "date" : "dt:2024-07-09 23:03:07",  # from 'createdDate' (no UTC offset)
+    "num" : int,
+    "user" : {
+        "userNickname": "memoji",
+        "userRoleCode": "streamer",
+    },
+    "file" : {
+        "attachType": "PHOTO",
+        "date" : "dt:2024-07-09 14:03:07",
+        "order": int,
+        "date_updated": "dt:2024-07-09 14:03:07",
+    },
+},
+
+{  # single comment URL with three attachments
+    "#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community/detail/20273040",
+    "#class" : chzzk.ChzzkCommentExtractor,
+    "#results": (
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTUw/MDAxNzQ5ODI1NjkyMzgx.8bsZ9moAfpuK3dqhHBxdd_CQdSuP5-MRrFgyJGDfdtEg.cs9HcI9BxBVXGUqJQhsUSGyOYvB3vj2itDB-arpvmokg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4a.gif",
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMTAg/MDAxNzQ5ODI1NzA2NDk4.8PHxVU-4N8UE6mnDoDRhTMYoao9p0niz08DPQEqm2pog.C4KZL_RiK-jGlfKgoXJS5LdO3BDZUuPDCSsaqttE6Jwg.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4ab.gif",
+        "https://nng-phinf.pstatic.net/MjAyNTA2MTNfMjUz/MDAxNzQ5ODI1NzAzNTIw.ZODg1ok9tj0e9jQYgdAouwb_4MPX938QPWwNyhPdGs8g.wB3uMXpHObpljfoBcUTuemJfiYHTYuUT629BDIL18cog.GIF/%EB%AC%BC%EC%9E%90%EB%AF%B8%EB%84%A4b.gif",
+    ),
+    "#count" : 3,
+
+    "id" : 20273040,
+    "uid" : "f30b95fc9af53a75b781d7d3dd933892",
+    "date" : "dt:2025-06-13 23:42:18",
+    "content" : "https://mega.nz/file/DfoFgBAC#r5F_lbI4DUc2l5uuSlTMctMpk1I-qHC575ifLhYOWLI\nhttps://mega.nz/file/LWAmkCwR#BML88rd6vRu2rKg3UwKIJzdreU86w0StAmw_7h0Nueo\n\n",
+    "num" : int,
+    "user" : {
+        "userNickname": "memoji",
+        "userRoleCode": "streamer",
+    },
+    "file" : {
+        "attachType": "PHOTO",
+        "date" : "dt:2025-06-13 14:42:18",
+        "width" : int,  # merged in from "extraJson"
+        "order" : int,
+        "height": int,  # merged in from "extraJson"
+        "extraJson": "{\"width\":900,\"height\":800}",
+        "date_updated": "dt:2025-06-13 14:42:18",
+    },
+},
+
+{  # community listing, limited to the first 50 results
+    "#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
+    "#class": chzzk.ChzzkCommunityExtractor,
+    "#range": "1-50",
+    "#count": 50,
+},
+
+{  # community listing with the 'offset' option set to 50
+    "#url" : "https://chzzk.naver.com/f30b95fc9af53a75b781d7d3dd933892/community",
+    "#class" : chzzk.ChzzkCommunityExtractor,
+    "#options": {"offset": 50},
+    "#range" : "1-50",
+    "#count" : 50,
+},
+
+)