From ff5f6fe70fd2e2d7002524a9c945281c82569ea4 Mon Sep 17 00:00:00 2001 From: Dominik Prange Date: Sat, 4 Jan 2025 19:49:48 +0100 Subject: [PATCH] [boosty] added new direct message extractor - formatting - fixed linting formatting errors - fixed E999 SyntaxError: invalid syntax - fixed class naming - fixed mandatory extractor.boosty.metadata as true requirement - update - apply changes - add test - update docs/supportedsites - improve 'dialog' pagination logic --- docs/supportedsites.md | 2 +- gallery_dl/extractor/boosty.py | 92 +++++++++++++++++++++++++++++++--- scripts/supportedsites.py | 1 + test/results/boosty.py | 19 +++++++ 4 files changed, 106 insertions(+), 8 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dc716619..2ba82626 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -148,7 +148,7 @@ Consider all listed sites to potentially be NSFW. Boosty https://www.boosty.to/ - Subscriptions Feed, Followed Users, Media Files, Posts, User Profiles + DMs, Subscriptions Feed, Followed Users, Media Files, Posts, User Profiles Cookies diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py index c28fad97..f3e441b1 100644 --- a/gallery_dl/extractor/boosty.py +++ b/gallery_dl/extractor/boosty.py @@ -8,6 +8,7 @@ from .common import Extractor, Message from .. import text, util, exception +import itertools BASE_PATTERN = r"(?:https?://)?boosty\.to" @@ -53,7 +54,9 @@ class BoostyExtractor(Extractor): self.log.warning("Not allowed to access post %s", post["id"]) continue - files = self._process_post(post) + files = self._extract_files(post) + if self._user: + post["user"] = self._user data = { "post" : post, "user" : post.pop("user", None), @@ -69,15 +72,13 @@ class BoostyExtractor(Extractor): def posts(self): """Yield JSON content of all relevant posts""" - def _process_post(self, post): + def _extract_files(self, post): files = [] post["content"] = content = [] post["links"] = links = [] if "createdAt" in post: post["date"] = text.parse_timestamp(post["createdAt"]) - if self._user: - post["user"] = self._user for block in post["data"]: try: @@ -94,7 +95,7 @@ class BoostyExtractor(Extractor): elif type == "ok_video": if not self.videos: self.log.debug("%s: Skipping video %s", - post["int_id"], block["id"]) + post["id"], block["id"]) continue fmts = { fmt["type"]: fmt["url"] @@ -114,7 +115,7 @@ class BoostyExtractor(Extractor): else: self.log.warning( "%s: Found no suitable video format for %s", - post["int_id"], block["id"]) + post["id"], block["id"]) elif type == "link": url = block["url"] @@ -127,9 +128,12 @@ class BoostyExtractor(Extractor): elif type == "file": files.append(self._update_url(post, block)) + elif type == "smile": + content.append(":" + block["name"] + ":") + else: self.log.debug("%s: Unsupported data type '%s'", - post["int_id"], type) + post["id"], type) except Exception as exc: self.log.debug("%s: %s", exc.__class__.__name__, exc) @@ -219,6 +223,51 @@ class BoostyFollowingExtractor(BoostyExtractor): yield Message.Queue, url, user +class BoostyDirectMessagesExtractor(BoostyExtractor): + """Extractor for boosty.to direct messages""" + subcategory = "direct-messages" + directory_fmt = ("{category}", "{user[blogUrl]} ({user[id]})", + "Direct Messages") + pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)" + example = "https://boosty.to/app/messages?dialogId=12345" + + def items(self): + """Yield direct messages from a given dialog ID.""" + dialog_id = self.groups[0] + response = self.api.dialog(dialog_id) + signed_query = response.get("signedQuery") + + try: + messages = response["messages"]["data"] + offset = messages[0]["id"] + except Exception: + return + + try: + user = self.api.user(response["chatmate"]["url"]) + except Exception: + user = None + + messages.reverse() + for message in itertools.chain( + messages, + self.api.dialog_messages(dialog_id, offset=offset) + ): + message["signedQuery"] = signed_query + files = self._extract_files(message) + data = { + "post": message, + "user": user, + "count": len(files), + } + + yield Message.Directory, data + for data["num"], file in enumerate(files, 1): + data["file"] = file + url = file["url"] + yield Message.Url, url, text.nameext_from_url(url, data) + + class BoostyAPI(): """Interface for the Boosty API""" root = "https://api.boosty.to" @@ -367,3 +416,32 @@ class BoostyAPI(): if offset > data["total"]: return params["offset"] = offset + + def dialog(self, dialog_id): + endpoint = "/v1/dialog/{}".format(dialog_id) + return self._call(endpoint) + + def dialog_messages(self, dialog_id, limit=300, offset=None): + endpoint = "/v1/dialog/{}/message/".format(dialog_id) + params = { + "limit": limit, + "reverse": "true", + "offset": offset, + } + return self._pagination_dialog(endpoint, params) + + def _pagination_dialog(self, endpoint, params): + while True: + data = self._call(endpoint, params) + + yield from data["data"] + + try: + extra = data["extra"] + if extra.get("isLast"): + break + params["offset"] = offset = extra["offset"] + if not offset: + break + except Exception: + break diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 92fa4268..22eb9832 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -211,6 +211,7 @@ SUBCATEGORY_MAP = { "posts": "", }, "boosty": { + "direct-messages": "DMs", "feed": "Subscriptions Feed", }, "civitai": { diff --git a/test/results/boosty.py b/test/results/boosty.py index 2da0e6b3..b570c502 100644 --- a/test/results/boosty.py +++ b/test/results/boosty.py @@ -131,5 +131,24 @@ __tests__ = ( "#auth" : True, }, +{ + "#url" : "https://boosty.to/app/messages?dialogId=3598621", + "#class" : boosty.BoostyDirectMessagesExtractor, + "#auth" : True, + "#count" : 7, + + "count" : 1, + "extension": "", + "file" : dict, + "user" : dict, + + "post": { + "authorId": int, + "content" : list, + "date" : "type:datetime", + "dialogId": 3598621, + "id" : int, + }, +}, )