From ff5f6fe70fd2e2d7002524a9c945281c82569ea4 Mon Sep 17 00:00:00 2001
From: Dominik Prange <dominikprange@protonmail.com>
Date: Sat, 4 Jan 2025 19:49:48 +0100
Subject: [PATCH] [boosty] added new direct message extractor

- formatting
- fixed linting formatting errors
- fixed E999 SyntaxError: invalid syntax
- fixed class naming
- fixed 'extractor.boosty.metadata' having to be set to true
- update
  - apply changes
  - add test
  - update docs/supportedsites
- improve 'dialog' pagination logic
---
 docs/supportedsites.md         |  2 +-
 gallery_dl/extractor/boosty.py | 92 +++++++++++++++++++++++++++++++---
 scripts/supportedsites.py      |  1 +
 test/results/boosty.py         | 19 +++++++
 4 files changed, 106 insertions(+), 8 deletions(-)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index dc716619..2ba82626 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -148,7 +148,7 @@ Consider all listed sites to potentially be NSFW.
 <tr>
     <td>Boosty</td>
     <td>https://www.boosty.to/</td>
-    <td>Subscriptions Feed, Followed Users, Media Files, Posts, User Profiles</td>
+    <td>DMs, Subscriptions Feed, Followed Users, Media Files, Posts, User Profiles</td>
     <td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
 </tr>
 <tr>
diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py
index c28fad97..f3e441b1 100644
--- a/gallery_dl/extractor/boosty.py
+++ b/gallery_dl/extractor/boosty.py
@@ -8,6 +8,7 @@
 
 from .common import Extractor, Message
 from .. import text, util, exception
+import itertools
 
 BASE_PATTERN = r"(?:https?://)?boosty\.to"
 
@@ -53,7 +54,9 @@ class BoostyExtractor(Extractor):
                 self.log.warning("Not allowed to access post %s", post["id"])
                 continue
 
-            files = self._process_post(post)
+            files = self._extract_files(post)
+            if self._user:
+                post["user"] = self._user
             data = {
                 "post" : post,
                 "user" : post.pop("user", None),
@@ -69,15 +72,13 @@ class BoostyExtractor(Extractor):
     def posts(self):
         """Yield JSON content of all relevant posts"""
 
-    def _process_post(self, post):
+    def _extract_files(self, post):
         files = []
         post["content"] = content = []
         post["links"] = links = []
 
         if "createdAt" in post:
             post["date"] = text.parse_timestamp(post["createdAt"])
-        if self._user:
-            post["user"] = self._user
 
         for block in post["data"]:
             try:
@@ -94,7 +95,7 @@ class BoostyExtractor(Extractor):
                 elif type == "ok_video":
                     if not self.videos:
                         self.log.debug("%s: Skipping video %s",
-                                       post["int_id"], block["id"])
+                                       post["id"], block["id"])
                         continue
                     fmts = {
                         fmt["type"]: fmt["url"]
@@ -114,7 +115,7 @@ class BoostyExtractor(Extractor):
                     else:
                         self.log.warning(
                             "%s: Found no suitable video format for %s",
-                            post["int_id"], block["id"])
+                            post["id"], block["id"])
 
                 elif type == "link":
                     url = block["url"]
@@ -127,9 +128,12 @@ class BoostyExtractor(Extractor):
                 elif type == "file":
                     files.append(self._update_url(post, block))
 
+                elif type == "smile":
+                    content.append(":" + block["name"] + ":")
+
                 else:
                     self.log.debug("%s: Unsupported data type '%s'",
-                                   post["int_id"], type)
+                                   post["id"], type)
             except Exception as exc:
                 self.log.debug("%s: %s", exc.__class__.__name__, exc)
 
@@ -219,6 +223,51 @@ class BoostyFollowingExtractor(BoostyExtractor):
             yield Message.Queue, url, user
 
 
+class BoostyDirectMessagesExtractor(BoostyExtractor):
+    """Extractor for the messages of a single boosty.to dialog"""
+    subcategory = "direct-messages"
+    directory_fmt = ("{category}", "{user[blogUrl]} ({user[id]})",
+                     "Direct Messages")
+    pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)"
+    example = "https://boosty.to/app/messages?dialogId=12345"
+
+    def items(self):
+        """Yield a directory message and file URLs for each dialog message"""
+        dialog_id = self.groups[0]
+        dialog = self.api.dialog(dialog_id)
+        signed_query = dialog.get("signedQuery")
+
+        try:
+            initial = dialog["messages"]["data"]
+            offset = initial[0]["id"]
+        except Exception:
+            # nothing to extract without an initial batch of messages
+            return
+
+        try:
+            chatmate = self.api.user(dialog["chatmate"]["url"])
+        except Exception:
+            chatmate = None
+
+        initial.reverse()
+        remaining = self.api.dialog_messages(dialog_id, offset=offset)
+        for msg in itertools.chain(initial, remaining):
+            msg["signedQuery"] = signed_query
+            files = self._extract_files(msg)
+            kwdict = {
+                "post": msg,
+                "user": chatmate,
+                "count": len(files),
+            }
+
+            yield Message.Directory, kwdict
+            for num, file in enumerate(files, 1):
+                kwdict["num"] = num
+                kwdict["file"] = file
+                url = file["url"]
+                yield Message.Url, url, text.nameext_from_url(url, kwdict)
+
+
 class BoostyAPI():
     """Interface for the Boosty API"""
     root = "https://api.boosty.to"
@@ -367,3 +416,32 @@ class BoostyAPI():
             if offset > data["total"]:
                 return
             params["offset"] = offset
+
+    def dialog(self, dialog_id):
+        """Request the API object of the dialog with ID 'dialog_id'"""
+        return self._call("/v1/dialog/{}".format(dialog_id))
+
+    def dialog_messages(self, dialog_id, limit=300, offset=None):
+        """Return a generator over the messages of dialog 'dialog_id'
+
+        'offset' is forwarded as the initial "offset" query parameter.
+        """
+        query = {"limit": limit, "reverse": "true", "offset": offset}
+        path = "/v1/dialog/{}/message/".format(dialog_id)
+        return self._pagination_dialog(path, query)
+
+    def _pagination_dialog(self, endpoint, params):
+        """Yield the "data" items of successive dialog message pages"""
+        while True:
+            page = self._call(endpoint, params)
+            yield from page["data"]
+
+            try:
+                extra = page["extra"]
+                last = extra.get("isLast")
+                if not last:
+                    params["offset"] = cursor = extra["offset"]
+            except Exception:
+                return
+            if last or not cursor:
+                return
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 92fa4268..22eb9832 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -211,6 +211,7 @@ SUBCATEGORY_MAP = {
         "posts": "",
     },
     "boosty": {
+        "direct-messages": "DMs",
         "feed": "Subscriptions Feed",
     },
     "civitai": {
diff --git a/test/results/boosty.py b/test/results/boosty.py
index 2da0e6b3..b570c502 100644
--- a/test/results/boosty.py
+++ b/test/results/boosty.py
@@ -131,5 +131,24 @@ __tests__ = (
     "#auth"    : True,
 },
 
+{
+    "#url"     : "https://boosty.to/app/messages?dialogId=3598621",
+    "#class"   : boosty.BoostyDirectMessagesExtractor,
+    "#auth"    : True,
+    "#count"   : 7,
+
+    "count"    : 1,
+    "extension": "",
+    "file"     : dict,
+    "user"     : dict,
+
+    "post": {
+        "authorId": int,
+        "content" : list,
+        "date"    : "type:datetime",
+        "dialogId": 3598621,
+        "id"      : int,
+    },
+},
 
 )