[boosty] added new direct message extractor
- formatting - fixed linting formatting errors - fixed E999 SyntaxError: invalid syntax - fixed class naming - fixed mandatory extractor.boosty.metadata as true requirement - update - apply changes - add test - update docs/supportedsites - improve 'dialog' pagination logic
This commit is contained in:
committed by
Mike Fährmann
parent
18ed39c1cf
commit
ff5f6fe70f
@@ -148,7 +148,7 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<tr>
|
<tr>
|
||||||
<td>Boosty</td>
|
<td>Boosty</td>
|
||||||
<td>https://www.boosty.to/</td>
|
<td>https://www.boosty.to/</td>
|
||||||
<td>Subscriptions Feed, Followed Users, Media Files, Posts, User Profiles</td>
|
<td>DMs, Subscriptions Feed, Followed Users, Media Files, Posts, User Profiles</td>
|
||||||
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
|
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
|
import itertools
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?boosty\.to"
|
BASE_PATTERN = r"(?:https?://)?boosty\.to"
|
||||||
|
|
||||||
@@ -53,7 +54,9 @@ class BoostyExtractor(Extractor):
|
|||||||
self.log.warning("Not allowed to access post %s", post["id"])
|
self.log.warning("Not allowed to access post %s", post["id"])
|
||||||
continue
|
continue
|
||||||
|
|
||||||
files = self._process_post(post)
|
files = self._extract_files(post)
|
||||||
|
if self._user:
|
||||||
|
post["user"] = self._user
|
||||||
data = {
|
data = {
|
||||||
"post" : post,
|
"post" : post,
|
||||||
"user" : post.pop("user", None),
|
"user" : post.pop("user", None),
|
||||||
@@ -69,15 +72,13 @@ class BoostyExtractor(Extractor):
|
|||||||
def posts(self):
|
def posts(self):
|
||||||
"""Yield JSON content of all relevant posts"""
|
"""Yield JSON content of all relevant posts"""
|
||||||
|
|
||||||
def _process_post(self, post):
|
def _extract_files(self, post):
|
||||||
files = []
|
files = []
|
||||||
post["content"] = content = []
|
post["content"] = content = []
|
||||||
post["links"] = links = []
|
post["links"] = links = []
|
||||||
|
|
||||||
if "createdAt" in post:
|
if "createdAt" in post:
|
||||||
post["date"] = text.parse_timestamp(post["createdAt"])
|
post["date"] = text.parse_timestamp(post["createdAt"])
|
||||||
if self._user:
|
|
||||||
post["user"] = self._user
|
|
||||||
|
|
||||||
for block in post["data"]:
|
for block in post["data"]:
|
||||||
try:
|
try:
|
||||||
@@ -94,7 +95,7 @@ class BoostyExtractor(Extractor):
|
|||||||
elif type == "ok_video":
|
elif type == "ok_video":
|
||||||
if not self.videos:
|
if not self.videos:
|
||||||
self.log.debug("%s: Skipping video %s",
|
self.log.debug("%s: Skipping video %s",
|
||||||
post["int_id"], block["id"])
|
post["id"], block["id"])
|
||||||
continue
|
continue
|
||||||
fmts = {
|
fmts = {
|
||||||
fmt["type"]: fmt["url"]
|
fmt["type"]: fmt["url"]
|
||||||
@@ -114,7 +115,7 @@ class BoostyExtractor(Extractor):
|
|||||||
else:
|
else:
|
||||||
self.log.warning(
|
self.log.warning(
|
||||||
"%s: Found no suitable video format for %s",
|
"%s: Found no suitable video format for %s",
|
||||||
post["int_id"], block["id"])
|
post["id"], block["id"])
|
||||||
|
|
||||||
elif type == "link":
|
elif type == "link":
|
||||||
url = block["url"]
|
url = block["url"]
|
||||||
@@ -127,9 +128,12 @@ class BoostyExtractor(Extractor):
|
|||||||
elif type == "file":
|
elif type == "file":
|
||||||
files.append(self._update_url(post, block))
|
files.append(self._update_url(post, block))
|
||||||
|
|
||||||
|
elif type == "smile":
|
||||||
|
content.append(":" + block["name"] + ":")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.log.debug("%s: Unsupported data type '%s'",
|
self.log.debug("%s: Unsupported data type '%s'",
|
||||||
post["int_id"], type)
|
post["id"], type)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.log.debug("%s: %s", exc.__class__.__name__, exc)
|
self.log.debug("%s: %s", exc.__class__.__name__, exc)
|
||||||
|
|
||||||
@@ -219,6 +223,51 @@ class BoostyFollowingExtractor(BoostyExtractor):
|
|||||||
yield Message.Queue, url, user
|
yield Message.Queue, url, user
|
||||||
|
|
||||||
|
|
||||||
|
class BoostyDirectMessagesExtractor(BoostyExtractor):
|
||||||
|
"""Extractor for boosty.to direct messages"""
|
||||||
|
subcategory = "direct-messages"
|
||||||
|
directory_fmt = ("{category}", "{user[blogUrl]} ({user[id]})",
|
||||||
|
"Direct Messages")
|
||||||
|
pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)"
|
||||||
|
example = "https://boosty.to/app/messages?dialogId=12345"
|
||||||
|
|
||||||
|
def items(self):
|
||||||
|
"""Yield direct messages from a given dialog ID."""
|
||||||
|
dialog_id = self.groups[0]
|
||||||
|
response = self.api.dialog(dialog_id)
|
||||||
|
signed_query = response.get("signedQuery")
|
||||||
|
|
||||||
|
try:
|
||||||
|
messages = response["messages"]["data"]
|
||||||
|
offset = messages[0]["id"]
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
user = self.api.user(response["chatmate"]["url"])
|
||||||
|
except Exception:
|
||||||
|
user = None
|
||||||
|
|
||||||
|
messages.reverse()
|
||||||
|
for message in itertools.chain(
|
||||||
|
messages,
|
||||||
|
self.api.dialog_messages(dialog_id, offset=offset)
|
||||||
|
):
|
||||||
|
message["signedQuery"] = signed_query
|
||||||
|
files = self._extract_files(message)
|
||||||
|
data = {
|
||||||
|
"post": message,
|
||||||
|
"user": user,
|
||||||
|
"count": len(files),
|
||||||
|
}
|
||||||
|
|
||||||
|
yield Message.Directory, data
|
||||||
|
for data["num"], file in enumerate(files, 1):
|
||||||
|
data["file"] = file
|
||||||
|
url = file["url"]
|
||||||
|
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||||
|
|
||||||
|
|
||||||
class BoostyAPI():
|
class BoostyAPI():
|
||||||
"""Interface for the Boosty API"""
|
"""Interface for the Boosty API"""
|
||||||
root = "https://api.boosty.to"
|
root = "https://api.boosty.to"
|
||||||
@@ -367,3 +416,32 @@ class BoostyAPI():
|
|||||||
if offset > data["total"]:
|
if offset > data["total"]:
|
||||||
return
|
return
|
||||||
params["offset"] = offset
|
params["offset"] = offset
|
||||||
|
|
||||||
|
def dialog(self, dialog_id):
|
||||||
|
endpoint = "/v1/dialog/{}".format(dialog_id)
|
||||||
|
return self._call(endpoint)
|
||||||
|
|
||||||
|
def dialog_messages(self, dialog_id, limit=300, offset=None):
|
||||||
|
endpoint = "/v1/dialog/{}/message/".format(dialog_id)
|
||||||
|
params = {
|
||||||
|
"limit": limit,
|
||||||
|
"reverse": "true",
|
||||||
|
"offset": offset,
|
||||||
|
}
|
||||||
|
return self._pagination_dialog(endpoint, params)
|
||||||
|
|
||||||
|
def _pagination_dialog(self, endpoint, params):
|
||||||
|
while True:
|
||||||
|
data = self._call(endpoint, params)
|
||||||
|
|
||||||
|
yield from data["data"]
|
||||||
|
|
||||||
|
try:
|
||||||
|
extra = data["extra"]
|
||||||
|
if extra.get("isLast"):
|
||||||
|
break
|
||||||
|
params["offset"] = offset = extra["offset"]
|
||||||
|
if not offset:
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
break
|
||||||
|
|||||||
@@ -211,6 +211,7 @@ SUBCATEGORY_MAP = {
|
|||||||
"posts": "",
|
"posts": "",
|
||||||
},
|
},
|
||||||
"boosty": {
|
"boosty": {
|
||||||
|
"direct-messages": "DMs",
|
||||||
"feed": "Subscriptions Feed",
|
"feed": "Subscriptions Feed",
|
||||||
},
|
},
|
||||||
"civitai": {
|
"civitai": {
|
||||||
|
|||||||
@@ -131,5 +131,24 @@ __tests__ = (
|
|||||||
"#auth" : True,
|
"#auth" : True,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://boosty.to/app/messages?dialogId=3598621",
|
||||||
|
"#class" : boosty.BoostyDirectMessagesExtractor,
|
||||||
|
"#auth" : True,
|
||||||
|
"#count" : 7,
|
||||||
|
|
||||||
|
"count" : 1,
|
||||||
|
"extension": "",
|
||||||
|
"file" : dict,
|
||||||
|
"user" : dict,
|
||||||
|
|
||||||
|
"post": {
|
||||||
|
"authorId": int,
|
||||||
|
"content" : list,
|
||||||
|
"date" : "type:datetime",
|
||||||
|
"dialogId": 3598621,
|
||||||
|
"id" : int,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user