[mastodon] implement 'text-posts' option (#1569)

Similar to the Twitter extractor's 'text-tweets' option.
This commit is contained in:
Mike Fährmann
2021-07-02 22:12:41 +02:00
parent 414bdc95a3
commit 93d356712c
2 changed files with 33 additions and 23 deletions

View File

@@ -1269,6 +1269,16 @@ Description
Provide ``artist``, ``author``, and ``group`` metadata fields. Provide ``artist``, ``author``, and ``group`` metadata fields.
extractor.mastodon.text-posts
-----------------------------
Type
``bool``
Default
``false``
Description
Also emit metadata for text-only posts without media content.
extractor.newgrounds.flash extractor.newgrounds.flash
-------------------------- --------------------------
Type Type

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extractors for mastodon instances""" """Extractors for Mastodon instances"""
from .common import BaseExtractor, Message from .common import BaseExtractor, Message
from .. import text, exception from .. import text, exception
@@ -29,26 +29,23 @@ class MastodonExtractor(BaseExtractor):
def items(self): def items(self):
for status in self.statuses(): for status in self.statuses():
attachments = status["media_attachments"] attachments = status["media_attachments"]
if attachments: del status["media_attachments"]
self.prepare(status)
yield Message.Directory, status status["instance"] = self.instance
for media in attachments: status["tags"] = [tag["name"] for tag in status["tags"]]
status["media"] = media status["date"] = text.parse_datetime(
url = media["url"] status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Url, url, text.nameext_from_url(url, status)
yield Message.Directory, status
for media in attachments:
status["media"] = media
url = media["url"]
yield Message.Url, url, text.nameext_from_url(url, status)
def statuses(self): def statuses(self):
"""Return an iterable containing all relevant Status-objects""" """Return an iterable containing all relevant Status objects"""
return () return ()
def prepare(self, status):
"""Prepare a status object"""
del status["media_attachments"]
status["instance"] = self.instance
status["tags"] = [tag["name"] for tag in status["tags"]]
status["date"] = text.parse_datetime(
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
INSTANCES = { INSTANCES = {
"mastodon.social": { "mastodon.social": {
@@ -97,6 +94,7 @@ class MastodonUserExtractor(MastodonExtractor):
def statuses(self): def statuses(self):
api = MastodonAPI(self) api = MastodonAPI(self)
username = self.item username = self.item
handle = "@{}@{}".format(username, self.instance) handle = "@{}@{}".format(username, self.instance)
for account in api.account_search(handle, 1): for account in api.account_search(handle, 1):
@@ -104,7 +102,9 @@ class MastodonUserExtractor(MastodonExtractor):
break break
else: else:
raise exception.NotFoundError("account") raise exception.NotFoundError("account")
return api.account_statuses(account["id"])
return api.account_statuses(
account["id"], not self.config("text-posts", False))
class MastodonStatusExtractor(MastodonExtractor): class MastodonStatusExtractor(MastodonExtractor):
@@ -130,8 +130,8 @@ class MastodonStatusExtractor(MastodonExtractor):
class MastodonAPI(): class MastodonAPI():
"""Minimal interface for the Mastodon API """Minimal interface for the Mastodon API
https://docs.joinmastodon.org/
https://github.com/tootsuite/mastodon https://github.com/tootsuite/mastodon
https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
""" """
def __init__(self, extractor): def __init__(self, extractor):
@@ -153,15 +153,15 @@ class MastodonAPI():
self.headers = {"Authorization": "Bearer " + access_token} self.headers = {"Authorization": "Bearer " + access_token}
def account_search(self, query, limit=40): def account_search(self, query, limit=40):
"""Search for content""" """Search for accounts"""
endpoint = "/v1/accounts/search" endpoint = "/v1/accounts/search"
params = {"q": query, "limit": limit} params = {"q": query, "limit": limit}
return self._call(endpoint, params).json() return self._call(endpoint, params).json()
def account_statuses(self, account_id): def account_statuses(self, account_id, only_media=True):
"""Get an account's statuses""" """Fetch an account's statuses"""
endpoint = "/v1/accounts/{}/statuses".format(account_id) endpoint = "/v1/accounts/{}/statuses".format(account_id)
params = {"only_media": "1"} params = {"only_media": "1"} if only_media else None
return self._pagination(endpoint, params) return self._pagination(endpoint, params)
def status(self, status_id): def status(self, status_id):