[mastodon] update

- inherit from BaseExtractor
- remove custom generate_extractors() and config()
- improve layout of MastodonAPI internals
This commit is contained in:
Mike Fährmann
2021-01-27 23:49:01 +01:00
parent 231bcad614
commit fa33f13453

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2020 Mike Fährmann # Copyright 2019-2021 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -8,35 +8,25 @@
"""Extractors for mastodon instances""" """Extractors for mastodon instances"""
from .common import Extractor, Message from .common import BaseExtractor, Message
from .. import text, util, config, exception from .. import text, exception
import re
class MastodonExtractor(Extractor): class MastodonExtractor(BaseExtractor):
"""Base class for mastodon extractors""" """Base class for mastodon extractors"""
basecategory = "mastodon" basecategory = "mastodon"
directory_fmt = ("mastodon", "{instance}", "{account[username]}") directory_fmt = ("mastodon", "{instance}", "{account[username]}")
filename_fmt = "{category}_{id}_{media[id]}.{extension}" filename_fmt = "{category}_{id}_{media[id]}.{extension}"
archive_fmt = "{media[id]}" archive_fmt = "{media[id]}"
cookiedomain = None cookiedomain = None
instance = None
root = None
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match) BaseExtractor.__init__(self, match)
self.instance = self.root.partition("://")[2]
self.item = match.group(match.lastindex)
self.api = MastodonAPI(self) self.api = MastodonAPI(self)
def config(self, key, default=None):
return config.interpolate_common(
("extractor",), (
(self.category, self.subcategory),
(self.basecategory, self.instance, self.subcategory),
), key, default,
)
def items(self): def items(self):
yield Message.Version, 1
for status in self.statuses(): for status in self.statuses():
attachments = status["media_attachments"] attachments = status["media_attachments"]
if attachments: if attachments:
@@ -60,18 +50,56 @@ class MastodonExtractor(Extractor):
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S") status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
INSTANCES = {
"mastodon.social": {
"root" : "https://mastodon.social",
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
},
"pawoo": {
"root" : "https://pawoo.net",
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
"0d62d0a75a8525953098c167eacd3668",
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
"782bc3b7fdbd7ea4164b9f3c3780c8ff",
"client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
"8428ef1fadb446dcfeb4f5ed6872d97b",
},
"baraag": {
"root" : "https://baraag.net",
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
}
}
BASE_PATTERN = MastodonExtractor.update(INSTANCES)
class MastodonUserExtractor(MastodonExtractor): class MastodonUserExtractor(MastodonExtractor):
"""Extractor for all images of an account/user""" """Extractor for all images of an account/user"""
subcategory = "user" subcategory = "user"
pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$"
def __init__(self, match): test = (
MastodonExtractor.__init__(self, match) ("https://mastodon.social/@jk", {
self.account_name = match.group(1) "pattern": r"https://files.mastodon.social/media_attachments"
r"/files/(\d+/){3,}original/\w+",
"range": "1-60",
"count": 60,
}),
("https://pawoo.net/@yoru_nine/", {
"range": "1-60",
"count": 60,
}),
("https://baraag.net/@pumpkinnsfw"),
)
def statuses(self): def statuses(self):
handle = "@{}@{}".format(self.account_name, self.instance) username = self.item
handle = "@{}@{}".format(username, self.instance)
for account in self.api.account_search(handle, 1): for account in self.api.account_search(handle, 1):
if account["username"] == self.account_name: if account["username"] == username:
break break
else: else:
raise exception.NotFoundError("account") raise exception.NotFoundError("account")
@@ -81,13 +109,21 @@ class MastodonUserExtractor(MastodonExtractor):
class MastodonStatusExtractor(MastodonExtractor): class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status""" """Extractor for images from a status"""
subcategory = "status" subcategory = "status"
pattern = BASE_PATTERN + r"/@[^/?#]+/(\d+)"
def __init__(self, match): test = (
MastodonExtractor.__init__(self, match) ("https://mastodon.social/@jk/103794036899778366", {
self.status_id = match.group(1) "count": 4,
}),
("https://pawoo.net/@yoru_nine/105038878897832922", {
"content": "b52e807f8ab548d6f896b09218ece01eba83987a",
}),
("https://baraag.net/@pumpkinnsfw/104364170556898443", {
"content": "67748c1b828c58ad60d0fe5729b59fb29c872244",
}),
)
def statuses(self): def statuses(self):
return (self.api.status(self.status_id),) return (self.api.status(self.item),)
class MastodonAPI(): class MastodonAPI():
@@ -102,30 +138,36 @@ class MastodonAPI():
self.extractor = extractor self.extractor = extractor
if not access_token: if not access_token:
access_token = extractor.config( access_token = extractor.config("access-token")
"access-token", extractor.access_token) if not access_token:
self.headers = {"Authorization": "Bearer {}".format(access_token)} if extractor.category not in INSTANCES:
raise exception.StopExtraction("missing access token")
access_token = INSTANCES[extractor.category]["access-token"]
self.headers = {"Authorization": "Bearer " + access_token}
def account_search(self, query, limit=40): def account_search(self, query, limit=40):
"""Search for content""" """Search for content"""
endpoint = "/v1/accounts/search"
params = {"q": query, "limit": limit} params = {"q": query, "limit": limit}
return self._call("accounts/search", params).json() return self._call(endpoint, params).json()
def account_statuses(self, account_id): def account_statuses(self, account_id):
"""Get an account's statuses""" """Get an account's statuses"""
endpoint = "accounts/{}/statuses".format(account_id) endpoint = "/v1/accounts/{}/statuses".format(account_id)
params = {"only_media": "1"} params = {"only_media": "1"}
return self._pagination(endpoint, params) return self._pagination(endpoint, params)
def status(self, status_id): def status(self, status_id):
"""Fetch a Status""" """Fetch a status"""
return self._call("statuses/" + status_id).json() endpoint = "/v1/statuses/" + status_id
return self._call(endpoint).json()
def _call(self, endpoint, params=None): def _call(self, endpoint, params=None):
if endpoint.startswith("http"): if endpoint.startswith("http"):
url = endpoint url = endpoint
else: else:
url = "{}/api/v1/{}".format(self.root, endpoint) url = self.root + "/api" + endpoint
while True: while True:
response = self.extractor.request( response = self.extractor.request(
@@ -145,7 +187,7 @@ class MastodonAPI():
raise exception.StopExtraction(response.json().get("error")) raise exception.StopExtraction(response.json().get("error"))
def _pagination(self, endpoint, params): def _pagination(self, endpoint, params):
url = "{}/api/v1/{}".format(self.root, endpoint) url = endpoint
while url: while url:
response = self._call(url, params) response = self._call(url, params)
yield from response.json() yield from response.json()
@@ -154,88 +196,3 @@ class MastodonAPI():
if not url: if not url:
return return
url = url["url"] url = url["url"]
def generate_extractors():
"""Dynamically generate Extractor classes for Mastodon instances"""
symtable = globals()
extractors = config.get(("extractor",), "mastodon")
if extractors:
util.combine_dict(EXTRACTORS, extractors)
config.set(("extractor",), "mastodon", EXTRACTORS)
for instance, info in EXTRACTORS.items():
if not isinstance(info, dict):
continue
category = info.get("category") or instance.replace(".", "")
root = info.get("root") or "https://" + instance
name = (info.get("name") or category).capitalize()
token = info.get("access-token")
pattern = info.get("pattern") or re.escape(instance)
class Extr(MastodonUserExtractor):
pass
Extr.__name__ = Extr.__qualname__ = name + "UserExtractor"
Extr.__doc__ = "Extractor for all images of a user on " + instance
Extr.category = category
Extr.instance = instance
Extr.pattern = (r"(?:https?://)?" + pattern +
r"/@([^/?#]+)(?:/media)?/?$")
Extr.test = info.get("test-user")
Extr.root = root
Extr.access_token = token
symtable[Extr.__name__] = Extr
class Extr(MastodonStatusExtractor):
pass
Extr.__name__ = Extr.__qualname__ = name + "StatusExtractor"
Extr.__doc__ = "Extractor for images from a status on " + instance
Extr.category = category
Extr.instance = instance
Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?#]+/(\d+)"
Extr.test = info.get("test-status")
Extr.root = root
Extr.access_token = token
symtable[Extr.__name__] = Extr
EXTRACTORS = {
"mastodon.social": {
"category" : "mastodon.social",
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
"test-user" : ("https://mastodon.social/@jk", {
"pattern": r"https://files.mastodon.social/media_attachments"
r"/files/(\d+/){3,}original/\w+",
"range": "1-60",
"count": 60,
}),
"test-status" : ("https://mastodon.social/@jk/103794036899778366", {
"count": 4,
}),
},
"pawoo.net": {
"category" : "pawoo",
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
"0d62d0a75a8525953098c167eacd3668",
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
"782bc3b7fdbd7ea4164b9f3c3780c8ff",
"client-secret": "9208e3d4a7997032cf4f1b0e12e5df38"
"8428ef1fadb446dcfeb4f5ed6872d97b",
},
"baraag.net": {
"category" : "baraag",
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
},
}
generate_extractors()