[mastodon] various improvements and fixes (#144)

- allow instances to specify their own 'category'
- improve config lookup:
  - first look into extractor.<category>.*
  - and afterwards look into extractor.mastodon.<instance>.*
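- add a default entry for pawoo.net that actually takes effect: the
  defaults are now stored back into the 'extractor.mastodon' config,
  and the entry sets its category to 'pawoo'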
- add an 'instance' keyword and turn 'tags' into a plain list of tag names
This commit is contained in:
Mike Fährmann
2019-01-27 20:43:33 +01:00
parent 3f608a84b7
commit 4f49fdf065

View File

@@ -16,7 +16,7 @@ import re
class MastodonExtractor(Extractor): class MastodonExtractor(Extractor):
"""Base class for mastodon extractors""" """Base class for mastodon extractors"""
basecategory = "mastodon" basecategory = "mastodon"
directory_fmt = ["mastodon", "{category}", "{account[username]}"] directory_fmt = ["mastodon", "{instance}", "{account[username]}"]
filename_fmt = "{category}_{id}_{media[id]}.{extension}" filename_fmt = "{category}_{id}_{media[id]}.{extension}"
archive_fmt = "{media[id]}" archive_fmt = "{media[id]}"
instance = None instance = None
@@ -26,9 +26,12 @@ class MastodonExtractor(Extractor):
self.instance = match.group(1) self.instance = match.group(1)
self.api = MastodonAPI(self, self.instance) self.api = MastodonAPI(self, self.instance)
def config(self, key, default=None): def config(self, key, default=None, *, sentinel=object()):
value = Extractor.config(self, key, sentinel)
if value is not sentinel:
return value
return config.interpolate( return config.interpolate(
("extractor", "mastodon", self.category, self.subcategory, key), ("extractor", "mastodon", self.instance, self.subcategory, key),
default, default,
) )
@@ -46,9 +49,10 @@ class MastodonExtractor(Extractor):
"""Return an iterable containing all relevant Status-objects""" """Return an iterable containing all relevant Status-objects"""
return () return ()
@staticmethod def prepare(self, status):
def prepare(status):
"""Prepare a status object""" """Prepare a status object"""
status["instance"] = self.instance
status["tags"] = [tag["name"] for tag in status["tags"]]
attachments = status["media_attachments"] attachments = status["media_attachments"]
del status["media_attachments"] del status["media_attachments"]
return attachments return attachments
@@ -138,10 +142,14 @@ def generate_extractors():
"""Dynamically generate Extractor classes for Mastodon instances""" """Dynamically generate Extractor classes for Mastodon instances"""
symtable = globals() symtable = globals()
mastodon = config.get(("extractor", "mastodon")) or {} mastodon = config.get(("extractor", "mastodon"))
if not mastodon:
mastodon = {}
config.set(("extractor", "mastodon"), mastodon)
if "pawoo.net" not in mastodon: if "pawoo.net" not in mastodon:
mastodon["pawoo.net"] = { mastodon["pawoo.net"] = {
"category" : "pawoo",
"access-token" : "286462927198d0cf3e24683e91c8259a" "access-token" : "286462927198d0cf3e24683e91c8259a"
"ac4367233064e0570ca18df2ac65b226", "ac4367233064e0570ca18df2ac65b226",
"client-id" : "97b142b6904abf97a1068d51a7bc2f2f" "client-id" : "97b142b6904abf97a1068d51a7bc2f2f"
@@ -163,10 +171,11 @@ def generate_extractors():
pattern = [r"(?:https?://)?({})/@[^/?&#]+/(\d+)".format( pattern = [r"(?:https?://)?({})/@[^/?&#]+/(\d+)".format(
re.escape(instance))] re.escape(instance))]
name = re.sub(r"[^A-Za-z]+", "", instance).capitalize() category = info.get("category", instance)
name = re.sub(r"[^A-Za-z]+", "", category).capitalize()
for extr in (UserExtractor, StatusExtractor): for extr in (UserExtractor, StatusExtractor):
extr.category = instance extr.category = category
extr.__name__ = name + extr.__name__ extr.__name__ = name + extr.__name__
extr.__doc__ = "{} on {}".format(extr.__base__.__doc__, instance) extr.__doc__ = "{} on {}".format(extr.__base__.__doc__, instance)
symtable[extr.__name__] = extr symtable[extr.__name__] = extr