[bluesky] extract 'hashtags', 'mentions', and 'uris' metadata (#4438)

This commit is contained in:
Mike Fährmann
2024-02-10 00:01:55 +01:00
parent 55bbd49a0e
commit c7d17f1111
2 changed files with 60 additions and 0 deletions

View File

@@ -45,6 +45,21 @@ class BlueskyExtractor(Extractor):
if "images" in media:
images = media["images"]
if "facets" in post:
post["hashtags"] = tags = []
post["mentions"] = dids = []
post["uris"] = uris = []
for facet in post["facets"]:
features = facet["features"][0]
if "tag" in features:
tags.append(features["tag"])
elif "did" in features:
dids.append(features["did"])
elif "uri" in features:
uris.append(features["uri"])
else:
post["hashtags"] = post["mentions"] = post["uris"] = ()
post["post_id"] = post["uri"].rpartition("/")[2]
post["count"] = len(images)
post["date"] = text.parse_datetime(