[bluesky] extract 'hashtags', 'mentions', and 'uris' metadata (#4438)
This commit is contained in:
@@ -45,6 +45,21 @@ class BlueskyExtractor(Extractor):
|
||||
if "images" in media:
|
||||
images = media["images"]
|
||||
|
||||
# Populate the 'hashtags', 'mentions', and 'uris' metadata fields of `post`.
# A post without a "facets" entry gets one shared empty tuple for all three.
if "facets" not in post:
    post["hashtags"] = post["mentions"] = post["uris"] = ()
else:
    hashtags, mentions, links = [], [], []
    # Attach the result lists before iterating, so `post` always carries
    # all three keys once this branch has been entered.
    post["hashtags"] = hashtags
    post["mentions"] = mentions
    post["uris"] = links

    # Feature key -> destination list; checked in this order, first hit wins.
    buckets = (
        ("tag", hashtags),
        ("did", mentions),
        ("uri", links),
    )
    for facet in post["facets"]:
        # Only the first feature of each facet is inspected.
        feature = facet["features"][0]
        for key, bucket in buckets:
            if key in feature:
                bucket.append(feature[key])
                break
|
||||
|
||||
post["post_id"] = post["uri"].rpartition("/")[2]
|
||||
post["count"] = len(images)
|
||||
post["date"] = text.parse_datetime(
|
||||
|
||||
@@ -39,6 +39,51 @@ __tests__ = (
|
||||
"repostCount": int,
|
||||
"uri" : "at://did:plc:z72i7hdynmk6r22z27h6tvur/app.bsky.feed.post/3kh5rarr3gn2n",
|
||||
"width" : 1200,
|
||||
"hashtags" : [],
|
||||
"mentions" : [],
|
||||
"uris" : ["https://blueskyweb.xyz/blog/12-21-2023-butterfly"],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bsky.app/profile/mikf.bsky.social/post/3kkzc3xaf5m2w",
|
||||
"#category": ("", "bluesky", "post"),
|
||||
"#class" : bluesky.BlueskyPostExtractor,
|
||||
"#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreib7ydpe3xxo4cq7nn32w7eqhcanfaanz6caepd2z4kzplxtx2ctgi",
|
||||
"#sha1_content": "9cf5748f6d00aae83fbb3cc2c6eb3caa832b90f4",
|
||||
|
||||
"author": {
|
||||
"did" : "did:plc:cslxjqkeexku6elp5xowxkq7",
|
||||
"displayName": "mikf",
|
||||
"handle" : "mikf.bsky.social",
|
||||
"labels" : [],
|
||||
},
|
||||
"cid" : "bafyreihtck7clocti2qshaiounadof74pxqhz7gnvbstxujqzhlodigqru",
|
||||
"count" : 1,
|
||||
"createdAt" : "2024-02-09T21:57:31.917Z",
|
||||
"date" : "dt:2024-02-09 21:57:31",
|
||||
"description": "reading lewd books",
|
||||
"extension" : "jpeg",
|
||||
"filename" : "bafkreib7ydpe3xxo4cq7nn32w7eqhcanfaanz6caepd2z4kzplxtx2ctgi",
|
||||
"hashtags" : [
|
||||
"patchouli",
|
||||
"patchy",
|
||||
],
|
||||
"mentions" : [
|
||||
"did:plc:cslxjqkeexku6elp5xowxkq7",
|
||||
],
|
||||
"uris" : [
|
||||
"https://seiga.nicovideo.jp/seiga/im5977527",
|
||||
],
|
||||
"width" : 1024,
|
||||
"height" : 768,
|
||||
"langs" : ["en"],
|
||||
"likeCount" : int,
|
||||
"num" : 1,
|
||||
"post_id" : "3kkzc3xaf5m2w",
|
||||
"replyCount" : int,
|
||||
"repostCount": int,
|
||||
"text" : "testing \"facets\"\n\nsource: seiga.nicovideo.jp/seiga/im5977...\n#patchouli #patchy\n@mikf.bsky.social",
|
||||
"uri" : "at://did:plc:cslxjqkeexku6elp5xowxkq7/app.bsky.feed.post/3kkzc3xaf5m2w",
|
||||
},
|
||||
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user