[bluesky:likes] use 'repo.listRecords' endpoint (#7194 #7287)

requires one additional API request per post,
but allows accessing likes of all users without login
and avoids the infinite loop at the end of 'getActorLikes'

add 'endpoint' option
This commit is contained in:
Mike Fährmann
2025-04-15 21:57:17 +02:00
parent 48053cf3ec
commit 481ad1c206
4 changed files with 91 additions and 8 deletions

View File

@@ -1591,6 +1591,28 @@ Description
It is possible to use ``"all"`` instead of listing all values separately.
extractor.bluesky.likes.endpoint
--------------------------------
Type
``string``
Default
``"listRecords"``
Description
API endpoint to use for retrieving liked posts.
``"listRecords"``
| Use the results from
`com.atproto.repo.listRecords <https://docs.bsky.app/docs/api/com-atproto-repo-list-records>`__
| Requires no login and allows accessing likes of all users,
but uses one request to
`getPostThread <https://docs.bsky.app/docs/api/app-bsky-feed-get-post-thread>`__
per post.
``"getActorLikes"``
| Use the results from
`app.bsky.feed.getActorLikes <https://docs.bsky.app/docs/api/app-bsky-feed-get-actor-likes>`__
| Requires login and only allows accessing your own likes.
extractor.bluesky.metadata
--------------------------
Type
@@ -1613,6 +1635,8 @@ Description
extractor.bluesky.post.depth
----------------------------
extractor.bluesky.likes.depth
-----------------------------
Type
``integer``
Default

View File

@@ -153,6 +153,10 @@
"reposts" : false,
"videos" : true,
"likes": {
"depth" : 0,
"endpoint": "listRecords"
},
"post": {
"depth": 0
}

View File

@@ -87,6 +87,22 @@ class BlueskyExtractor(Extractor):
def posts(self):
return ()
def _posts_records(self, actor, collection):
    """Yield post threads referenced by an actor's repository records

    Iterates over the records of 'collection' in the repository of
    'actor' and, for every record whose subject URI points to an
    'app.bsky.feed.post', yields the results of a post thread lookup
    for that URI, using the 'depth' option (default "0").

    A StopExtraction raised during a lookup is treated as a deleted
    post and skipped silently; any other exception is logged and the
    record is skipped.
    """
    thread_depth = self.config("depth", "0")
    records = self.api.list_records(actor, collection)

    for entry in records:
        # stays None when the record has no subject URI,
        # so the warning below can fall back to "record"
        uri = None
        try:
            uri = entry["value"]["subject"]["uri"]
            if "/app.bsky.feed.post/" not in uri:
                # subject is not a post; nothing to fetch
                continue
            yield from self.api.get_post_thread_uri(uri, thread_depth)
        except exception.StopExtraction:
            # deleted post
            continue
        except Exception as exc:
            # full record goes to the debug log, short summary to warning
            self.log.debug(entry, exc_info=exc)
            self.log.warning(
                "Failed to extract %s (%s: %s)",
                uri or "record", exc.__class__.__name__, exc)
def _pid(self, post):
return post["uri"].rpartition("/")[2]
@@ -250,7 +266,9 @@ class BlueskyLikesExtractor(BlueskyExtractor):
example = "https://bsky.app/profile/HANDLE/likes"
def posts(self):
return self.api.get_actor_likes(self.user)
if self.config("endpoint") == "getActorLikes":
return self.api.get_actor_likes(self.user)
return self._posts_records(self.user, "app.bsky.feed.like")
class BlueskyFeedExtractor(BlueskyExtractor):
@@ -416,11 +434,16 @@ class BlueskyAPI():
return self._pagination(endpoint, params)
def get_post_thread(self, actor, post_id):
    """Look up the thread of post 'post_id' belonging to 'actor'

    Builds the post's 'at://' URI from the actor's DID and delegates
    to get_post_thread_uri(), passing the extractor's 'depth' option
    (default "0").
    """
    did = self._did_from_actor(actor)
    uri = "at://{}/app.bsky.feed.post/{}".format(did, post_id)
    depth = self.extractor.config("depth", "0")
    return self.get_post_thread_uri(uri, depth)
def get_post_thread_uri(self, uri, depth="0"):
endpoint = "app.bsky.feed.getPostThread"
params = {
"uri": "at://{}/app.bsky.feed.post/{}".format(
self._did_from_actor(actor), post_id),
"depth" : self.extractor.config("depth", "0"),
"uri" : uri,
"depth" : depth,
"parentHeight": "0",
}
@@ -443,6 +466,18 @@ class BlueskyAPI():
params = {"actor": did}
return self._call(endpoint, params)
def list_records(self, actor, collection):
    """Return the paginated 'records' of a collection in an actor's repo

    Queries 'com.atproto.repo.listRecords' with a page size of 100.
    Requests use the root returned by service_endpoint() for the
    actor's DID instead of the default API root.
    """
    did = self._did_from_actor(actor)
    query = {
        "repo"      : did,
        "collection": collection,
        "limit"     : "100",
        #  "reverse"   : "false",
    }
    root = self.service_endpoint(did)
    return self._pagination(
        "com.atproto.repo.listRecords", query, "records", root)
@memcache(keyarg=1)
def resolve_handle(self, handle):
endpoint = "com.atproto.identity.resolveHandle"
@@ -523,8 +558,10 @@ class BlueskyAPI():
_refresh_token_cache.update(self.username, data["refreshJwt"])
return "Bearer " + data["accessJwt"]
def _call(self, endpoint, params):
url = "{}/xrpc/{}".format(self.root, endpoint)
def _call(self, endpoint, params, root=None):
if root is None:
root = self.root
url = "{}/xrpc/{}".format(root, endpoint)
while True:
self.authenticate()
@@ -549,9 +586,9 @@ class BlueskyAPI():
self.extractor.log.debug("Server response: %s", response.text)
raise exception.StopExtraction(msg)
def _pagination(self, endpoint, params, key="feed"):
def _pagination(self, endpoint, params, key="feed", root=None):
while True:
data = self._call(endpoint, params)
data = self._call(endpoint, params, root)
yield from data[key]
cursor = data.get("cursor")

View File

@@ -116,6 +116,24 @@ __tests__ = (
"#url" : "https://bsky.app/profile/bsky.app/likes",
"#category": ("", "bluesky", "likes"),
"#class" : bluesky.BlueskyLikesExtractor,
"#auth" : False,
"#range" : "1-5",
"#count" : 5,
},
{
"#url" : "https://bsky.app/profile/mikf.bsky.social/likes",
"#class" : bluesky.BlueskyLikesExtractor,
"#auth" : False,
"#urls" : "https://conocybe.us-west.host.bsky.network/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreih2dn2xeyoayabgvpyutv5ldubcdxzfqipijasfzxyeez7fff5ymi",
},
{
"#url" : "https://bsky.app/profile/mikf.bsky.social/likes",
"#class" : bluesky.BlueskyLikesExtractor,
"#options" : {"endpoint": "getActorLikes"},
"#auth" : False,
"#count" : 0,
},
{