[cohost] add 'tag' extractor (#4483)

This commit is contained in:
Mike Fährmann
2024-09-13 11:11:53 +02:00
parent 0d67d54162
commit 7abf6e445c
2 changed files with 34 additions and 1 deletions

View File

@@ -158,3 +158,36 @@ class CohostPostExtractor(CohostExtractor):
post["comments"] = ()
return (post,)
class CohostTagExtractor(CohostExtractor):
    """Extractor for tagged posts"""
    subcategory = "tag"
    pattern = BASE_PATTERN + r"/([^/?#]+)/tagged/([^/?#]+)(?:\?([^#]+))?"
    example = "https://cohost.org/USER/tagged/TAG"

    def posts(self):
        """Yield the post objects of a '/USER/tagged/TAG' feed, page by page.

        Each page is fetched as HTML, the JSON embedded after the
        '__COHOST_LOADER_STATE__' marker is decoded, and the entries of
        its feed's "posts" value are yielded. Fetching continues until
        the feed's pagination data no longer reports more pages forward.
        """
        project, tag_name, query_string = self.groups
        feed_url = "{}/{}/tagged/{}".format(self.root, project, tag_name)
        # Query parameters from the input URL are sent with every request
        # and extended below with the pagination state.
        query = text.parse_query(query_string)

        # The feed is looked up under a different key when the first path
        # segment is "rc".
        # NOTE(review): presumably "rc" is a site-level page rather than
        # a regular project -- confirm.
        if project == "rc":
            feed_key = "tagged-post-feed"
        else:
            feed_key = "project-tagged-post-feed"

        while True:
            html = self.request(feed_url, params=query).text
            raw_state = text.extr(
                html, 'id="__COHOST_LOADER_STATE__">', '</script>')
            state = util.json_loads(raw_state)

            try:
                feed = state[feed_key]
            except KeyError:
                # Expected key is absent: fall back to the value of the
                # entry that dict.popitem() hands out.
                feed = state.popitem()[1]

            yield from feed["posts"]

            paging = feed["paginationMode"]
            if not paging.get("morePagesForward"):
                break

            # Carry the pagination cursor over to the next request.
            query["refTimestamp"] = paging["refTimestamp"]
            next_skip = paging["currentSkip"] + paging["idealPageStride"]
            query["skipPosts"] = next_skip