[subscribestar] extract 'title' metadata (#7219)

This commit is contained in:
Mike Fährmann
2025-03-22 09:46:08 +01:00
parent f8ef9a7b35
commit 4807bc215c
2 changed files with 5 additions and 0 deletions

View File

@@ -39,6 +39,8 @@ class SubscribestarExtractor(Extractor):
for post_html in self.posts():
media = self._media_from_post(post_html)
data = self._data_from_post(post_html)
data["title"] = text.unescape(text.extr(
data["content"], "<h1>", "</h1>"))
yield Message.Directory, data
for num, item in enumerate(media, 1):
item.update(data)

View File

@@ -24,6 +24,7 @@ __tests__ = (
"id" : int,
"num" : int,
"post_id" : int,
"title" : str,
"type" : r"re:image|video|attachment",
"url" : str,
"?pinned" : bool,
@@ -66,6 +67,7 @@ __tests__ = (
"num" : 1,
"pinned" : False,
"post_id" : 102468,
"title" : "Brand Guidelines and Assets",
"type" : "image",
"width" : 700,
},
@@ -85,6 +87,7 @@ __tests__ = (
"name" : "Training for freelancers - Fiverr.pdf",
"num" : 2,
"post_id" : 920015,
"title" : "",
"type" : "attachment",
},