[fanbox] add 'content' metadata field (#3020)

This commit is contained in:
Mike Fährmann
2022-10-10 16:05:46 +02:00
parent e5d229c524
commit 1ca6be8619

View File

@@ -68,6 +68,16 @@ class FanboxExtractor(Extractor):
post["html"] = content_body["html"]
if post["type"] == "article":
post["articleBody"] = content_body.copy()
if "blocks" in content_body:
content = []
append = content.append
for block in content_body["blocks"]:
if "text" in block:
append(block["text"])
if "links" in block:
for link in block["links"]:
append(link["url"])
post["content"] = "\n".join(content)
post["date"] = text.parse_datetime(post["publishedDatetime"])
post["text"] = content_body.get("text") if content_body else None
@@ -271,6 +281,16 @@ class FanboxPostExtractor(FanboxExtractor):
"hasAdultContent": True
},
}),
# 'content' metadata (#3020)
("https://www.fanbox.cc/@official-en/posts/4326303", {
"keyword": {
"content": r"re:^Greetings from FANBOX.\n \n"
r"As of Monday, September 5th, 2022, we are happy "
r"to announce the start of the FANBOX hashtag "
r"event #MySetupTour ! \nAbout the event\n"
r"To join this event ...",
},
}),
)
def __init__(self, match):