[issuu] unescape HTML entities

This commit is contained in:
Mike Fährmann
2025-02-02 18:33:18 +01:00
parent 6c9b20fe45
commit 5807daa19a
2 changed files with 33 additions and 2 deletions

View File

@@ -30,8 +30,8 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
def metadata(self, page):
pos = page.rindex('id="initial-data"')
-data = util.json_loads(text.rextract(
-page, '<script data-json="', '"', pos)[0].replace("&quot;", '"'))
+data = util.json_loads(text.unescape(text.rextract(
+page, '<script data-json="', '"', pos)[0]))
doc = data["initialDocumentData"]["document"]
doc["date"] = text.parse_datetime(