[issuu] fix extractors (#7317)
This commit is contained in:
@@ -29,9 +29,11 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
|
|||||||
example = "https://issuu.com/issuu/docs/TITLE/"
|
example = "https://issuu.com/issuu/docs/TITLE/"
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
pos = page.rindex('id="initial-data"')
|
|
||||||
data = util.json_loads(text.unescape(text.rextract(
|
data = text.extr(
|
||||||
page, '<script data-json="', '"', pos)[0]))
|
page, '{\\"documentTextVersion\\":', ']\\n"])</script>')
|
||||||
|
data = util.json_loads(text.unescape(
|
||||||
|
'{"":' + data.replace('\\"', '"')))
|
||||||
|
|
||||||
doc = data["initialDocumentData"]["document"]
|
doc = data["initialDocumentData"]["document"]
|
||||||
doc["date"] = text.parse_datetime(
|
doc["date"] = text.parse_datetime(
|
||||||
@@ -39,7 +41,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
|
|||||||
|
|
||||||
self._cnt = text.parse_int(doc["pageCount"])
|
self._cnt = text.parse_int(doc["pageCount"])
|
||||||
self._tpl = "https://{}/{}-{}/jpg/page_{{}}.jpg".format(
|
self._tpl = "https://{}/{}-{}/jpg/page_{{}}.jpg".format(
|
||||||
data["config"]["hosts"]["image"],
|
"image.isu.pub", # data["config"]["hosts"]["image"],
|
||||||
doc["revisionId"],
|
doc["revisionId"],
|
||||||
doc["publicationId"],
|
doc["publicationId"],
|
||||||
)
|
)
|
||||||
@@ -66,9 +68,8 @@ class IssuuUserExtractor(IssuuBase, Extractor):
|
|||||||
url = base + "/" + str(pnum) if pnum > 1 else base
|
url = base + "/" + str(pnum) if pnum > 1 else base
|
||||||
try:
|
try:
|
||||||
html = self.request(url).text
|
html = self.request(url).text
|
||||||
data = util.json_loads(text.unescape(text.extr(
|
data = text.extr(html, '\\"docs\\":', '}]\\n"]')
|
||||||
html, '</main></div><script data-json="', '" id="')))
|
docs = util.json_loads(data.replace('\\"', '"'))
|
||||||
docs = data["docs"]
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.log.debug("", exc_info=exc)
|
self.log.debug("", exc_info=exc)
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
from gallery_dl.extractor import issuu
|
from gallery_dl.extractor import issuu
|
||||||
|
from gallery_dl import exception
|
||||||
|
|
||||||
|
|
||||||
__tests__ = (
|
__tests__ = (
|
||||||
@@ -35,10 +36,11 @@ __tests__ = (
|
|||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://issuu.com/foodhome1955/docs/fh_winter2025-issuu-011625",
|
"#url" : "https://issuu.com/foodhome1955/docs/fh_winter2025-issuu-011625",
|
||||||
"#comment" : "HTML escapes",
|
"#comment" : "HTML escapes",
|
||||||
"#class" : issuu.IssuuPublicationExtractor,
|
"#class" : issuu.IssuuPublicationExtractor,
|
||||||
"#count" : 84,
|
"#exception": exception.NotFoundError,
|
||||||
|
"#count" : 84,
|
||||||
|
|
||||||
"document": {
|
"document": {
|
||||||
"access" : "PUBLIC",
|
"access" : "PUBLIC",
|
||||||
|
|||||||
Reference in New Issue
Block a user