[issuu] fix 'user' extractor

This commit is contained in:
Mike Fährmann
2025-01-27 21:43:15 +01:00
parent fe815b9226
commit 1b5e0c0e87
2 changed files with 28 additions and 20 deletions

View File

@@ -54,26 +54,30 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
class IssuuUserExtractor(IssuuBase, Extractor): class IssuuUserExtractor(IssuuBase, Extractor):
"""Extractor for all publications of a user/publisher""" """Extractor for all publications of a user/publisher"""
subcategory = "user" subcategory = "user"
pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$" pattern = r"(?:https?://)?issuu\.com/([^/?#]+)(?:/(\d*))?$"
example = "https://issuu.com/USER" example = "https://issuu.com/USER"
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
def items(self): def items(self):
url = "{}/call/profile/v1/documents/{}".format(self.root, self.user) user, pnum = self.groups
params = {"offset": 0, "limit": "25"} base = self.root + "/" + user
pnum = text.parse_int(pnum, 1)
while True: while True:
data = self.request(url, params=params).json() url = base + "/" + str(pnum) if pnum > 1 else base
try:
for publication in data["items"]: html = self.request(url).text
publication["url"] = "{}/{}/docs/{}".format( data = util.json_loads(text.unescape(text.extr(
self.root, self.user, publication["uri"]) html, '</main></div><script data-json="', '" id="')))
publication["_extractor"] = IssuuPublicationExtractor docs = data["docs"]
yield Message.Queue, publication["url"], publication except Exception as exc:
self.log.debug("", exc_info=exc)
if not data["hasMore"]:
return return
params["offset"] += data["limit"]
for publication in docs:
url = self.root + "/" + publication["uri"]
publication["_extractor"] = IssuuPublicationExtractor
yield Message.Queue, url, publication
if len(docs) < 48:
return
pnum += 1

View File

@@ -10,7 +10,6 @@ from gallery_dl.extractor import issuu
__tests__ = ( __tests__ = (
{ {
"#url" : "https://issuu.com/issuu/docs/motions-1-2019/", "#url" : "https://issuu.com/issuu/docs/motions-1-2019/",
"#category": ("", "issuu", "publication"),
"#class" : issuu.IssuuPublicationExtractor, "#class" : issuu.IssuuPublicationExtractor,
"#pattern" : r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg", "#pattern" : r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
"#count" : 36, "#count" : 36,
@@ -37,10 +36,15 @@ __tests__ = (
{ {
"#url" : "https://issuu.com/issuu", "#url" : "https://issuu.com/issuu",
"#category": ("", "issuu", "user"),
"#class" : issuu.IssuuUserExtractor, "#class" : issuu.IssuuUserExtractor,
"#pattern" : issuu.IssuuPublicationExtractor.pattern, "#pattern" : issuu.IssuuPublicationExtractor.pattern,
"#count" : "> 25", "#count" : range(100, 150),
},
{
"#url" : "https://issuu.com/issuu/3",
"#class" : issuu.IssuuUserExtractor,
"#count" : range(4, 40),
}, },
) )