[issuu] fix 'user' extractor

This commit is contained in:
Mike Fährmann
2025-01-27 21:43:15 +01:00
parent fe815b9226
commit 1b5e0c0e87
2 changed files with 28 additions and 20 deletions

View File

@@ -54,26 +54,30 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
# NOTE(review): this span appears to be a commit-diff hunk rendered without
# +/- markers or indentation, so lines from two revisions seem to sit side by
# side. Confirm against the real file which lines are live before editing.
class IssuuUserExtractor(IssuuBase, Extractor):
"""Extractor for all publications of a user/publisher"""
subcategory = "user"
# Two consecutive assignments to `pattern`. The first matches a bare
# issuu.com/<user> URL with an optional trailing slash; the second also
# accepts an optional "/<digits>" segment, captured as a second group.
pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$"
pattern = r"(?:https?://)?issuu\.com/([^/?#]+)(?:/(\d*))?$"
example = "https://issuu.com/USER"
# Stores the first regex group (the path segment after issuu.com/) as
# self.user.
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
# Generator: yields one Message.Queue entry per publication and tags each
# publication dict with IssuuPublicationExtractor under "_extractor".
def items(self):
# Offset/limit variant: queries a JSON endpoint under
# /call/profile/v1/documents/<user>, starting at offset 0.
url = "{}/call/profile/v1/documents/{}".format(self.root, self.user)
params = {"offset": 0, "limit": "25"}
# Page-number variant: reads user and page number from self.groups
# (presumably the two regex groups of `pattern` -- confirm).
user, pnum = self.groups
base = self.root + "/" + user
# presumably falls back to 1 when no page number was captured -- verify
# against text.parse_int
pnum = text.parse_int(pnum, 1)
while True:
# Offset/limit variant: each response is read for "items" and "hasMore".
data = self.request(url, params=params).json()
for publication in data["items"]:
publication["url"] = "{}/{}/docs/{}".format(
self.root, self.user, publication["uri"])
publication["_extractor"] = IssuuPublicationExtractor
yield Message.Queue, publication["url"], publication
if not data["hasMore"]:
# Page-number variant: page 1 uses the bare profile URL, later pages
# append "/<pnum>".
url = base + "/" + str(pnum) if pnum > 1 else base
# Fetch the page HTML, take the text between the two markers, unescape it
# and JSON-decode it, then read its "docs" entry. Any exception is logged
# at debug level (with the exception attached) and ends the generator.
try:
html = self.request(url).text
data = util.json_loads(text.unescape(text.extr(
html, '</main></div><script data-json="', '" id="')))
docs = data["docs"]
except Exception as exc:
self.log.debug("", exc_info=exc)
return
# Offset/limit variant: advance the offset by the "limit" value taken from
# the response.
params["offset"] += data["limit"]
# Page-number variant: queue each publication at <root>/<uri>.
for publication in docs:
url = self.root + "/" + publication["uri"]
publication["_extractor"] = IssuuPublicationExtractor
yield Message.Queue, url, publication
# Fewer than 48 entries ends pagination -- presumably 48 is the site's page
# size; TODO confirm.
if len(docs) < 48:
return
pnum += 1

View File

@@ -10,7 +10,6 @@ from gallery_dl.extractor import issuu
__tests__ = (
{
"#url" : "https://issuu.com/issuu/docs/motions-1-2019/",
"#category": ("", "issuu", "publication"),
"#class" : issuu.IssuuPublicationExtractor,
"#pattern" : r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
"#count" : 36,
@@ -37,10 +36,15 @@ __tests__ = (
{
"#url" : "https://issuu.com/issuu",
"#category": ("", "issuu", "user"),
"#class" : issuu.IssuuUserExtractor,
"#pattern" : issuu.IssuuPublicationExtractor.pattern,
"#count" : "> 25",
"#count" : range(100, 150),
},
{
"#url" : "https://issuu.com/issuu/3",
"#class" : issuu.IssuuUserExtractor,
"#count" : range(4, 40),
},
)