[issuu] fix 'user' extractor
This commit is contained in:
@@ -54,26 +54,30 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
|
|||||||
class IssuuUserExtractor(IssuuBase, Extractor):
|
class IssuuUserExtractor(IssuuBase, Extractor):
|
||||||
"""Extractor for all publications of a user/publisher"""
|
"""Extractor for all publications of a user/publisher"""
|
||||||
subcategory = "user"
|
subcategory = "user"
|
||||||
pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$"
|
pattern = r"(?:https?://)?issuu\.com/([^/?#]+)(?:/(\d*))?$"
|
||||||
example = "https://issuu.com/USER"
|
example = "https://issuu.com/USER"
|
||||||
|
|
||||||
def __init__(self, match):
|
|
||||||
Extractor.__init__(self, match)
|
|
||||||
self.user = match.group(1)
|
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/call/profile/v1/documents/{}".format(self.root, self.user)
|
user, pnum = self.groups
|
||||||
params = {"offset": 0, "limit": "25"}
|
base = self.root + "/" + user
|
||||||
|
pnum = text.parse_int(pnum, 1)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = self.request(url, params=params).json()
|
url = base + "/" + str(pnum) if pnum > 1 else base
|
||||||
|
try:
|
||||||
for publication in data["items"]:
|
html = self.request(url).text
|
||||||
publication["url"] = "{}/{}/docs/{}".format(
|
data = util.json_loads(text.unescape(text.extr(
|
||||||
self.root, self.user, publication["uri"])
|
html, '</main></div><script data-json="', '" id="')))
|
||||||
publication["_extractor"] = IssuuPublicationExtractor
|
docs = data["docs"]
|
||||||
yield Message.Queue, publication["url"], publication
|
except Exception as exc:
|
||||||
|
self.log.debug("", exc_info=exc)
|
||||||
if not data["hasMore"]:
|
|
||||||
return
|
return
|
||||||
params["offset"] += data["limit"]
|
|
||||||
|
for publication in docs:
|
||||||
|
url = self.root + "/" + publication["uri"]
|
||||||
|
publication["_extractor"] = IssuuPublicationExtractor
|
||||||
|
yield Message.Queue, url, publication
|
||||||
|
|
||||||
|
if len(docs) < 48:
|
||||||
|
return
|
||||||
|
pnum += 1
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ from gallery_dl.extractor import issuu
|
|||||||
__tests__ = (
|
__tests__ = (
|
||||||
{
|
{
|
||||||
"#url" : "https://issuu.com/issuu/docs/motions-1-2019/",
|
"#url" : "https://issuu.com/issuu/docs/motions-1-2019/",
|
||||||
"#category": ("", "issuu", "publication"),
|
|
||||||
"#class" : issuu.IssuuPublicationExtractor,
|
"#class" : issuu.IssuuPublicationExtractor,
|
||||||
"#pattern" : r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
|
"#pattern" : r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
|
||||||
"#count" : 36,
|
"#count" : 36,
|
||||||
@@ -37,10 +36,15 @@ __tests__ = (
|
|||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://issuu.com/issuu",
|
"#url" : "https://issuu.com/issuu",
|
||||||
"#category": ("", "issuu", "user"),
|
|
||||||
"#class" : issuu.IssuuUserExtractor,
|
"#class" : issuu.IssuuUserExtractor,
|
||||||
"#pattern" : issuu.IssuuPublicationExtractor.pattern,
|
"#pattern" : issuu.IssuuPublicationExtractor.pattern,
|
||||||
"#count" : "> 25",
|
"#count" : range(100, 150),
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://issuu.com/issuu/3",
|
||||||
|
"#class" : issuu.IssuuUserExtractor,
|
||||||
|
"#count" : range(4, 40),
|
||||||
},
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user