[joyreactor] fix and improve pagination (#148)
This commit is contained in:
@@ -54,13 +54,12 @@ class JoyreactorExtractor(Extractor):
|
|||||||
yield from text.extract_iter(
|
yield from text.extract_iter(
|
||||||
page, '<div class="uhead">', '<div class="ufoot">')
|
page, '<div class="uhead">', '<div class="ufoot">')
|
||||||
|
|
||||||
pos = page.find("<span class='current'>")
|
try:
|
||||||
if pos == -1 or page[pos+21:pos+24] == ">1<":
|
pos = page.index("class='next'")
|
||||||
|
pos = page.rindex("class='current'", 0, pos)
|
||||||
|
url = self.root + text.extract(page, "href='", "'", pos)[0]
|
||||||
|
except (ValueError, TypeError):
|
||||||
return
|
return
|
||||||
path = text.extract(page, "href='", "'", pos)[0]
|
|
||||||
if not path:
|
|
||||||
return
|
|
||||||
url = self.root + path
|
|
||||||
|
|
||||||
def _parse_post(self, post):
|
def _parse_post(self, post):
|
||||||
post, _, script = post.partition('<script type="application/ld+json">')
|
post, _, script = post.partition('<script type="application/ld+json">')
|
||||||
@@ -75,7 +74,7 @@ class JoyreactorExtractor(Extractor):
|
|||||||
script = script.translate(mapping).replace("\\", "\\\\")
|
script = script.translate(mapping).replace("\\", "\\\\")
|
||||||
data = json.loads(script)
|
data = json.loads(script)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
self.log.warning("Unable to parse post: %s", exc)
|
self.log.warning("Unable to parse JSON data: %s", exc)
|
||||||
return
|
return
|
||||||
|
|
||||||
num = 0
|
num = 0
|
||||||
@@ -148,10 +147,11 @@ class JoyreactorSearchExtractor(JoyreactorTagExtractor):
|
|||||||
pattern = [BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
|
pattern = [BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
|
||||||
test = [
|
test = [
|
||||||
("http://joyreactor.com/search?q=Cirno+Gifs", {
|
("http://joyreactor.com/search?q=Cirno+Gifs", {
|
||||||
"count": ">= 0",
|
"count": 0, # no search results on joyreactor.com
|
||||||
}),
|
}),
|
||||||
("http://joyreactor.cc/search/Cirno+Gifs", {
|
("http://joyreactor.cc/search/Cirno+Gifs", {
|
||||||
"count": ">= 0",
|
"range": "1-25",
|
||||||
|
"count": ">= 20",
|
||||||
}),
|
}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user