[joyreactor] fix and improve pagination (#148)

This commit is contained in:
Mike Fährmann
2019-01-03 22:11:38 +01:00
parent 8753627ef4
commit 1737d7f576

View File

@@ -54,13 +54,12 @@ class JoyreactorExtractor(Extractor):
             yield from text.extract_iter(
                 page, '<div class="uhead">', '<div class="ufoot">')
 
-            pos = page.find("<span class='current'>")
-            if pos == -1 or page[pos+21:pos+24] == ">1<":
-                return
-            path = text.extract(page, "href='", "'", pos)[0]
-            if not path:
-                return
-            url = self.root + path
+            try:
+                pos = page.index("class='next'")
+                pos = page.rindex("class='current'", 0, pos)
+                url = self.root + text.extract(page, "href='", "'", pos)[0]
+            except (ValueError, TypeError):
+                return
 
     def _parse_post(self, post):
         post, _, script = post.partition('<script type="application/ld+json">')
@@ -75,7 +74,7 @@ class JoyreactorExtractor(Extractor):
             script = script.translate(mapping).replace("\\", "\\\\")
             data = json.loads(script)
         except ValueError as exc:
-            self.log.warning("Unable to parse post: %s", exc)
+            self.log.warning("Unable to parse JSON data: %s", exc)
             return
 
         num = 0
@@ -148,10 +147,11 @@ class JoyreactorSearchExtractor(JoyreactorTagExtractor):
     pattern = [BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"]
     test = [
         ("http://joyreactor.com/search?q=Cirno+Gifs", {
-            "count": ">= 0",
+            "count": 0,  # no search results on joyreactor.com
         }),
         ("http://joyreactor.cc/search/Cirno+Gifs", {
-            "count": ">= 0",
+            "range": "1-25",
+            "count": ">= 20",
         }),
     ]
 