[imhentai] improve pagination duplicate filtering
This commit is contained in:
@@ -16,6 +16,7 @@ class ImhentaiExtractor(BaseExtractor):
|
|||||||
basecategory = "IMHentai"
|
basecategory = "IMHentai"
|
||||||
|
|
||||||
def _pagination(self, url):
|
def _pagination(self, url):
|
||||||
|
prev = None
|
||||||
base = self.root + "/gallery/"
|
base = self.root + "/gallery/"
|
||||||
data = {"_extractor": ImhentaiGalleryExtractor}
|
data = {"_extractor": ImhentaiGalleryExtractor}
|
||||||
|
|
||||||
@@ -25,10 +26,12 @@ class ImhentaiExtractor(BaseExtractor):
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
gallery_id = extr('<a href="/gallery/', '"')
|
gallery_id = extr('<a href="/gallery/', '"')
|
||||||
|
if gallery_id == prev:
|
||||||
|
continue
|
||||||
if not gallery_id:
|
if not gallery_id:
|
||||||
break
|
break
|
||||||
yield Message.Queue, base + gallery_id, data
|
yield Message.Queue, base + gallery_id, data
|
||||||
extr('<a href="/gallery/', '"') # skip duplicate GIDs
|
prev = gallery_id
|
||||||
|
|
||||||
href = text.rextract(page, "class='page-link' href='", "'")[0]
|
href = text.rextract(page, "class='page-link' href='", "'")[0]
|
||||||
if not href or href == "#":
|
if not href or href == "#":
|
||||||
|
|||||||
Reference in New Issue
Block a user