[gelbooru] work around pagination limits
Gelbooru only allows retrieving the latest 20k posts for a tag search. Add 'id:<N' to the search tags to work around that limitation, where N is the ID of the last retrieved post. See http://gelbooru.me/index.php?page=forum&s=view&id=1467
This commit is contained in:
@@ -27,8 +27,21 @@ class GelbooruV02Extractor(booru.BooruExtractor):
|
|||||||
params["pid"] = self.page_start
|
params["pid"] = self.page_start
|
||||||
params["limit"] = self.per_page
|
params["limit"] = self.per_page
|
||||||
|
|
||||||
|
post = None
|
||||||
while True:
|
while True:
|
||||||
root = self._api_request(params)
|
try:
|
||||||
|
root = self._api_request(params)
|
||||||
|
except ElementTree.ParseError:
|
||||||
|
if "tags" not in params or post is None:
|
||||||
|
raise
|
||||||
|
taglist = [tag for tag in params["tags"].split()
|
||||||
|
if not tag.startswith("id:<")]
|
||||||
|
taglist.append("id:<" + str(post.attrib["id"]))
|
||||||
|
params["tags"] = " ".join(taglist)
|
||||||
|
params["pid"] = 0
|
||||||
|
continue
|
||||||
|
|
||||||
|
post = None
|
||||||
for post in root:
|
for post in root:
|
||||||
yield post.attrib
|
yield post.attrib
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user