diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index eba75452..362c1a39 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -9,7 +9,7 @@
"""Base classes for extractors for danbooru and co"""
from .common import SharedConfigExtractor, Message
-from .. import text
+from .. import text, exception
from xml.etree import ElementTree
import collections
import datetime
@@ -167,7 +167,39 @@ class PoolMixin():
self.params["limit"] = self.per_page
def get_metadata(self):
- return {"pool": self.pool}
+ """Return pool metadata; the pool ID goes through text.parse_int()"""
+ return {"pool": text.parse_int(self.pool)}
+
+
+class GelbooruPoolMixin(PoolMixin):
+    """Image-pool extraction for Gelbooru-like sites
+
+    get_metadata() reads the post IDs of a pool from its HTML page into
+    self.posts; update_page() then sets the "tags" query parameter to
+    "id:<post>" for one entry of self.posts at a time (per_page is 1).
+    """
+    per_page = 1
+
+    def get_metadata(self):
+        """Fetch the pool page and return the pool's ID, name and post count
+
+        Stores the post IDs found on the page in self.posts.
+        Raises exception.NotFoundError if no pool title can be extracted.
+        """
+        page = self.request(self.pool_url.format(self.pool)).text
+        # NOTE(review): the heading tags around both titles were lost in
+        # this patch and are restored here as <h3>/<h4> - confirm against
+        # the markup the target sites actually serve.
+        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+        if not name:
+            name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
+        if not name:
+            raise exception.NotFoundError("pool")
+        self.posts = list(text.extract_iter(page, 'id="p', '"', pos))
+
+        return {
+            "pool": text.parse_int(self.pool),
+            # 'name' is guaranteed non-empty here by the check above
+            "pool_name": text.unescape(name),
+            "count": len(self.posts),
+        }
+
+    def reset_page(self):
+        """Set self.index back to self.page_start and select that post"""
+        self.index = self.page_start
+        self.update_page(None)
+
+    def update_page(self, data):
+        """Put the next entry of self.posts into the "tags" parameter
+
+        'data' is not used. When self.index is past the end of
+        self.posts, "id:0" is set instead and self.index stays unchanged.
+        """
+        try:
+            post = self.posts[self.index]
+        except IndexError:
+            post = "0"
+        else:
+            self.index += 1
+        self.params["tags"] = "id:" + post
class PostMixin():
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 99ae4162..c8b25475 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -10,7 +10,7 @@
from . import booru
from .common import Message
-from .. import text, exception
+from .. import text, util
class GelbooruExtractor(booru.XmlParserMixin,
@@ -20,6 +20,7 @@ class GelbooruExtractor(booru.XmlParserMixin,
category = "gelbooru"
api_url = "https://gelbooru.com/index.php"
post_url = "https://gelbooru.com/index.php?page=post&s=view&id={}"
+ pool_url = "https://gelbooru.com/index.php?page=pool&s=show&id={}"
def __init__(self, match):
super().__init__(match)
@@ -102,31 +103,16 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor):
params["pid"] += self.per_page
-class GelbooruPoolExtractor(GelbooruExtractor):
+class GelbooruPoolExtractor(booru.GelbooruPoolMixin, GelbooruExtractor):
     """Extractor for image-pools from gelbooru.com"""
+    # NOTE(review): group name restored as "pool" because the mixin's
+    # get_metadata() reads self.pool - confirm against PoolMixin.__init__
     pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(\d+)"]
+               r"\?page=pool&s=show&id=(?P<pool>\d+)"]
     test = [("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
         "count": 6,
     })]
 
-    def get_metadata(self):
-        page = self.request("https://gelbooru.com/index.php?page=pool&s=show"
-                            "&id=" + self.pool_id).text
-        name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
-        self.posts = list(text.extract_iter(page, 'id="p', '"', pos))
-
-        if not name:
-            raise exception.NotFoundError("pool")
-
-        return {
-            "pool": text.parse_int(self.pool_id),
-            "pool_name": text.unescape(name),
-            "count": len(self.posts),
-        }
-
     def get_posts(self):
-        return self.posts
+        """Return self.posts advanced by self.page_start via util.advance()"""
+        return util.advance(self.posts, self.page_start)
class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):