diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index eba75452..362c1a39 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -9,7 +9,7 @@ """Base classes for extractors for danbooru and co""" from .common import SharedConfigExtractor, Message -from .. import text +from .. import text, exception from xml.etree import ElementTree import collections import datetime @@ -167,7 +167,39 @@ class PoolMixin(): self.params["limit"] = self.per_page def get_metadata(self): - return {"pool": self.pool} + return {"pool": text.parse_int(self.pool)} + + +class GelbooruPoolMixin(PoolMixin): + """Image-pool extraction for Gelbooru-like sites""" + per_page = 1 + + def get_metadata(self): + page = self.request(self.pool_url.format(self.pool)).text + name, pos = text.extract(page, "

Now Viewing: ", "

") + if not name: + name, pos = text.extract(page, "

Pool: ", "

") + if not name: + raise exception.NotFoundError("pool") + self.posts = list(text.extract_iter(page, 'id="p', '"', pos)) + + return { + "pool": text.parse_int(self.pool), + "pool_name": text.unescape(name or ""), + "count": len(self.posts), + } + + def reset_page(self): + self.index = self.page_start + self.update_page(None) + + def update_page(self, data): + try: + post = self.posts[self.index] + self.index += 1 + except IndexError: + post = "0" + self.params["tags"] = "id:" + post class PostMixin(): diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 99ae4162..c8b25475 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -10,7 +10,7 @@ from . import booru from .common import Message -from .. import text, exception +from .. import text, util class GelbooruExtractor(booru.XmlParserMixin, @@ -20,6 +20,7 @@ class GelbooruExtractor(booru.XmlParserMixin, category = "gelbooru" api_url = "https://gelbooru.com/index.php" post_url = "https://gelbooru.com/index.php?page=post&s=view&id={}" + pool_url = "https://gelbooru.com/index.php?page=pool&s=show&id={}" def __init__(self, match): super().__init__(match) @@ -102,31 +103,16 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor): params["pid"] += self.per_page -class GelbooruPoolExtractor(GelbooruExtractor): +class GelbooruPoolExtractor(booru.GelbooruPoolMixin, GelbooruExtractor): """Extractor for image-pools from gelbooru.com""" pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?" - r"\?page=pool&s=show&id=(\d+)"] + r"\?page=pool&s=show&id=(?P\d+)"] test = [("https://gelbooru.com/index.php?page=pool&s=show&id=761", { "count": 6, })] - def get_metadata(self): - page = self.request("https://gelbooru.com/index.php?page=pool&s=show" - "&id=" + self.pool_id).text - name, pos = text.extract(page, "

Now Viewing: ", "

") - self.posts = list(text.extract_iter(page, 'id="p', '"', pos)) - - if not name: - raise exception.NotFoundError("pool") - - return { - "pool": text.parse_int(self.pool_id), - "pool_name": text.unescape(name), - "count": len(self.posts), - } - def get_posts(self): - return self.posts + return util.advance(self.posts, self.page_start) class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor):