From 155e1faeaff1040410b91d3b7df15067464435f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 11 Apr 2019 19:12:27 +0200 Subject: [PATCH] [imagebam] support galleries with >100 images (fixes #219) --- gallery_dl/extractor/imagebam.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index 121b55c5..6980185c 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -26,6 +26,11 @@ class ImagebamExtractor(Extractor): data["image_id"] = data["image_key"][6:] return image_url + def request_page(self, url): + """Retrieve the main part of a gallery page""" + page = self.request(text.urljoin(self.root, url)).text + return text.extract(page, "
", "
")[0] + class ImagebamGalleryExtractor(ImagebamExtractor): """Extractor for image galleries from imagebam.com""" @@ -40,6 +45,11 @@ class ImagebamGalleryExtractor(ImagebamExtractor): "keyword": "9e25b8827474ac93c54855e798d60aa3cbecbd7a", "content": "596e6bfa157f2c7169805d50075c2986549973a8", }), + ("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", { + # more than 100 images; see issue #219 + "count": 107, + "url": "f92ce5b17676b6ea69288f0aef26f4cdbea7fd8d", + }), ("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", { "exception": exception.NotFoundError, }), @@ -51,10 +61,9 @@ class ImagebamGalleryExtractor(ImagebamExtractor): def items(self): url = "{}/gallery/{}".format(self.root, self.gallery_key) - page = self.request(url).text - if ">Error<" in page: + page = self.request_page(url) + if not page or ">Error<" in page: raise exception.NotFoundError("gallery") - page = text.extract(page, "
", "
")[0] data = self.get_metadata(page) imgs = self.get_image_pages(page) @@ -76,10 +85,18 @@ class ImagebamGalleryExtractor(ImagebamExtractor): ("description", ":#FCFCFC;'>", ""), ))[0] - @staticmethod - def get_image_pages(page): + def get_image_pages(self, page): """Return a list of all image pages""" - return list(text.extract_iter(page, " 0: + url = text.extract(page, "