From 57c1a8608216a5118a5b5c778be0f076c2a2c4cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 28 Jul 2021 22:42:33 +0200 Subject: [PATCH] [bbc] support multi-page gallery listings (closes #1730) --- gallery_dl/extractor/bbc.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py index ace8a285..804a56e2 100644 --- a/gallery_dl/extractor/bbc.py +++ b/gallery_dl/extractor/bbc.py @@ -60,21 +60,34 @@ class BbcProgrammeExtractor(Extractor): category = "bbc" subcategory = "programme" root = "https://www.bbc.co.uk" - pattern = BASE_PATTERN + r"[^/?#]+/galleries)" - test = ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", { - "pattern": BbcGalleryExtractor.pattern, - "count": ">= 24", - }) + pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?" + test = ( + ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", { + "pattern": BbcGalleryExtractor.pattern, + "range": "1-50", + "count": ">= 50", + }), + ("https://www.bbc.co.uk/programmes/b006q2x0/galleries?page=40", { + "pattern": BbcGalleryExtractor.pattern, + "count": ">= 100", + }), + ) def __init__(self, match): Extractor.__init__(self, match) - self.galleries_url = self.root + match.group(1) + self.path, self.page = match.groups() def items(self): - page = self.request(self.galleries_url).text data = {"_extractor": BbcGalleryExtractor} + params = {"page": text.parse_int(self.page, 1)} + galleries_url = self.root + self.path - for programme_id in text.extract_iter( - page, '