diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py index 113a669a..b3981521 100644 --- a/gallery_dl/extractor/bbc.py +++ b/gallery_dl/extractor/bbc.py @@ -27,7 +27,12 @@ class BbcGalleryExtractor(GalleryExtractor): def metadata(self, page): data = self._extract_jsonld(page) + return { + "title": text.unescape(text.extr( + page, "<h1>", "</h1>").rpartition("</span>")[2]), + "description": text.unescape(text.extr( + page, 'property="og:description" content="', '"')), "programme": self.gallery_url.split("/")[4], "path": list(util.unique_sequence( element["name"] @@ -40,11 +45,20 @@ class BbcGalleryExtractor(GalleryExtractor): width = width - width % 16 if width else 1920 dimensions = "/{}xn/".format(width) - return [ - (src.replace("/320x180_b/", dimensions), - {"_fallback": self._fallback_urls(src, width)}) - for src in text.extract_iter(page, 'data-image-src="', '"') - ] + results = [] + for img in text.extract_iter(page, 'class="gallery__thumbnail', ">"): + src = text.extr(img, 'data-image-src="', '"') + results.append(( + src.replace("/320x180_b/", dimensions), + { + "title_image": text.unescape(text.extr( + img, 'data-gallery-title="', '"')), + "synopsis": text.unescape(text.extr( + img, 'data-gallery-synopsis="', '"')), + "_fallback": self._fallback_urls(src, width), + }, + )) + return results @staticmethod def _fallback_urls(src, max_width): @@ -62,14 +76,11 @@ class BbcProgrammeExtractor(Extractor): pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?" 
example = "https://www.bbc.co.uk/programmes/ID/galleries" - def __init__(self, match): - Extractor.__init__(self, match) - self.path, self.page = match.groups() - def items(self): + path, pnum = self.groups data = {"_extractor": BbcGalleryExtractor} - params = {"page": text.parse_int(self.page, 1)} - galleries_url = self.root + self.path + params = {"page": text.parse_int(pnum, 1)} + galleries_url = self.root + path while True: page = self.request(galleries_url, params=params).text diff --git a/test/results/bbc.py b/test/results/bbc.py index 836786ae..c75246ed 100644 --- a/test/results/bbc.py +++ b/test/results/bbc.py @@ -15,14 +15,52 @@ __tests__ = ( "#pattern" : r"https://ichef\.bbci\.co\.uk/images/ic/1920xn/\w+\.jpg", "#count" : 37, - "programme": "p084qtzs", - "path" : [ + "count" : 37, + "num" : range(1, 37), + "description": "The Cybermen attack. And for the Doctor, nothing will ever be the same.", + "programme" : "p084qtzs", + "synopsis" : "The Cybermen attack. And for the Doctor, nothing will ever be the same.", + "title" : "The Timeless Children", + "title_image": {"The Timeless Children", ": The Timeless Children"}, + "path" : [ "BBC One", "Doctor Who (2005–2022)", "The Timeless Children", ], }, +{ + "#url" : "https://www.bbc.co.uk/programmes/p086f8yf/p086f8j6", + "#category": ("", "bbc", "gallery"), + "#class" : bbc.BbcGalleryExtractor, + "#pattern" : r"https://ichef\.bbci\.co\.uk/images/ic/1920xn/\w+\.jpg", + "#range" : "1-2", + "#count" : 2, + + "count" : 9, + "num" : {1, 2}, + "description": "Continuing his journey, Colin gives unique insights into the unique animals he finds.", + "extension" : "jpg", + "filename" : {"p086f7yn", "p086f80n"}, + "programme" : "p086f8yf", + "title" : "Wild Cuba: A Caribbean Journey - Part 2", + "title_image": { + "Cuba is home to many unique birds", + "A Cuban pygmy owl looks out of its tree hole", + }, + "synopsis" : { + "This vibrant Cuban tody is just one of more than 300 species of bird found in Cuba.", + "Cuban pygmy owls nest in abandoned holes carved out by woodpeckers.", + }, + "path" : [ + "BBC Two", + "Natural World", + "2019-2020", + "Wild Cuba: A Caribbean Journey - Part 2", + "Wildlife camera operator Colin Stafford-Johnson has loved Cuba since he was a little boy" + ], +}, + { "#url" : "https://www.bbc.co.uk/programmes/p084qtzs", "#category": ("", "bbc", "gallery"),