diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index 113a669a..b3981521 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -27,7 +27,12 @@ class BbcGalleryExtractor(GalleryExtractor):
def metadata(self, page):
data = self._extract_jsonld(page)
+
return {
+ "title": text.unescape(text.extr(
+ page, "<h1>", "</h1>").rpartition("</span>")[2]),
+ "description": text.unescape(text.extr(
+ page, 'property="og:description" content="', '"')),
"programme": self.gallery_url.split("/")[4],
"path": list(util.unique_sequence(
element["name"]
@@ -40,11 +45,20 @@ class BbcGalleryExtractor(GalleryExtractor):
width = width - width % 16 if width else 1920
dimensions = "/{}xn/".format(width)
- return [
- (src.replace("/320x180_b/", dimensions),
- {"_fallback": self._fallback_urls(src, width)})
- for src in text.extract_iter(page, 'data-image-src="', '"')
- ]
+ results = []
+ for img in text.extract_iter(page, 'class="gallery__thumbnail', ">"):
+ src = text.extr(img, 'data-image-src="', '"')
+ results.append((
+ src.replace("/320x180_b/", dimensions),
+ {
+ "title_image": text.unescape(text.extr(
+ img, 'data-gallery-title="', '"')),
+ "synopsis": text.unescape(text.extr(
+ img, 'data-gallery-synopsis="', '"')),
+ "_fallback": self._fallback_urls(src, width),
+ },
+ ))
+ return results
@staticmethod
def _fallback_urls(src, max_width):
@@ -62,14 +76,11 @@ class BbcProgrammeExtractor(Extractor):
pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
example = "https://www.bbc.co.uk/programmes/ID/galleries"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.path, self.page = match.groups()
-
def items(self):
+ path, pnum = self.groups
data = {"_extractor": BbcGalleryExtractor}
- params = {"page": text.parse_int(self.page, 1)}
- galleries_url = self.root + self.path
+ params = {"page": text.parse_int(pnum, 1)}
+ galleries_url = self.root + path
while True:
page = self.request(galleries_url, params=params).text
diff --git a/test/results/bbc.py b/test/results/bbc.py
index 836786ae..c75246ed 100644
--- a/test/results/bbc.py
+++ b/test/results/bbc.py
@@ -15,14 +15,52 @@ __tests__ = (
"#pattern" : r"https://ichef\.bbci\.co\.uk/images/ic/1920xn/\w+\.jpg",
"#count" : 37,
- "programme": "p084qtzs",
- "path" : [
+ "count" : 37,
+ "num" : range(1, 37),
+ "description": "The Cybermen attack. And for the Doctor, nothing will ever be the same.",
+ "programme" : "p084qtzs",
+ "synopsis" : "The Cybermen attack. And for the Doctor, nothing will ever be the same.",
+ "title" : "The Timeless Children",
+ "title_image": {"The Timeless Children", ": The Timeless Children"},
+ "path" : [
"BBC One",
"Doctor Who (2005–2022)",
"The Timeless Children",
],
},
+{
+ "#url" : "https://www.bbc.co.uk/programmes/p086f8yf/p086f8j6",
+ "#category": ("", "bbc", "gallery"),
+ "#class" : bbc.BbcGalleryExtractor,
+ "#pattern" : r"https://ichef\.bbci\.co\.uk/images/ic/1920xn/\w+\.jpg",
+ "#range" : "1-2",
+ "#count" : 2,
+
+ "count" : 9,
+ "num" : {1, 2},
+ "description": "Continuing his journey, Colin gives unique insights into the unique animals he finds.",
+ "extension" : "jpg",
+ "filename" : {"p086f7yn", "p086f80n"},
+ "programme" : "p086f8yf",
+ "title" : "Wild Cuba: A Caribbean Journey - Part 2",
+ "title_image": {
+ "Cuba is home to many unique birds",
+ "A Cuban pygmy owl looks out of its tree hole",
+ },
+ "synopsis" : {
+ "This vibrant Cuban tody is just one of more than 300 species of bird found in Cuba.",
+ "Cuban pygmy owls nest in abandoned holes carved out by woodpeckers.",
+ },
+ "path" : [
+ "BBC Two",
+ "Natural World",
+ "2019-2020",
+ "Wild Cuba: A Caribbean Journey - Part 2",
+ "Wildlife camera operator Colin Stafford-Johnson has loved Cuba since he was a little boy"
+ ],
+},
+
{
"#url" : "https://www.bbc.co.uk/programmes/p084qtzs",
"#category": ("", "bbc", "gallery"),