diff --git a/docs/configuration.rst b/docs/configuration.rst index fa278861..a71ac9b6 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1600,6 +1600,16 @@ Description ``image``, ``video``, ``mediacollection``, ``embed``, ``text``. +extractor.bellazon.quoted +------------------------- +Type + ``bool`` +Default + ``false`` +Description + Extract files from quoted content. + + extractor.[blogger].api-key --------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index e4cba423..d03effd1 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -154,6 +154,10 @@ "modules": ["image", "video", "mediacollection", "embed"] }, + "bellazon": + { + "quoted": false + }, "bilibili": { "sleep-request": "3.0-6.0" diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py index b7eb46ae..cae69663 100644 --- a/gallery_dl/extractor/bellazon.py +++ b/gallery_dl/extractor/bellazon.py @@ -29,8 +29,17 @@ class BellazonExtractor(Extractor): r'(?s)<((?:video .*?<source src|a [^>]*?href)="([^"]+).*?</a>)' ).findall + if self.config("quoted", False): + strip_quoted = None + else: + strip_quoted = text.re(r"(?s)<blockquote .*?</blockquote>").sub + for post in self.posts(): - urls = extract_urls(post["content"]) + if strip_quoted is None: + urls = extract_urls(post["content"]) + else: + urls = extract_urls(strip_quoted("", post["content"])) + data = {"post": post} post["count"] = data["count"] = len(urls)