[bellazon] add 'quoted' option (#8247)

ignore quoted content by default
This commit is contained in:
Mike Fährmann
2025-09-20 20:56:45 +02:00
parent dbae953c71
commit 6bca0f93a1
3 changed files with 24 additions and 1 deletions

View File

@@ -1600,6 +1600,16 @@ Description
``image``, ``video``, ``mediacollection``, ``embed``, ``text``.
extractor.bellazon.quoted
-------------------------
Type
``bool``
Default
``false``
Description
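Extract files from quoted content, i.e. media inside ``<blockquote>`` elements of a post.
When disabled, quoted content is removed from a post before its files are collected.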
extractor.[blogger].api-key
---------------------------
Type

View File

@@ -154,6 +154,10 @@
"modules": ["image", "video", "mediacollection", "embed"]
},
"bellazon":
{
"quoted": false
},
"bilibili":
{
"sleep-request": "3.0-6.0"

View File

@@ -29,8 +29,17 @@ class BellazonExtractor(Extractor):
r'(?s)<((?:video .*?<source src|a [^>]*?href)="([^"]+).*?)</a>'
).findall
if self.config("quoted", False):
strip_quoted = None
else:
strip_quoted = text.re(r"(?s)<blockquote .*?</blockquote>").sub
for post in self.posts():
urls = extract_urls(post["content"])
if strip_quoted is None:
urls = extract_urls(post["content"])
else:
urls = extract_urls(strip_quoted("", post["content"]))
data = {"post": post}
post["count"] = data["count"] = len(urls)