[bellazon] add 'quoted' option (#8247)
ignore quoted content by default
This commit is contained in:
@@ -1600,6 +1600,16 @@ Description
|
||||
``image``, ``video``, ``mediacollection``, ``embed``, ``text``.
|
||||
|
||||
|
||||
extractor.bellazon.quoted
|
||||
-------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Also extract files from quoted content (``<blockquote>`` elements) in posts. By default, quoted content is ignored.
|
||||
|
||||
|
||||
extractor.[blogger].api-key
|
||||
---------------------------
|
||||
Type
|
||||
|
||||
@@ -154,6 +154,10 @@
|
||||
|
||||
"modules": ["image", "video", "mediacollection", "embed"]
|
||||
},
|
||||
"bellazon":
|
||||
{
|
||||
"quoted": false
|
||||
},
|
||||
"bilibili":
|
||||
{
|
||||
"sleep-request": "3.0-6.0"
|
||||
|
||||
@@ -29,8 +29,17 @@ class BellazonExtractor(Extractor):
|
||||
r'(?s)<((?:video .*?<source src|a [^>]*?href)="([^"]+).*?)</a>'
|
||||
).findall
|
||||
|
||||
if self.config("quoted", False):
|
||||
strip_quoted = None
|
||||
else:
|
||||
strip_quoted = text.re(r"(?s)<blockquote .*?</blockquote>").sub
|
||||
|
||||
for post in self.posts():
|
||||
urls = extract_urls(post["content"])
|
||||
if strip_quoted is None:
|
||||
urls = extract_urls(post["content"])
|
||||
else:
|
||||
urls = extract_urls(strip_quoted("", post["content"]))
|
||||
|
||||
data = {"post": post}
|
||||
post["count"] = data["count"] = len(urls)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user