[xenforo] add 'attachments' & 'embeds' options
This commit is contained in:
@@ -7247,6 +7247,26 @@ Description
|
|||||||
For ``Category:`` pages, recursively descend into subcategories.
|
For ``Category:`` pages, recursively descend into subcategories.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.[xenforo].attachments
|
||||||
|
-------------------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``true``
|
||||||
|
Description
|
||||||
|
Extract forum post attachments.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.[xenforo].embeds
|
||||||
|
--------------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``true``
|
||||||
|
Description
|
||||||
|
Extract URLs of forum post embeds.
|
||||||
|
|
||||||
|
|
||||||
extractor.[xenforo].metadata
|
extractor.[xenforo].metadata
|
||||||
----------------------------
|
----------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -1196,6 +1196,8 @@
|
|||||||
|
|
||||||
"xenforo":
|
"xenforo":
|
||||||
{
|
{
|
||||||
|
"attachments": true,
|
||||||
|
"embeds" : true,
|
||||||
"metadata" : false,
|
"metadata" : false,
|
||||||
"order-posts": "desc"
|
"order-posts": "desc"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -42,13 +42,16 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
r')'
|
r')'
|
||||||
).findall
|
).findall
|
||||||
|
|
||||||
|
embeds = self.config("embeds", True)
|
||||||
|
attachments = self.config("attachments", True)
|
||||||
|
|
||||||
root = self.root
|
root = self.root
|
||||||
base = root if (pos := root.find("/", 8)) < 0 else root[:pos]
|
base = root if (pos := root.find("/", 8)) < 0 else root[:pos]
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
urls = extract_urls(post["content"])
|
urls = extract_urls(post["content"])
|
||||||
if "data-s9e-mediaembed-iframe=" in post["content"]:
|
if embeds and "data-s9e-mediaembed-iframe=" in post["content"]:
|
||||||
self._extract_embeds(urls, post)
|
self._extract_embeds(urls, post)
|
||||||
if post["attachments"]:
|
if attachments and post["attachments"]:
|
||||||
self._extract_attachments(urls, post)
|
self._extract_attachments(urls, post)
|
||||||
|
|
||||||
data = {"post": post}
|
data = {"post": post}
|
||||||
|
|||||||
Reference in New Issue
Block a user