[reddit] add 'selftext' option (#7111)
This commit is contained in:
@@ -4005,6 +4005,17 @@ Description
|
|||||||
at 600 requests every 10 minutes/600 seconds.
|
at 600 requests every 10 minutes/600 seconds.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.reddit.selftext
|
||||||
|
-------------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
* ``true`` if `comments <extractor.reddit.comments_>`__ are enabled
|
||||||
|
* ``false`` otherwise
|
||||||
|
Description
|
||||||
|
Follow links in the original post's ``selftext``.
|
||||||
|
|
||||||
|
|
||||||
extractor.reddit.videos
|
extractor.reddit.videos
|
||||||
-----------------------
|
-----------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -512,6 +512,7 @@
|
|||||||
"id-max" : null,
|
"id-max" : null,
|
||||||
"previews" : true,
|
"previews" : true,
|
||||||
"recursion" : 0,
|
"recursion" : 0,
|
||||||
|
"selftext" : null,
|
||||||
"videos" : true
|
"videos" : true
|
||||||
},
|
},
|
||||||
"redgifs":
|
"redgifs":
|
||||||
|
|||||||
@@ -41,6 +41,11 @@ class RedditExtractor(Extractor):
|
|||||||
self._extract_video = self._extract_video_dash
|
self._extract_video = self._extract_video_dash
|
||||||
videos = True
|
videos = True
|
||||||
|
|
||||||
|
selftext = self.config("selftext")
|
||||||
|
if selftext is None:
|
||||||
|
selftext = self.api.comments
|
||||||
|
selftext = True if selftext else False
|
||||||
|
|
||||||
submissions = self.submissions()
|
submissions = self.submissions()
|
||||||
visited = set()
|
visited = set()
|
||||||
depth = 0
|
depth = 0
|
||||||
@@ -92,12 +97,12 @@ class RedditExtractor(Extractor):
|
|||||||
elif parentdir:
|
elif parentdir:
|
||||||
yield Message.Directory, comments[0]
|
yield Message.Directory, comments[0]
|
||||||
|
|
||||||
|
if selftext and submission:
|
||||||
|
for url in text.extract_iter(
|
||||||
|
submission["selftext_html"] or "", ' href="', '"'):
|
||||||
|
urls.append((url, submission))
|
||||||
|
|
||||||
if self.api.comments:
|
if self.api.comments:
|
||||||
if submission:
|
|
||||||
for url in text.extract_iter(
|
|
||||||
submission["selftext_html"] or "",
|
|
||||||
' href="', '"'):
|
|
||||||
urls.append((url, submission))
|
|
||||||
for comment in comments:
|
for comment in comments:
|
||||||
html = comment["body_html"] or ""
|
html = comment["body_html"] or ""
|
||||||
href = (' href="' in html)
|
href = (' href="' in html)
|
||||||
|
|||||||
@@ -223,6 +223,18 @@ __tests__ = (
|
|||||||
"#urls" : "https://preview.redd.it/u9ud4k6xaf271.jpg?auto=webp&s=19b1334cb4409111cda136c01f7b44c2c42bf9fb",
|
"#urls" : "https://preview.redd.it/u9ud4k6xaf271.jpg?auto=webp&s=19b1334cb4409111cda136c01f7b44c2c42bf9fb",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.reddit.com/r/gonewildaudio/comments/1j2pxfn/",
|
||||||
|
"#comment" : "'selftext' option (#7111)",
|
||||||
|
"#category": ("", "reddit", "submission"),
|
||||||
|
"#class" : reddit.RedditSubmissionExtractor,
|
||||||
|
"#options" : {"selftext": True, "comments": 0},
|
||||||
|
"#urls" : (
|
||||||
|
"https://www.reddit.com/r/gonewildaudio/s/22pP7vizkx",
|
||||||
|
"https://soundgasm.net/u/chuwa/Your-Timid-Neighbor-Asks-You-To-Turn-Your-Music-Down-So-You-Fuck-Her-Stupid",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://old.reddit.com/r/lavaporn/comments/2a00np/",
|
"#url" : "https://old.reddit.com/r/lavaporn/comments/2a00np/",
|
||||||
"#category": ("", "reddit", "submission"),
|
"#category": ("", "reddit", "submission"),
|
||||||
|
|||||||
Reference in New Issue
Block a user