[reddit] download preview for 404ed imgur links (#4322)
This is a rather ugly hack, since the internal infrastructure doesn't really support switching from an external URL to a regular download when the former fails, but it more or less works. It can be disabled by setting 'reddit.fallback' to 'false'.
This commit is contained in:
@@ -108,7 +108,11 @@ class RedditExtractor(Extractor):
|
||||
if match:
|
||||
extra.append(match.group(1))
|
||||
elif not match_user(url) and not match_subreddit(url):
|
||||
if "preview" in data:
|
||||
data["_fallback"] = self._previews(data)
|
||||
yield Message.Queue, text.unescape(url), data
|
||||
if "_fallback" in data:
|
||||
del data["_fallback"]
|
||||
|
||||
if not extra or depth == max_depth:
|
||||
return
|
||||
@@ -165,6 +169,13 @@ class RedditExtractor(Extractor):
|
||||
submission["_ytdl_extra"] = {"title": submission["title"]}
|
||||
return submission["url"]
|
||||
|
||||
def _previews(self, post):
    """Yield the source URL of each preview image listed in 'post'

    Any exception raised while reading the preview data ends the
    iteration and is only reported through a debug log message.
    """
    try:
        previews = post["preview"]["images"]
        for entry in previews:
            source = entry["source"]
            yield source["url"]
    except Exception as error:
        # best-effort: previews are optional, so never propagate errors
        self.log.debug("%s: %s", error.__class__.__name__, error)
|
||||
|
||||
|
||||
class RedditSubredditExtractor(RedditExtractor):
|
||||
"""Extractor for URLs from subreddits on reddit.com"""
|
||||
|
||||
@@ -388,10 +388,23 @@ class DownloadJob(Job):
|
||||
try:
|
||||
if pextr.config("parent-skip"):
|
||||
job._skipcnt = self._skipcnt
|
||||
self.status |= job.run()
|
||||
status = job.run()
|
||||
self._skipcnt = job._skipcnt
|
||||
else:
|
||||
self.status |= job.run()
|
||||
status = job.run()
|
||||
|
||||
if status:
|
||||
self.status |= status
|
||||
if "_fallback" in kwdict and self.fallback:
|
||||
fallback = kwdict["_fallback"] = \
|
||||
iter(kwdict["_fallback"])
|
||||
try:
|
||||
url = next(fallback)
|
||||
except StopIteration:
|
||||
pass
|
||||
else:
|
||||
text.nameext_from_url(url, kwdict)
|
||||
self.handle_url(url, kwdict)
|
||||
break
|
||||
except exception.RestartExtraction:
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user