From da9840a39d4073904c17af08374af9be7e6707c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 6 Mar 2023 12:18:25 +0100 Subject: [PATCH] [reddit] update 'videos' option (#3712) - add 'dash' to directly extract DASH manifest URLs (was default behavior since a7c79531) - change default strategy back to before a7c79531 - disable 'Falling back on generic information extractor' warning --- docs/configuration.rst | 7 ++++ gallery_dl/extractor/reddit.py | 58 ++++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index d3ffb031..fbb0416b 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2562,8 +2562,15 @@ Description HLS and DASH manifests * ``"ytdl"``: Download videos and let `youtube-dl`_ handle all of video extraction and download + * ``"dash"``: Extract DASH manifest URLs and use `youtube-dl`_ + to download and merge them. (*) * ``false``: Ignore videos + (*) + This saves 1 HTTP request per video + and may be able to download otherwise deleted videos, + but it will not always get the best video quality available. 
+ extractor.redgifs.format ------------------------ diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 204562e7..305de2a0 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2022 Mike Fährmann +# Copyright 2017-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -29,7 +29,14 @@ class RedditExtractor(Extractor): parentdir = self.config("parent-directory") max_depth = self.config("recursion", 0) + videos = self.config("videos", True) + if videos: + if videos == "ytdl": + self._extract_video = self._extract_video_ytdl + elif videos == "dash": + self._extract_video = self._extract_video_dash + videos = True submissions = self.submissions() visited = set() @@ -62,19 +69,8 @@ class RedditExtractor(Extractor): elif submission["is_video"]: if videos: text.nameext_from_url(url, submission) - if videos == "ytdl": - url = "https://www.reddit.com" + \ - submission["permalink"] - else: - submission["_ytdl_extra"] = { - "title": submission["title"], - } - try: - url = (submission["secure_media"] - ["reddit_video"]["dash_url"]) - except (KeyError, TypeError): - pass - yield Message.Url, "ytdl:" + url, submission + url = "ytdl:" + self._extract_video(submission) + yield Message.Url, url, submission elif not submission["is_self"]: urls.append((url, submission)) @@ -145,6 +141,21 @@ class RedditExtractor(Extractor): submission["id"], item["media_id"]) self.log.debug(src) + def _extract_video_ytdl(self, submission): + return "https://www.reddit.com" + submission["permalink"] + + def _extract_video_dash(self, submission): + submission["_ytdl_extra"] = {"title": submission["title"]} + try: + return (submission["secure_media"]["reddit_video"]["dash_url"] + + "#__youtubedl_smuggle=%7B%22to_generic%22%3A+1%7D") + except Exception: + return submission["url"] + + 
def _extract_video(self, submission): + submission["_ytdl_extra"] = {"title": submission["title"]} + return submission["url"] + class RedditSubredditExtractor(RedditExtractor): """Extractor for URLs from subreddits on reddit.com""" @@ -233,6 +244,25 @@ class RedditSubmissionExtractor(RedditExtractor): "content": "1e7dde4ee7d5f4c4b45749abfd15b2dbfa27df3f", "count": 3, }), + # video + ("https://www.reddit.com/r/aww/comments/90bu6w/", { + "pattern": r"ytdl:https://v.redd.it/gyh95hiqc0b11", + "count": 1, + }), + # video (ytdl) + ("https://www.reddit.com/r/aww/comments/90bu6w/", { + "options": (("videos", "ytdl"),), + "pattern": r"ytdl:https://www.reddit.com/r/aww/comments/90bu6w" + r"/heat_index_was_110_degrees_so_we_offered_him_a/", + "count": 1, + }), + # video (dash) + ("https://www.reddit.com/r/aww/comments/90bu6w/", { + "options": (("videos", "dash"),), + "pattern": r"ytdl:https://v.redd.it/gyh95hiqc0b11" + r"/DASHPlaylist.mpd\?a=", + "count": 1, + }), # deleted gallery (#953) ("https://www.reddit.com/gallery/icfgzv", { "count": 0,