From 1b918bd9378ac049bdb03e31b5949f6e6caa42b9 Mon Sep 17 00:00:00 2001 From: Alexandru Vasilescu Date: Fri, 28 Apr 2023 13:13:25 +0300 Subject: [PATCH 1/3] fix(extractor): fix extraction for cross-posted reddit videos and galleries --- gallery_dl/extractor/reddit.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index cefe8d37..b7260dcf 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -56,17 +56,26 @@ class RedditExtractor(Extractor): submission["num"] = 0 url = submission["url"] - if url and url.startswith("https://i.redd.it/"): + if not url: + continue + + if url.startswith("https://i.redd.it/"): text.nameext_from_url(url, submission) yield Message.Url, url, submission - elif "gallery_data" in submission: + elif url.startswith("https://www.reddit.com/gallery/"): + submission_with_gallery = submission + if "crosspost_parent_list" in submission_with_gallery: + submission_with_gallery = submission["crosspost_parent_list"][-1] + if "gallery_data" not in submission_with_gallery: + continue + for submission["num"], url in enumerate( - self._extract_gallery(submission), 1): + self._extract_gallery(submission_with_gallery), 1): text.nameext_from_url(url, submission) yield Message.Url, url, submission - elif submission["is_video"]: + elif url.startswith("https://v.redd.it/"): if videos: text.nameext_from_url(url, submission) url = "ytdl:" + self._extract_video(submission) From d4f8b2fe2206afeedf5fa8a2bfa7f6655a135811 Mon Sep 17 00:00:00 2001 From: Alexandru Vasilescu Date: Fri, 28 Apr 2023 13:45:23 +0300 Subject: [PATCH 2/3] fix: linter issues --- gallery_dl/extractor/reddit.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index b7260dcf..e1f1d27f 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -64,14 +64,16 @@ class RedditExtractor(Extractor): yield Message.Url, url, submission elif url.startswith("https://www.reddit.com/gallery/"): - submission_with_gallery = submission - if "crosspost_parent_list" in submission_with_gallery: - submission_with_gallery = submission["crosspost_parent_list"][-1] - if "gallery_data" not in submission_with_gallery: + gallery_submission = submission + if "crosspost_parent_list" in gallery_submission: + gallery_submission = \ + submission["crosspost_parent_list"][-1] + if "gallery_data" not in gallery_submission: continue - for submission["num"], url in enumerate( - self._extract_gallery(submission_with_gallery), 1): + gallery = self._extract_gallery(gallery_submission) + + for submission["num"], url in enumerate(gallery, 1): text.nameext_from_url(url, submission) yield Message.Url, url, submission From f8c4c5eef9b0d3f210e8fceed9f2c91cfa893459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 25 May 2023 13:15:11 +0200 Subject: [PATCH 3/3] [reddit] simplify and add tests --- gallery_dl/extractor/reddit.py | 41 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index e1f1d27f..3f09e13e 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -55,32 +55,26 @@ class RedditExtractor(Extractor): visited.add(submission["id"]) submission["num"] = 0 - url = submission["url"] - if not url: - continue + if "crosspost_parent_list" in submission: + media = submission["crosspost_parent_list"][-1] + else: + media = submission - if url.startswith("https://i.redd.it/"): + url = media["url"] + if url and url.startswith("https://i.redd.it/"): text.nameext_from_url(url, submission) yield Message.Url, url, submission - elif url.startswith("https://www.reddit.com/gallery/"): - gallery_submission = submission - if "crosspost_parent_list" in gallery_submission: - gallery_submission = \ - submission["crosspost_parent_list"][-1] - if "gallery_data" not in gallery_submission: - continue - - gallery = self._extract_gallery(gallery_submission) - - for submission["num"], url in enumerate(gallery, 1): + elif "gallery_data" in media: + for submission["num"], url in enumerate( + self._extract_gallery(media), 1): text.nameext_from_url(url, submission) yield Message.Url, url, submission - elif url.startswith("https://v.redd.it/"): + elif media["is_video"]: if videos: text.nameext_from_url(url, submission) - url = "ytdl:" + self._extract_video(submission) + url = "ytdl:" + self._extract_video(media) yield Message.Url, url, submission elif not submission["is_self"]: @@ -291,14 +285,19 @@ class RedditSubmissionExtractor(RedditExtractor): ("https://www.reddit.com/r/kpopfap/comments/qjj04q/", { "count": 0, }), - ("https://old.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://np.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://m.reddit.com/r/lavaporn/comments/2a00np/"), - ("https://redd.it/2a00np/"), + # user page submission (#2301) ("https://www.reddit.com/user/TheSpiritTree/comments/srilyf/", { "pattern": r"https://i.redd.it/8fpgv17yqlh81.jpg", "count": 1, }), + # cross-posted video (#887, #3586, #3976) + ("https://www.reddit.com/r/kittengifs/comments/12m0b8d", { + "pattern": r"ytdl:https://v\.redd\.it/cvabpjacrvta1", + }), + ("https://old.reddit.com/r/lavaporn/comments/2a00np/"), + ("https://np.reddit.com/r/lavaporn/comments/2a00np/"), + ("https://m.reddit.com/r/lavaporn/comments/2a00np/"), + ("https://redd.it/2a00np/"), ) def __init__(self, match):