[reddit] ensure 'comment' metadata field for media files (#8228)

2025-09-19 17:22:33 +02:00
parent 713a65923a
commit b225018eda
1 changed file with 18 additions and 18 deletions
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -56,6 +56,7 @@ class RedditExtractor(Extractor):
                urls = []
                if submission:
                    submission["comment"] = None
                    submission["date"] = text.parse_timestamp(
                        submission["created_utc"])
                    yield Message.Directory, submission
@@ -99,14 +100,13 @@ class RedditExtractor(Extractor):
                    elif not submission["is_self"]:
                        urls.append((url, submission))
                    if selftext and (txt := submission["selftext_html"]):
                        for url in text.extract_iter(txt, ' href="', '"'):
                            urls.append((url, submission))
                elif parentdir:
                    yield Message.Directory, comments[0]
                if selftext and submission:
                    for url in text.extract_iter(
                            submission["selftext_html"] or "", ' href="', '"'):
                        urls.append((url, submission))
                if self.api.comments:
                    if comments and not submission:
                        submission = comments[0]
@@ -115,24 +115,24 @@ class RedditExtractor(Extractor):
                            yield Message.Directory, submission
                    for comment in comments:
                        media = (embeds and "media_metadata" in comment)
                        html = comment["body_html"] or ""
                        href = (' href="' in html)
                        media = (embeds and "media_metadata" in comment)
-                        if media or href:
+                        if not media and not href:
-                            comment["date"] = text.parse_timestamp(
+                            continue
-                                comment["created_utc"])
+
-                            if submission:
+                        data = submission.copy()
-                                data = submission.copy()
+                        data["comment"] = comment
-                                data["comment"] = comment
+                        comment["date"] = text.parse_timestamp(
-                            else:
+                            comment["created_utc"])
                                data = comment
                        if media:
-                            for embed in self._extract_embed(comment):
+                            for url in self._extract_embed(comment):
-                                submission["num"] += 1
+                                data["num"] += 1
-                                text.nameext_from_url(embed, submission)
+                                text.nameext_from_url(url, data)
-                                yield Message.Url, embed, submission
+                                yield Message.Url, url, data
                            submission["num"] = data["num"]
                        if href:
                            for url in text.extract_iter(html, ' href="', '"'):