From 4ab0960083db8ce794df9ce07241739dd97f4b83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Sat, 29 Dec 2018 17:52:43 +0100
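Subject: [PATCH] [reddit] add metadata to extracted URLs

RedditExtractor._urls() now yields (url, data) pairs instead of bare
URLs, where data is the submission or comment the URL was found in.
That data is passed along with each Message.Queue item in place of
the previous empty dict.

Also drop the "count" check from the RedditSubmissionExtractor test.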
---
 gallery_dl/extractor/reddit.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 5ab7c065..8e922b37 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -36,7 +36,7 @@ class RedditExtractor(Extractor):
                 util.SPECIAL_EXTRACTORS, [RedditSubredditExtractor]):
             while True:
                 extra = []
-                for url in self._urls(submissions):
+                for url, data in self._urls(submissions):
                     if url[0] == "#":
                         continue
                     if url[0] == "/":
@@ -46,7 +46,7 @@ class RedditExtractor(Extractor):
                     if match:
                         extra.append(match.group(1))
                     else:
-                        yield Message.Queue, text.unescape(url), {}
+                        yield Message.Queue, text.unescape(url), data
 
                 if not extra or depth == self.max_depth:
                     return
@@ -62,11 +62,18 @@ class RedditExtractor(Extractor):
     def _urls(self, submissions):
         for submission, comments in submissions:
             self._visited.add(submission["id"])
+
             if not submission["is_self"]:
-                yield submission["url"]
-            strings = [submission["selftext_html"] or ""]
-            strings += [c["body_html"] or "" for c in comments]
-            yield from text.extract_iter("".join(strings), ' href="', '"')
+                yield submission["url"], submission
+
+            for url in text.extract_iter(
+                    submission["selftext_html"] or "", ' href="', '"'):
+                yield url, submission
+
+            for comment in comments:
+                for url in text.extract_iter(
+                        comment["body_html"] or "", ' href="', '"'):
+                    yield url, comment
 
 
 class RedditSubredditExtractor(RedditExtractor):
@@ -103,7 +110,6 @@ class RedditSubmissionExtractor(RedditExtractor):
     test = [
         ("https://www.reddit.com/r/lavaporn/comments/2a00np/", {
             "pattern": r"https?://i\.imgur\.com/AaAUCgy\.jpg",
-            "count": 1,
         }),
         ("https://old.reddit.com/r/lavaporn/comments/2a00np/", None),
         ("https://np.reddit.com/r/lavaporn/comments/2a00np/", None),