[reddit] improve comment metadata v2 (#4482)

provide the main submission metadata at the top level
and the comment metadata inside the 'comment' field,
i.e. the reverse of the arrangement used in 1710f1e9
This commit is contained in:
Mike Fährmann
2023-09-20 22:12:40 +02:00
parent 7592c5e566
commit 4963bb9b30

View File

@@ -99,11 +99,15 @@ class RedditExtractor(Extractor):
for comment in comments:
html = comment["body_html"] or ""
if ' href="' in html:
comment["submission"] = submission
comment["date"] = text.parse_timestamp(
comment["created_utc"])
if submission:
data = submission.copy()
data["comment"] = comment
else:
data = comment
for url in text.extract_iter(html, ' href="', '"'):
urls.append((url, comment))
urls.append((url, data))
for url, data in urls:
if not url or url[0] == "#":