[reddit] improve comment metadata (#4482)

- provide 'date' for comments (parsed from 'created_utc')
- make metadata of the main submission available in comment metadata as 'submission[…]'
This commit is contained in:
Mike Fährmann
2023-09-12 21:41:31 +02:00
parent 93a7a89cf6
commit 1710f1e983

View File

@@ -97,9 +97,13 @@ class RedditExtractor(Extractor):
' href="', '"'):
urls.append((url, submission))
for comment in comments:
for url in text.extract_iter(
comment["body_html"] or "", ' href="', '"'):
urls.append((url, comment))
html = comment["body_html"] or ""
if ' href="' in html:
comment["submission"] = submission
comment["date"] = text.parse_timestamp(
comment["created_utc"])
for url in text.extract_iter(html, ' href="', '"'):
urls.append((url, comment))
for url, data in urls:
if not url or url[0] == "#":