[subscribestar] strip whitespace from 'content'

This commit is contained in:
Mike Fährmann
2025-01-04 16:19:22 +01:00
parent a53ce6103c
commit 107798eeab

View File

@@ -137,7 +137,7 @@ class SubscribestarExtractor(Extractor):
"author_nick": text.unescape(extr('>', '<')), "author_nick": text.unescape(extr('>', '<')),
"date" : self._parse_datetime(extr( "date" : self._parse_datetime(extr(
'class="post-date">', '</').rpartition(">")[2]), 'class="post-date">', '</').rpartition(">")[2]),
"content" : extr('<body>', '</body>') "content" : extr('<body>', '</body>').strip(),
} }
def _parse_datetime(self, dt): def _parse_datetime(self, dt):
@@ -194,5 +194,5 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
"author_nick": text.unescape(extr('alt="', '"')), "author_nick": text.unescape(extr('alt="', '"')),
"date" : self._parse_datetime(extr( "date" : self._parse_datetime(extr(
'<span class="star_link-types">', '<')), '<span class="star_link-types">', '<')),
"content" : extr('<body>', '</body>') "content" : extr('<body>', '</body>').strip(),
} }