[kemonoparty] add 'comments' option (#1980)
This commit is contained in:
@@ -1328,6 +1328,16 @@ Description
|
|||||||
Download video files.
|
Download video files.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.kemonoparty.comments
|
||||||
|
-----------------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Description
|
||||||
|
Extract ``comments`` metadata. Note: this requires one additional HTTP request per post.
|
||||||
|
|
||||||
|
|
||||||
extractor.kemonoparty.max-posts
|
extractor.kemonoparty.max-posts
|
||||||
-------------------------------
|
-------------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ class KemonopartyExtractor(Extractor):
|
|||||||
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
|
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
|
||||||
skip_service = \
|
skip_service = \
|
||||||
"patreon" if self.config("patreon-skip-file", True) else None
|
"patreon" if self.config("patreon-skip-file", True) else None
|
||||||
|
comments = self.config("comments")
|
||||||
|
|
||||||
if self.config("metadata"):
|
if self.config("metadata"):
|
||||||
username = text.unescape(text.extract(
|
username = text.unescape(text.extract(
|
||||||
@@ -68,6 +69,8 @@ class KemonopartyExtractor(Extractor):
|
|||||||
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
|
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
|
||||||
if username:
|
if username:
|
||||||
post["username"] = username
|
post["username"] = username
|
||||||
|
if comments:
|
||||||
|
post["comments"] = self._extract_comments(post)
|
||||||
yield Message.Directory, post
|
yield Message.Directory, post
|
||||||
|
|
||||||
for post["num"], file in enumerate(files, 1):
|
for post["num"], file in enumerate(files, 1):
|
||||||
@@ -100,6 +103,24 @@ class KemonopartyExtractor(Extractor):
|
|||||||
|
|
||||||
return {c.name: c.value for c in response.history[0].cookies}
|
return {c.name: c.value for c in response.history[0].cookies}
|
||||||
|
|
||||||
|
def _extract_comments(self, post):
    """Collect the comments found on a post's HTML page as a list of dicts"""
    page_url = "{}/{}/user/{}/post/{}".format(
        self.root, post["service"], post["user"], post["id"])
    html = self.request(page_url).text

    results = []
    for article in text.extract_iter(html, "<article", "</article>"):
        extr = text.extract_from(article)

        # NOTE(review): 'extr' presumably scans 'article' sequentially,
        # so the calls below keep the original order:
        # id -> user link -> body -> date
        cid = extr('id="', '"')
        user = extr('href="#' + cid + '"', '</')
        body = extr('<section class="comment__body">', '</section>')
        date = extr('datetime="', '"')

        results.append({
            "id"  : cid,
            # drop whitespace and the '>' left over from the anchor tag
            "user": user.strip(" \n\r>"),
            "body": body.strip(),
            "date": date,
        })
    return results
|
||||||
|
|
||||||
|
|
||||||
class KemonopartyUserExtractor(KemonopartyExtractor):
|
class KemonopartyUserExtractor(KemonopartyExtractor):
|
||||||
"""Extractor for all posts from a kemono.party user listing"""
|
"""Extractor for all posts from a kemono.party user listing"""
|
||||||
|
|||||||
Reference in New Issue
Block a user