[kemonoparty] add 'count' metadata field (#2952)
This commit is contained in:
@@ -46,7 +46,7 @@ class KemonopartyExtractor(Extractor):
|
|||||||
comments = self.config("comments")
|
comments = self.config("comments")
|
||||||
username = dms = None
|
username = dms = None
|
||||||
|
|
||||||
# prevent files to be sent with gzip compression
|
# prevent files from being sent with gzip compression
|
||||||
headers = {"Accept-Encoding": "identity"}
|
headers = {"Accept-Encoding": "identity"}
|
||||||
|
|
||||||
if self.config("metadata"):
|
if self.config("metadata"):
|
||||||
@@ -63,6 +63,7 @@ class KemonopartyExtractor(Extractor):
|
|||||||
|
|
||||||
for post in posts:
|
for post in posts:
|
||||||
|
|
||||||
|
post["_http_headers"] = headers
|
||||||
post["date"] = text.parse_datetime(
|
post["date"] = text.parse_datetime(
|
||||||
post["published"] or post["added"],
|
post["published"] or post["added"],
|
||||||
"%a, %d %b %Y %H:%M:%S %Z")
|
"%a, %d %b %Y %H:%M:%S %Z")
|
||||||
@@ -74,27 +75,32 @@ class KemonopartyExtractor(Extractor):
|
|||||||
if dms is True:
|
if dms is True:
|
||||||
dms = self._extract_dms(post)
|
dms = self._extract_dms(post)
|
||||||
post["dms"] = dms
|
post["dms"] = dms
|
||||||
yield Message.Directory, post
|
|
||||||
|
|
||||||
|
files = []
|
||||||
hashes = set()
|
hashes = set()
|
||||||
post["num"] = 0
|
|
||||||
for file in itertools.chain.from_iterable(
|
for file in itertools.chain.from_iterable(
|
||||||
g(post) for g in generators):
|
g(post) for g in generators):
|
||||||
url = file["path"]
|
url = file["path"]
|
||||||
|
|
||||||
match = find_hash(url)
|
match = find_hash(url)
|
||||||
if match:
|
if match:
|
||||||
post["hash"] = hash = match.group(1)
|
file["hash"] = hash = match.group(1)
|
||||||
if hash in hashes and not duplicates:
|
if hash in hashes and not duplicates:
|
||||||
self.log.debug("Skipping %s (duplicate)", url)
|
self.log.debug("Skipping %s (duplicate)", url)
|
||||||
continue
|
continue
|
||||||
hashes.add(hash)
|
hashes.add(hash)
|
||||||
else:
|
else:
|
||||||
post["hash"] = ""
|
file["hash"] = ""
|
||||||
|
|
||||||
post["type"] = file["type"]
|
files.append(file)
|
||||||
post["num"] += 1
|
|
||||||
post["_http_headers"] = headers
|
post["count"] = len(files)
|
||||||
|
yield Message.Directory, post
|
||||||
|
|
||||||
|
for post["num"], file in enumerate(files, 1):
|
||||||
|
post.update(file)
|
||||||
|
url = file["path"]
|
||||||
|
|
||||||
text.nameext_from_url(file.get("name", url), post)
|
text.nameext_from_url(file.get("name", url), post)
|
||||||
if not post["extension"]:
|
if not post["extension"]:
|
||||||
@@ -236,6 +242,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
|
|||||||
"keyword": {
|
"keyword": {
|
||||||
"added": "Wed, 06 May 2020 20:28:02 GMT",
|
"added": "Wed, 06 May 2020 20:28:02 GMT",
|
||||||
"content": str,
|
"content": str,
|
||||||
|
"count": 1,
|
||||||
"date": "dt:2019-08-11 02:09:04",
|
"date": "dt:2019-08-11 02:09:04",
|
||||||
"edited": None,
|
"edited": None,
|
||||||
"embed": dict,
|
"embed": dict,
|
||||||
@@ -374,6 +381,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
|
|||||||
post["channel_name"] = self.channel_name
|
post["channel_name"] = self.channel_name
|
||||||
post["date"] = text.parse_datetime(
|
post["date"] = text.parse_datetime(
|
||||||
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
|
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
|
||||||
|
post["count"] = len(files)
|
||||||
yield Message.Directory, post
|
yield Message.Directory, post
|
||||||
|
|
||||||
for post["num"], file in enumerate(files, 1):
|
for post["num"], file in enumerate(files, 1):
|
||||||
|
|||||||
Reference in New Issue
Block a user