[kemono:discord] extract 'archives' metadata (#8898)

4a74bc6e30
This commit is contained in:
Mike Fährmann
2026-01-20 17:41:22 +01:00
parent efcbde7dcd
commit 774d885a86
2 changed files with 100 additions and 0 deletions

View File

@@ -417,6 +417,8 @@ class KemonoDiscordExtractor(KemonoExtractor):
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
find_hash = text.re(HASH_PATTERN).match
archives = True if self.config("archives") else False
exts_archive = util.EXTS_ARCHIVE
if (order := self.config("order-posts")) and order[0] in ("r", "d"):
posts = self.api.discord_channel(channel_id, channel["post_count"])
@@ -440,6 +442,8 @@ class KemonoDiscordExtractor(KemonoExtractor):
post.update(data)
post["date"] = self._parse_datetime(post["published"])
post["count"] = len(files)
post["archives"] = post_archives = ()
yield Message.Directory, "", post
for post["num"], file in enumerate(files, 1):
@@ -451,6 +455,23 @@ class KemonoDiscordExtractor(KemonoExtractor):
if not post["extension"]:
post["extension"] = text.ext_from_url(url)
if post["extension"] in exts_archive:
if not post_archives:
post["archives"] = post_archives = []
post["type"] = "archive"
if archives:
try:
post_archives.append({
**self.api.file(file["hash"]), **file})
except Exception as exc:
self.log.warning(
"%s: Failed to retrieve archive metadata of "
"'%s' (%s: %s)", post["id"], file.get("name"),
exc.__class__.__name__, exc)
post_archives.append(file.copy())
else:
post_archives.append(file.copy())
if url[0] == "/":
url = f"{self.root}/data{url}"
elif url.startswith(self.root):