Files
gallery-dl/gallery_dl/extractor/kemonoparty.py
Mike Fährmann e9ab97396f [kemonoparty] update default filenames and archive IDs (#1514)
Add an enumeration index so that attachments and regular files with the
same filename still get downloaded and not counted as duplicate files
(even though for patreon posts they usually are)

This invalidates all previously generated archive IDs.
To keep using old names and IDs, set
'filename' to "{id}_{title}_{filename}.{extension}" and
'archive-format' to "{service}_{user}_{id}_{filename}.{extension}".
2021-06-18 16:32:45 +02:00

149 lines
5.1 KiB
Python

# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://kemono.party/"""
from .common import Extractor, Message
from .. import text
import re
BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)"
class KemonopartyExtractor(Extractor):
"""Base class for kemonoparty extractors"""
category = "kemonoparty"
root = "https://kemono.party"
directory_fmt = ("{category}", "{service}", "{user}")
filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
archive_fmt = "{service}_{user}_{id}_{num}"
def items(self):
find_inline = re.compile(r'src="(/inline/[^"]+)').findall
if self.config("metadata"):
username = text.unescape(text.extract(
self.request(self.user_url).text, "<title>", " | Kemono<")[0])
else:
username = None
for post in self.posts():
files = []
append = files.append
file = post["file"]
if file:
file["type"] = "file"
append(file)
for attachment in post["attachments"]:
attachment["type"] = "attachment"
append(attachment)
for path in find_inline(post["content"] or ""):
append({"path": path, "name": path, "type": "inline"})
post["date"] = text.parse_datetime(
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
if username:
post["username"] = username
yield Message.Directory, post
for post["num"], file in enumerate(files, 1):
post["type"] = file["type"]
url = file["path"]
if url[0] == "/":
url = "https://data.kemono.party" + url
elif url.startswith("https://kemono.party/"):
url = "https://data.kemono.party" + url[20:]
text.nameext_from_url(file["name"], post)
yield Message.Url, url, post
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
pattern = BASE_PATTERN + r"/?(?:$|[?#])"
test = (
("https://kemono.party/fanbox/user/6993449", {
"range": "1-25",
"count": 25,
}),
("https://kemono.party/subscribestar/user/alcorart"),
)
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
service, user_id = match.groups()
self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
url = self.api_url
params = {"o": 0}
while True:
posts = self.request(url, params=params).json()
yield from posts
if len(posts) < 25:
return
params["o"] += 25
class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
"pattern": r"https://data\.kemono\.party/files/fanbox"
r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
"content": str,
"date": "dt:2019-08-11 02:09:04",
"edited": None,
"embed": dict,
"extension": "jpeg",
"filename": "P058kDFYus7DbqAkGlfWTlOr",
"id": "506575",
"num": 1,
"published": "Sun, 11 Aug 2019 02:09:04 GMT",
"service": "fanbox",
"shared_file": False,
"subcategory": "post",
"title": "c96取り置き",
"type": "file",
"user": "6993449",
},
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
"pattern": r"https://data\.kemono\.party/inline/fanbox"
r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
}),
# kemono.party -> data.kemono.party
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
"pattern": r"https://data\.kemono\.party/(file|attachment)s"
r"/gumroad/trylsc/IURjT/",
}),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
)
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
service, user_id, post_id = match.groups()
self.api_url = "{}/api/{}/user/{}/post/{}".format(
self.root, service, user_id, post_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
posts = self.request(self.api_url).json()
return (posts[0],) if len(posts) > 1 else posts