[kemonoparty] update to new site layout / API endpoints

(#6415, #6503, #6528, #6530, #6536)

… at least for the most part. Favorites are still broken, but the rest
should be functional again.
This commit is contained in:
Mike Fährmann
2024-11-26 21:58:15 +01:00
parent 5412b22dae
commit 74d855c693
2 changed files with 200 additions and 165 deletions

View File

@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
from ..cache import cache
import itertools
import json
import re
@@ -38,6 +38,7 @@ class KemonopartyExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
self.api = KemonoAPI(self)
self.revisions = self.config("revisions")
if self.revisions:
self.revisions_unique = (self.revisions == "unique")
@@ -53,48 +54,53 @@ class KemonopartyExtractor(Extractor):
sort_keys=True, separators=(",", ":")).encode
def items(self):
service = self.groups[2]
creator_id = self.groups[3]
find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
duplicates = self.config("duplicates")
comments = self.config("comments")
username = dms = announcements = None
announcements = True if self.config("announcements") else None
comments = True if self.config("comments") else False
duplicates = True if self.config("duplicates") else False
dms = True if self.config("dms") else None
profile = username = None
# prevent files from being sent with gzip compression
headers = {"Accept-Encoding": "identity"}
if self.config("metadata"):
username = text.unescape(text.extract(
self.request(self.user_url).text,
'<meta name="artist_name" content="', '"')[0])
if self.config("dms"):
dms = True
if self.config("announcements"):
announcements = True
profile = self.api.creator_profile(service, creator_id)
username = profile["name"]
posts = self.posts()
max_posts = self.config("max-posts")
if max_posts:
posts = itertools.islice(posts, max_posts)
if self.revisions:
posts = self._revisions(posts)
for post in posts:
headers["Referer"] = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
post["date"] = self._parse_datetime(
post.get("published") or post.get("added") or "")
if username:
if profile is not None:
post["username"] = username
post["user_profile"] = profile
if comments:
post["comments"] = self._extract_comments(post)
post["comments"] = self.api.creator_post_comments(
service, creator_id, post["id"])
if dms is not None:
if dms is True:
dms = self._extract_cards(post, "dms")
dms = self.api.creator_dms(
post["service"], post["user"])
post["dms"] = dms
if announcements is not None:
if announcements is True:
announcements = self._extract_cards(post, "announcements")
announcements = self.api.creator_announcements(
post["service"], post["user"])
post["announcements"] = announcements
files = []
@@ -188,56 +194,21 @@ class KemonopartyExtractor(Extractor):
filetypes = filetypes.split(",")
return [genmap[ft] for ft in filetypes]
def _extract_comments(self, post):
url = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
page = self.request(url).text
comments = []
for comment in text.extract_iter(page, "<article", "</article>"):
extr = text.extract_from(comment)
cid = extr('id="', '"')
comments.append({
"id" : cid,
"user": extr('href="#' + cid + '"', '</').strip(" \n\r>"),
"body": extr(
'<section class="comment__body">', '</section>').strip(),
"date": extr('datetime="', '"'),
})
return comments
def _extract_cards(self, post, type):
url = "{}/{}/user/{}/{}".format(
self.root, post["service"], post["user"], type)
page = self.request(url).text
cards = []
for card in text.extract_iter(page, "<article", "</article>"):
footer = text.extr(card, "<footer", "</footer>")
cards.append({
"body": text.unescape(text.extr(
card, "<pre>", "</pre></",
).strip()),
"date": text.extr(footer, ': ', '\n'),
})
return cards
def _parse_datetime(self, date_string):
    """Parse a 'YYYY-MM-DDTHH:MM:SS' timestamp, ignoring any trailing part

    Only the first 19 characters of 'date_string' are handed to
    text.parse_datetime(); whatever follows (e.g. fractional seconds)
    is dropped. Returns the result of text.parse_datetime() unchanged.
    """
    # slicing is a no-op for strings of 19 characters or fewer
    seconds_precision = date_string[:19]
    return text.parse_datetime(seconds_precision, "%Y-%m-%dT%H:%M:%S")
@memcache(keyarg=1)
def _discord_channels(self, server):
url = "{}/api/v1/discord/channel/lookup/{}".format(
self.root, server)
return self.request(url).json()
def _revisions(self, posts):
return itertools.chain.from_iterable(
self._revisions_post(post) for post in posts)
def _revisions_post(self, post, url):
def _revisions_post(self, post):
post["revision_id"] = 0
try:
revs = self.request(url + "/revisions").json()
revs = self.api.creator_post_revisions(
post["service"], post["user"], post["id"])
except exception.HttpError:
post["revision_hash"] = self._revision_hash(post)
post["revision_index"] = 1
@@ -268,8 +239,8 @@ class KemonopartyExtractor(Extractor):
return revs
def _revisions_all(self, url):
revs = self.request(url + "/revisions").json()
def _revisions_all(self, service, creator_id, post_id):
revs = self.api.creator_post_revisions(service, creator_id, post_id)
cnt = idx = len(revs)
for rev in revs:
@@ -305,50 +276,30 @@ def _validate(response):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.su user listing"""
subcategory = "user"
pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])"
pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)"
example = "https://kemono.su/SERVICE/user/12345"
def __init__(self, match):
_, _, service, user_id, self.query = match.groups()
self.subcategory = service
self.subcategory = match.group(3)
KemonopartyExtractor.__init__(self, match)
self.api_url = "{}/api/v1/{}/user/{}".format(
self.root, service, user_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
url = self.api_url
params = text.parse_query(self.query)
params["o"] = text.parse_int(params.get("o"))
while True:
posts = self.request(url, params=params).json()
if self.revisions:
for post in posts:
post_url = "{}/api/v1/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
yield from self._revisions_post(post, post_url)
else:
yield from posts
if len(posts) < 50:
break
params["o"] += 50
_, _, service, creator_id, query = self.groups
params = text.parse_query(query)
return self.api.creator_posts(
service, creator_id, params.get("o"), params.get("q"))
class KemonopartyPostsExtractor(KemonopartyExtractor):
"""Extractor for kemono.su post listings"""
subcategory = "posts"
pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?"
pattern = BASE_PATTERN + r"/posts()()(?:/?\?([^#]+))?"
example = "https://kemono.su/posts"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
self.query = match.group(3)
self.api_url = self.root + "/api/v1/posts"
posts = KemonopartyUserExtractor.posts
def posts(self):
params = text.parse_query(self.groups[4])
return self.api.posts(
params.get("o"), params.get("q"), params.get("tag"))
class KemonopartyPostExtractor(KemonopartyExtractor):
@@ -358,27 +309,23 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
example = "https://kemono.su/SERVICE/user/12345/post/12345"
def __init__(self, match):
_, _, service, user_id, post_id, self.revision, self.revision_id = \
match.groups()
self.subcategory = service
self.subcategory = match.group(3)
KemonopartyExtractor.__init__(self, match)
self.api_url = "{}/api/v1/{}/user/{}/post/{}".format(
self.root, service, user_id, post_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
if not self.revision:
post = self.request(self.api_url).json()
if self.revisions:
return self._revisions_post(post, self.api_url)
return (post,)
_, _, service, creator_id, post_id, revision, revision_id = self.groups
post = self.api.creator_post(service, creator_id, post_id)
if not revision:
return (post["post"],)
revs = self._revisions_all(self.api_url)
if not self.revision_id:
self.revisions = False
revs = self._revisions_all(service, creator_id, post_id)
if not revision_id:
return revs
for rev in revs:
if str(rev["revision_id"]) == self.revision_id:
if str(rev["revision_id"]) == revision_id:
return (rev,)
raise exception.NotFoundError("revision")
@@ -394,37 +341,35 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
example = "https://kemono.su/discord/server/12345#CHANNEL"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
_, _, self.server, self.channel_id, self.channel = match.groups()
self.channel_name = ""
def items(self):
self._prepare_ddosguard_cookies()
if self.channel_id:
self.channel_name = self.channel
_, _, server_id, channel_id, channel = self.groups
channel_name = ""
if channel_id:
channel_name = channel
else:
if self.channel.isdecimal() and len(self.channel) >= 16:
if channel.isdecimal() and len(channel) >= 16:
key = "id"
else:
key = "name"
for channel in self._discord_channels(self.server):
if channel[key] == self.channel:
for ch in self.api.discord_server(server_id):
if ch[key] == channel:
break
else:
raise exception.NotFoundError("channel")
self.channel_id = channel["id"]
self.channel_name = channel["name"]
channel_id = ch["id"]
channel_name = ch["name"]
find_inline = re.compile(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
find_hash = re.compile(HASH_PATTERN).match
posts = self.posts()
posts = self.api.discord_channel(channel_id)
max_posts = self.config("max-posts")
if max_posts:
posts = itertools.islice(posts, max_posts)
@@ -441,7 +386,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
append({"path": "https://cdn.discordapp.com" + path,
"name": path, "type": "inline", "hash": ""})
post["channel_name"] = self.channel_name
post["channel_name"] = channel_name
post["date"] = self._parse_datetime(post["published"])
post["count"] = len(files)
yield Message.Directory, post
@@ -461,33 +406,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
url = self.root + "/data" + url[20:]
yield Message.Url, url, post
def posts(self):
url = "{}/api/v1/discord/channel/{}".format(
self.root, self.channel_id)
params = {"o": 0}
while True:
posts = self.request(url, params=params).json()
yield from posts
if len(posts) < 150:
break
params["o"] += 150
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
subcategory = "discord-server"
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
example = "https://kemono.su/discord/server/12345"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
self.server = match.group(3)
def items(self):
for channel in self._discord_channels(self.server):
server_id = self.groups[2]
for channel in self.api.discord_server(server_id):
url = "{}/discord/server/{}/channel/{}#{}".format(
self.root, self.server, channel["id"], channel["name"])
self.root, server_id, channel["id"], channel["name"])
channel["_extractor"] = KemonopartyDiscordExtractor
yield Message.Queue, url, channel
@@ -541,3 +470,100 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
url = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
yield Message.Queue, url, post
class KemonoAPI():
    """Interface for the Kemono API v1.1.0

    https://kemono.su/documentation/api

    Every public method builds an endpoint path below '<root>/api/v1'
    and either returns the decoded JSON response (_call) or a generator
    over all items of a paginated listing (_pagination).
    """

    def __init__(self, extractor):
        # 'extractor' supplies the site root URL and the request() method
        # used for all HTTP traffic
        self.extractor = extractor
        self.root = extractor.root + "/api/v1"

    def posts(self, offset=0, query=None, tags=None):
        """Yield posts from the global post listing"""
        endpoint = "/posts"
        # NOTE(review): the value is sent as "tags" while callers read it
        # from a "tag" URL parameter - confirm the name the API expects
        params = {"q": query, "o": offset, "tags": tags}
        return self._pagination(endpoint, params, 50, "posts")

    def creator_posts(self, service, creator_id, offset=0, query=None):
        """Yield the posts of one creator"""
        endpoint = "/{}/user/{}".format(service, creator_id)
        params = {"q": query, "o": offset}
        return self._pagination(endpoint, params, 50)

    def creator_announcements(self, service, creator_id):
        """Return the decoded 'announcements' response for a creator"""
        endpoint = "/{}/user/{}/announcements".format(service, creator_id)
        return self._call(endpoint)

    def creator_dms(self, service, creator_id):
        """Return the decoded 'dms' response for a creator"""
        endpoint = "/{}/user/{}/dms".format(service, creator_id)
        return self._call(endpoint)

    def creator_fancards(self, service, creator_id):
        """Return the decoded 'fancards' response for a creator"""
        endpoint = "/{}/user/{}/fancards".format(service, creator_id)
        return self._call(endpoint)

    def creator_post(self, service, creator_id, post_id):
        """Return the decoded response for a single post"""
        endpoint = "/{}/user/{}/post/{}".format(service, creator_id, post_id)
        return self._call(endpoint)

    def creator_post_comments(self, service, creator_id, post_id):
        """Return the decoded 'comments' response for a single post"""
        endpoint = "/{}/user/{}/post/{}/comments".format(
            service, creator_id, post_id)
        return self._call(endpoint)

    def creator_post_revisions(self, service, creator_id, post_id):
        """Return the decoded 'revisions' response for a single post"""
        endpoint = "/{}/user/{}/post/{}/revisions".format(
            service, creator_id, post_id)
        return self._call(endpoint)

    def creator_profile(self, service, creator_id):
        """Return the decoded 'profile' response for a creator"""
        endpoint = "/{}/user/{}/profile".format(service, creator_id)
        return self._call(endpoint)

    def creator_links(self, service, creator_id):
        """Return the decoded 'links' response for a creator"""
        endpoint = "/{}/user/{}/links".format(service, creator_id)
        return self._call(endpoint)

    def creator_tags(self, service, creator_id):
        """Return the decoded 'tags' response for a creator"""
        endpoint = "/{}/user/{}/tags".format(service, creator_id)
        return self._call(endpoint)

    def discord_channel(self, channel_id):
        """Yield the posts of a Discord channel (pages of 150)"""
        endpoint = "/discord/channel/{}".format(channel_id)
        return self._pagination(endpoint, {}, 150)

    def discord_server(self, server_id):
        """Return the decoded channel lookup response for a Discord server"""
        endpoint = "/discord/channel/lookup/{}".format(server_id)
        return self._call(endpoint)

    def account_favorites(self, type):
        """Return the decoded favorites response for the given 'type'"""
        endpoint = "/account/favorites"
        params = {"type": type}
        return self._call(endpoint, params)

    def authentication_login(self, username, password):
        """Send 'username' and 'password' to the login endpoint"""
        endpoint = "/authentication/login"
        # NOTE(review): credentials are passed as URL query parameters of
        # whatever request method extractor.request() defaults to -
        # confirm this is what the endpoint expects
        params = {"username": username, "password": password}
        return self._call(endpoint, params)

    def _call(self, endpoint, params=None):
        """Request 'endpoint' below the API root and return its JSON data"""
        url = self.root + endpoint
        response = self.extractor.request(url, params=params)
        return response.json()

    def _pagination(self, endpoint, params, batch=50, key=False):
        """Yield all items of a paginated endpoint

        'params' is updated in place: its "o" (offset) entry is
        normalized to an integer and advanced by 'batch' per request.
        If 'key' is given, each response is expected to be a mapping
        whose 'key' entry holds the list of items for that page;
        otherwise the response itself is the list.
        Iteration ends on the first empty or short (< 'batch') page.
        """
        # Start at the page boundary at or below the requested offset.
        # A plain modulo would keep only the remainder and restart from
        # the first page for every offset >= batch.
        offset = text.parse_int(params.get("o"))
        params["o"] = offset - offset % batch

        while True:
            data = self._call(endpoint, params)

            if key:
                # measure and yield the page list, not its wrapper object
                data = data.get(key)
            if not data:
                return
            yield from data

            if len(data) < batch:
                return
            params["o"] += batch

View File

@@ -23,7 +23,7 @@ __tests__ = (
"#category": ("", "kemonoparty", "patreon"),
"#class" : kemonoparty.KemonopartyUserExtractor,
"#options" : {"max-posts": 100},
"#count" : range(200, 300),
"#count" : range(200, 400),
},
{
@@ -92,7 +92,7 @@ __tests__ = (
"#url" : "https://kemono.su/gumroad/user/3101696181060/post/tOWyf",
"#category": ("", "kemonoparty", "gumroad"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#urls" : "https://kemono.su/data/6f/13/6f1394b19516396ea520254350662c254bbea30c1e111fd4b0f042c61c426d07.zip",
"#count" : 12,
},
{
@@ -129,10 +129,19 @@ __tests__ = (
"#class" : kemonoparty.KemonopartyPostExtractor,
"#options" : {"dms": True},
"dms": [{
"body": r"re:Hi! Thank you very much for supporting the work I did in May. Here's your reward pack! I hope you find something you enjoy in it. :\)\n\nhttps://www.mediafire.com/file/\w+/Set13_tier_2.zip/file",
"date": "2021-06",
}],
"dms": [
{
"added" : "2021-07-31T02:47:51.327865",
"artist" : None,
"content" : "Hi! Thank you very much for supporting the work I did in May. Here's your reward pack! I hope you find something you enjoy in it. :)\n\nhttps://www.mediafire.com/file/n9ppjpip0r3f01v/Set13_tier_2.zip/file",
"embed" : {},
"file" : {},
"hash" : "f8d4962fb7908614c9b7c8c0de1b5f8985f01b62a9b06d74d640c5b2bcedf758",
"published": "2021-06-09T03:28:51.431000",
"service" : "patreon",
"user" : "34134344",
},
],
},
{
@@ -142,10 +151,16 @@ __tests__ = (
"#class" : kemonoparty.KemonopartyPostExtractor,
"#options" : {"announcements": True},
"announcements": [{
"body": "<div><strong>Thank you so much for the support!</strong><strong><br></strong>This Patreon is more of a tip jar for supporting what I make. I have to clarify that there are <strong>no exclusive Patreon animations</strong> because all are released for the public. You will get earlier access to WIPs. Direct downloads to my works are also available for $5 and $10 Tiers.</div>",
"date": "2023-02",
}],
"announcements": [
{
"added" : "2023-02-01T22:44:34.670719",
"content" : "<div style=\"text-align: center;\"><strong>Thank you so much for the support!</strong><strong><br></strong>This Patreon is more of a tip jar for supporting what I make. I have to clarify that there are <strong>no exclusive Patreon animations</strong>&nbsp;because all are released for the public. You will get earlier access to WIPs. Direct downloads to my works are also available for $5 and $10 Tiers.</div>",
"hash" : "815648d41c60d1d546437e475a0888fd4a77fd098b1ec61a3648ea6da30c1034",
"published": None,
"service" : "patreon",
"user_id" : "3161935",
},
],
},
{
@@ -207,7 +222,7 @@ __tests__ = (
"hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86",
"revision_id" : 142470,
"revision_index": 2,
"revision_count": 9,
"revision_count": 10,
"revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40",
},
@@ -218,13 +233,15 @@ __tests__ = (
"#class" : kemonoparty.KemonopartyPostExtractor,
"#options" : {"revisions": "unique"},
"#urls" : "https://kemono.su/data/88/52/88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86.jpg",
"#archive" : False,
"filename" : "wip update",
"hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86",
"revision_id" : 0,
"revision_index": 1,
"revision_count": 1,
"revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40",
"revision_id" : {9277608, 0},
"revision_index": {1, 2},
"revision_count": 2,
"revision_hash" : {"e0e93281495e151b11636c156e52bfe9234c2a40",
"79d5967719583a6fa52b2fc143e6a80fcdf75fb8"},
},
{
@@ -233,12 +250,12 @@ __tests__ = (
"#category": ("", "kemonoparty", "patreon"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#pattern" : r"https://kemono\.su/data/88/52/88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86\.jpg",
"#count" : 9,
"#count" : 10,
"#archive" : False,
"revision_id": range(134996, 3052965),
"revision_index": range(1, 9),
"revision_count": 9,
"revision_id": range(134996, 9277608),
"revision_index": range(1, 10),
"revision_count": 10,
"revision_hash": "e0e93281495e151b11636c156e52bfe9234c2a40",
},
@@ -341,15 +358,7 @@ __tests__ = (
"#category": ("", "kemonoparty", "discord-server"),
"#class" : kemonoparty.KemonopartyDiscordServerExtractor,
"#pattern" : kemonoparty.KemonopartyDiscordExtractor.pattern,
"#count" : 13,
},
{
"#url" : "https://kemono.su/discord/server/488668827274444803",
"#category": ("", "kemonoparty", "discord-server"),
"#class" : kemonoparty.KemonopartyDiscordServerExtractor,
"#pattern" : kemonoparty.KemonopartyDiscordExtractor.pattern,
"#count" : 13,
"#count" : 15,
},
{