[furry34] add support (#1078 #7018)

This commit is contained in:
Mike Fährmann
2025-02-19 16:35:48 +01:00
parent 67937d33e3
commit 4396029d36
5 changed files with 318 additions and 0 deletions

View File

@@ -289,6 +289,12 @@ Consider all listed sites to potentially be NSFW.
<td>Favorites, Followed Users, Galleries, Posts, Scraps, Search Results, New Submissions, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
</tr>
<tr>
<td>Furry 34 com</td>
<td>https://furry34.com/</td>
<td>Playlists, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Fuskator</td>
<td>https://fuskator.com/</td>

View File

@@ -57,6 +57,7 @@ modules = [
"fapachi",
"flickr",
"furaffinity",
"furry34",
"fuskator",
"gelbooru",
"gelbooru_v01",

View File

@@ -0,0 +1,156 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://furry34.com/"""
from .booru import BooruExtractor
from .. import text
import collections
# Regex prefix shared by the 'pattern' attributes of the extractor classes
# in this module: optional scheme, optional "www." and the furry34.com host.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?furry34\.com"
class Furry34Extractor(BooruExtractor):
    # Base class for furry34.com extractors.
    # Provides file URL construction, post metadata preparation, tag
    # categorization and cursor-based pagination of the site's JSON API.
    category = "furry34"
    root = "https://furry34.com"
    root_cdn = "https://furry34com.b-cdn.net"
    filename_fmt = "{category}_{id}.{extension}"
    per_page = 30

    # Maps the numeric 'type' of a tag object to the suffix of the
    # 'tags_<suffix>' metadata field filled in by _tags()
    TAG_TYPES = {
        None: "general",
        1   : "general",
        2   : "copyright",
        4   : "character",
        8   : "artist",
    }

    # (format ID, file name suffix) pairs; _file_url() picks the first
    # entry whose format ID is present in a post's 'files' mapping
    FORMATS = (
        ("100", "mov.mp4"),
        ("101", "mov720.mp4"),
        ("102", "mov480.mp4"),
        ("10" , "pic.jpg"),
    )

    def _file_url(self, post):
        """Build the download URL for 'post' and store it in the post

        Sets post["file_url"], post["format_id"] and post["format"]
        and returns the URL.
        """
        files = post["files"]

        for fmt, extension in self.FORMATS:
            if fmt in files:
                break
        else:
            # None of the known formats is available: use the first one
            # listed for this post.
            # NOTE(review): 'extension' keeps the value from the last
            # FORMATS entry here - confirm this is the intended suffix
            # for unknown format IDs.
            fmt = next(iter(files))

        post_id = post["id"]
        # a truthy first element of the format's entry selects the CDN host
        root = self.root_cdn if files[fmt][0] else self.root
        post["file_url"] = url = "{}/posts/{}/{}/{}.{}".format(
            root, post_id // 1000, post_id, post_id, extension)
        post["format_id"] = fmt
        post["format"] = extension.partition(".")[0]

        return url

    def _prepare(self, post):
        """Finalize the metadata of 'post' in place

        Drops 'files', parses 'created' into 'date', splits 'filename'
        at its last "." into 'filename' and 'format', and reduces tag
        objects to their 'value' strings.
        """
        post.pop("files", None)
        post["date"] = text.parse_datetime(
            post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
        post["filename"], _, post["format"] = post["filename"].rpartition(".")
        if "tags" in post:
            post["tags"] = [t["value"] for t in post["tags"]]

    def _tags(self, post, _):
        """Add categorized 'tags_<type>' lists to 'post'

        Posts without a 'tags' entry are first completed with the data
        returned by _fetch_post().
        """
        if "tags" not in post:
            post.update(self._fetch_post(post["id"]))

        tags = collections.defaultdict(list)
        for tag in post["tags"]:
            # falsy tag types are grouped together with type 1
            tags[tag["type"] or 1].append(tag["value"])

        types = self.TAG_TYPES
        # NOTE(review): a tag type missing from TAG_TYPES raises KeyError
        for type_id, values in tags.items():
            post["tags_" + types[type_id]] = values

    def _fetch_post(self, post_id):
        """Return the decoded JSON data of the post with ID 'post_id'"""
        url = "{}/api/v2/post/{}".format(self.root, post_id)
        return self.request(url).json()

    def _pagination(self, endpoint, params=None):
        """Yield all items of a paginated API 'endpoint'

        'params' is an optional dict of additional request parameters.
        It is copied, so the caller's dict is left untouched.
        """
        url = "{}/api{}".format(self.root, endpoint)

        # work on a copy to avoid modifying the caller's dict
        params = {} if params is None else dict(params)
        params["sortBy"] = 0
        params["take"] = self.per_page
        threshold = self.per_page

        while True:
            data = self.request(url, method="POST", json=params).json()
            items = data["items"]

            yield from items

            # a page with fewer items than requested is the last one
            if len(items) < threshold:
                return

            cursor = data.get("cursor")
            if cursor is None:
                # Without a cursor the next request would be the same as
                # the previous one; stop instead of looping forever.
                return
            params["cursor"] = cursor
class Furry34PostExtractor(Furry34Extractor):
    # Extractor for a single post, addressed by the numeric ID in its URL
    subcategory = "post"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/post/(\d+)"
    example = "https://furry34.com/post/12345"

    def posts(self):
        """Return a 1-tuple with the API data of the requested post"""
        post_id = self.groups[0]
        post = self._fetch_post(post_id)
        return (post,)
class Furry34PlaylistExtractor(Furry34Extractor):
    # Extractor for the posts of a playlist, addressed by its numeric ID
    subcategory = "playlist"
    directory_fmt = ("{category}", "{playlist_id}")
    archive_fmt = "p_{playlist_id}_{id}"
    pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
    example = "https://furry34.com/playlists/view/12345"

    def metadata(self):
        """Return metadata holding the playlist ID taken from the URL"""
        playlist_id = self.groups[0]
        return {"playlist_id": playlist_id}

    def posts(self):
        """Return an iterator over the playlist's posts"""
        playlist_id = self.groups[0]
        endpoint = "/v2/post/search/playlist/" + playlist_id
        return self._pagination(endpoint)
class Furry34TagExtractor(Furry34Extractor):
    # Extractor for tag searches; tags come from the URL path and/or the
    # 'tags' query parameter, an optional 'type' parameter filters results
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{id}"
    pattern = BASE_PATTERN + r"/(?:([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)"
    example = "https://furry34.com/TAG"

    def _init(self):
        """Collect search tags and the media type filter from the URL"""
        path_tags, query = self.groups
        query_params = text.parse_query(query)

        search_tags = self.tags = []
        if path_tags:
            # the path segment is unquoted twice; tags are "|"-separated
            decoded = text.unquote(text.unquote(path_tags))
            search_tags.extend(decoded.split("|"))
        if "tags" in query_params:
            search_tags.extend(query_params["tags"].split("|"))

        # 'video' -> 1, 'image' -> 0, anything else -> no type filter
        requested = query_params.get("type")
        if requested == "video":
            media_type = 1
        elif requested == "image":
            media_type = 0
        else:
            media_type = None
        self.type = media_type

    def metadata(self):
        """Return metadata with all search tags joined by spaces"""
        joined = " ".join(self.tags)
        return {"search_tags": joined}

    def posts(self):
        """Return an iterator over all posts matching the search"""
        include = [tag.replace("_", " ") for tag in self.tags]
        params = {"includeTags": include}
        if self.type is not None:
            params["type"] = self.type
        return self._pagination("/v2/post/search/root", params)

View File

@@ -56,6 +56,7 @@ CATEGORY_MAP = {
"fanbox" : "pixivFANBOX",
"fashionnova" : "Fashion Nova",
"furaffinity" : "Fur Affinity",
"furry34" : "Furry 34 com",
"hatenablog" : "HatenaBlog",
"hbrowse" : "HBrowse",
"hentai2read" : "Hentai2Read",

154
test/results/furry34.py Normal file
View File

@@ -0,0 +1,154 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import furry34
# Test cases for the furry34 extractors.
# Each dict describes one case: "#url" is the input URL and "#class" the
# extractor class associated with it. The other "#"-prefixed keys look
# like directives for the test runner and the plain keys like expected
# post metadata - TODO confirm against the runner's documentation.
__tests__ = (
# single post ("#comment": image) with the "tags" option enabled;
# lists the complete expected metadata including categorized tags
{
"#url" : "https://furry34.com/post/541949",
"#comment": "image",
"#class" : furry34.Furry34PostExtractor,
"#options" : {"tags": True},
"#urls" : "https://furry34com.b-cdn.net/posts/541/541949/541949.pic.jpg",
"#sha1_content": "4880da04f7fb41b1760aad4c8297c9917aeeec53",
"created" : "2024-09-20T19:49:47.443232Z",
"date" : "dt:2024-09-20 19:49:47",
"extension" : "jpg",
"file_url" : "https://furry34com.b-cdn.net/posts/541/541949/541949.pic.jpg",
"filename" : "541949",
"format" : "pic",
"format_id" : "10",
"id" : 541949,
"likes" : 8,
"posted" : "2024-09-20T19:50:05.772166Z",
"status" : 2,
"type" : 0,
"uploaderId": 2,
"width" : 1300,
"height" : 1920,
"data": {
"sources": [
"https://x.com/EchoeDragon/status/1834316160252477741",
"https://pbs.twimg.com/media/GXTMHFkWYAA8wDj?format=jpg&name=orig",
],
},
"tags": [
"echodragon",
"scp-1471",
"scp-1471-a",
"scp-1471-a (da.nilkaz)",
"scp foundation",
"canid",
"canine",
"malo",
"mammal",
"anthro",
"big breasts",
"black hair",
"breasts",
"cleavage",
"clothed",
"clothing",
"female",
"hair",
"orange jumpsuit",
"prison uniform",
"solo",
"tail",
"thick thighs",
"white eyes",
"3d (artwork)",
"digital media (artwork)",
"hi res",
],
"tags_artist": [
"echodragon",
],
"tags_character": [
"scp-1471",
"scp-1471-a",
"scp-1471-a (da.nilkaz)",
],
"tags_copyright": [
"scp foundation",
],
"tags_general": [
"canid",
"canine",
"malo",
"mammal",
"anthro",
"big breasts",
"black hair",
"breasts",
"cleavage",
"clothed",
"clothing",
"female",
"hair",
"orange jumpsuit",
"prison uniform",
"solo",
"tail",
"thick thighs",
"white eyes",
"3d (artwork)",
"digital media (artwork)",
"hi res",
],
"uploader": {
"attributes" : [
80,
],
"avatarModifyDate": None,
"created" : "2021-07-04T15:01:03.110916Z",
"data" : None,
"displayName": "agent.e621-uploader",
"emailVerified": False,
"id" : 2,
"role" : 3,
"userName" : "agent.e621-uploader",
},
},
# single post ("#comment": video); its URL uses the main host
# instead of the CDN host
{
"#url" : "https://furry34.com/post/605309",
"#comment": "video",
"#class" : furry34.Furry34PostExtractor,
"#urls" : "https://furry34.com/posts/605/605309/605309.mov.mp4",
"#sha1_content": "914d00e2a6cfee73547bae266ec4b7aaee5aadf2",
"type": 1,
},
# tag search given as URL path, limited to the range "1-10"
{
"#url" : "https://furry34.com/tree",
"#class": furry34.Furry34TagExtractor,
"#pattern": r"https://(furry34\.com|furry34com\.b-cdn\.net)/posts/\d+/\d+/\d+\.(pic\.jpg|mov\d*\.mp4)",
"#range" : "1-10",
"#count" : 10,
},
# tag search with a double-percent-encoded path ("%2528", "%257C")
# and a "type=video" query parameter
{
"#url" : "https://furry34.com/dariana_%2528quetzaly%2529%257Canimated?type=video",
"#class": furry34.Furry34TagExtractor,
"#pattern": r"https://(furry34\.com|furry34com\.b-cdn\.net)/posts/\d+/\d+/\d+\.(pic\.jpg|mov\d*\.mp4)",
"#count" : range(8, 20),
"type": 1,
},
# playlist; the "#count" range exceeds the extractor's per_page of 30,
# so this case presumably spans more than one result page
{
"#url" : "https://furry34.com/playlists/view/8966",
"#class": furry34.Furry34PlaylistExtractor,
"#pattern": r"https://(furry34\.com|furry34com\.b-cdn\.net)/posts/\d+/\d+/\d+\.mov(720)?\.mp4",
"#count" : range(50, 75),
},
)