* feat: issue 5847 site support for are.na
* flake8 lint error fix
* class name error fix
* update
- prevent unnecessary request to 'page_url'
- fix pagination
- simplify block extraction code
TODO:
- rewrite without GalleryExtractor
- extractors for Blocks, Users, etc
* supportedsites
* tests
* rename to 'channel' extractor
* update site title to 'Are.na'
* prioritize attachments
---------
Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -109,6 +109,12 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<td>Search Results, Series, Subscriptions, Tag Searches, User Profiles, User Bookmarks, User Series, User Works, Works</td>
|
<td>Search Results, Series, Subscriptions, Tag Searches, User Profiles, User Bookmarks, User Series, User Works, Works</td>
|
||||||
<td>Supported</td>
|
<td>Supported</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr id="arena" title="arena">
|
||||||
|
<td>Are.na</td>
|
||||||
|
<td>https://are.na/</td>
|
||||||
|
<td>Channels</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
<tr id="artstation" title="artstation">
|
<tr id="artstation" title="artstation">
|
||||||
<td>ArtStation</td>
|
<td>ArtStation</td>
|
||||||
<td>https://www.artstation.com/</td>
|
<td>https://www.artstation.com/</td>
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ modules = [
|
|||||||
"ao3",
|
"ao3",
|
||||||
"arcalive",
|
"arcalive",
|
||||||
"architizer",
|
"architizer",
|
||||||
|
"arena",
|
||||||
"artstation",
|
"artstation",
|
||||||
"aryion",
|
"aryion",
|
||||||
"batoto",
|
"batoto",
|
||||||
|
|||||||
85
gallery_dl/extractor/arena.py
Normal file
85
gallery_dl/extractor/arena.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2025 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extractor for https://are.na/"""
|
||||||
|
|
||||||
|
from .common import GalleryExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ArenaChannelExtractor(GalleryExtractor):
    """Extractor for are.na channels"""
    category = "arena"
    subcategory = "channel"
    root = "https://are.na"
    pattern = r"(?:https?://)?(?:www\.)?are\.na/[^/?#]+/([^/?#]+)"
    example = "https://are.na/evan-collins-1522646491/cassette-futurism"

    def metadata(self, page):
        """Return channel-level metadata

        'page' is not used; all values are taken from the JSON object
        returned for the channel slug captured by 'pattern'.
        """
        info = self.request_json(
            f"https://api.are.na/v2/channels/{self.groups[0]}")

        return {
            # fall back to the numeric ID when the response has no slug
            "gallery_id"  : info.get("slug") or str(info.get("id")),
            "channel_id"  : info.get("id"),
            "channel_slug": info.get("slug"),
            "title"       : info.get("title") or "",
            "count"       : info.get("length") or 0,
            "user"        : info.get("user"),
            "date"        : self.parse_datetime_iso(info.get("created_at")),
            "date_updated": self.parse_datetime_iso(info.get("updated_at")),
        }

    def images(self, page):
        """Yield a (url, metadata) tuple for each downloadable block

        'page' is not used. The channel's 'contents' endpoint is requested
        page by page until it returns an empty or incomplete page.
        Blocks without any media URL are skipped.
        """
        api = f"https://api.are.na/v2/channels/{self.groups[0]}/contents"
        limit = 100
        params = {"page": 1, "per": limit}

        while True:
            data = self.request_json(api, params=params)

            contents = data.get("contents")
            if not contents:
                return

            for block in contents:
                # Some Links/Channels may not have downloadable media
                url = self._block_url(block)
                if not url:
                    continue

                meta = {
                    "id": block.get("id"),
                    "block_class": block.get("class"),
                    "block_title": block.get("title") or block.get(
                        "generated_title") or "",
                }

                # Provide source link if it exists
                if src := block.get("source"):
                    meta["source_url"] = src.get("url") or ""

                yield url, meta

            # fewer results than requested: this was the last page
            if len(contents) < limit:
                return
            params["page"] += 1

    @staticmethod
    def _block_url(block):
        """Return the preferred media URL of 'block' or None

        Attachments (e.g., PDFs, files) are prioritized over images.
        A candidate that exists but carries no URL does not end the
        search; the next candidate is tried instead.
        """
        attachment = block.get("attachment")
        if attachment and attachment.get("url"):
            return attachment["url"]

        if image := block.get("image"):
            # Prefer the original image, then display, then large
            for size in ("original", "display", "large"):
                variant = image.get(size)
                if variant and variant.get("url"):
                    return variant["url"]

        return None
|
||||||
@@ -30,6 +30,7 @@ CATEGORY_MAP = {
|
|||||||
"ao3" : "Archive of Our Own",
|
"ao3" : "Archive of Our Own",
|
||||||
"archivedmoe" : "Archived.Moe",
|
"archivedmoe" : "Archived.Moe",
|
||||||
"archiveofsins" : "Archive of Sins",
|
"archiveofsins" : "Archive of Sins",
|
||||||
|
"arena" : "Are.na",
|
||||||
"artstation" : "ArtStation",
|
"artstation" : "ArtStation",
|
||||||
"aryion" : "Eka's Portal",
|
"aryion" : "Eka's Portal",
|
||||||
"atfbooru" : "ATFBooru",
|
"atfbooru" : "ATFBooru",
|
||||||
|
|||||||
166
test/results/arena.py
Normal file
166
test/results/arena.py
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import arena
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
|
||||||
|
{
|
||||||
|
"#url" : "https://are.na/evan-collins-1522646491/cassette-futurism",
|
||||||
|
"#class" : arena.ArenaChannelExtractor,
|
||||||
|
"#pattern" : r"https://d2w9rnfcy7mm78\.cloudfront\.net/\d+/original_\w+\.\w+\?\d+\?bc=\d",
|
||||||
|
"#count" : 160,
|
||||||
|
|
||||||
|
"block_class" : "Image",
|
||||||
|
"block_title" : str,
|
||||||
|
"channel_id" : 1102343,
|
||||||
|
"channel_slug": "cassette-futurism",
|
||||||
|
"count" : 160,
|
||||||
|
"date" : "dt:2021-05-31 20:38:28",
|
||||||
|
"date_updated": "dt:2025-10-24 15:25:40",
|
||||||
|
"gallery_id" : "cassette-futurism",
|
||||||
|
"id" : int,
|
||||||
|
"num" : range(1, 160),
|
||||||
|
"title" : "Cassette Futurism",
|
||||||
|
"user" : {
|
||||||
|
"avatar" : "https://static.avatars.are.na/51156/small_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
|
||||||
|
"badge" : "premium",
|
||||||
|
"base_class" : "User",
|
||||||
|
"can_index" : True,
|
||||||
|
"channel_count" : range(250, 300),
|
||||||
|
"class" : "User",
|
||||||
|
"created_at" : "2018-04-02T05:21:30.282Z",
|
||||||
|
"first_name" : "Evan",
|
||||||
|
"follower_count" : range(4900, 6000),
|
||||||
|
"following_count": range(10, 20),
|
||||||
|
"full_name" : "Evan Collins",
|
||||||
|
"id" : 51156,
|
||||||
|
"initials" : "EC",
|
||||||
|
"is_confirmed" : True,
|
||||||
|
"is_exceeding_connections_limit": False,
|
||||||
|
"is_lifetime_premium": False,
|
||||||
|
"is_pending_confirmation": False,
|
||||||
|
"is_pending_reconfirmation": False,
|
||||||
|
"is_premium" : True,
|
||||||
|
"is_supporter" : False,
|
||||||
|
"last_name" : "Collins",
|
||||||
|
"metadata" : {"description": None},
|
||||||
|
"profile_id" : 171860,
|
||||||
|
"slug" : "evan-collins-1522646491",
|
||||||
|
"username" : "Evan Collins",
|
||||||
|
"avatar_image" : {
|
||||||
|
"display": "https://static.avatars.are.na/51156/medium_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
|
||||||
|
"thumb" : "https://static.avatars.are.na/51156/small_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://are.na/lachie/transparent-tech-cobxde9pu40",
|
||||||
|
"#class" : arena.ArenaChannelExtractor,
|
||||||
|
"#pattern" : r"https://d2w9rnfcy7mm78\.cloudfront\.net/\d+/original_\w+(\.\w+)?\?\d+\?bc=\d",
|
||||||
|
"#count" : 89,
|
||||||
|
|
||||||
|
"block_class" : str,
|
||||||
|
"block_title" : str,
|
||||||
|
"channel_id" : 2599871,
|
||||||
|
"channel_slug": "transparent-tech-cobxde9pu40",
|
||||||
|
"count" : 91,
|
||||||
|
"date" : "dt:2024-01-14 02:37:22",
|
||||||
|
"date_updated": "dt:2025-10-20 20:52:09",
|
||||||
|
"gallery_id" : "transparent-tech-cobxde9pu40",
|
||||||
|
"id" : int,
|
||||||
|
"num" : int,
|
||||||
|
"?source_url" : str,
|
||||||
|
"title" : "🫙 Transparent Tech",
|
||||||
|
"user" : {
|
||||||
|
"avatar" : "https://static.avatars.are.na/55241/small_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
|
||||||
|
"badge" : "premium",
|
||||||
|
"base_class" : "User",
|
||||||
|
"can_index" : True,
|
||||||
|
"channel_count" : 219,
|
||||||
|
"class" : "User",
|
||||||
|
"created_at" : "2018-05-03T07:13:39.847Z",
|
||||||
|
"first_name" : "lachie",
|
||||||
|
"follower_count" : range(80, 120),
|
||||||
|
"following_count": range(40, 80),
|
||||||
|
"full_name" : "lachie 🔐",
|
||||||
|
"id" : 55241,
|
||||||
|
"initials" : "l🔐",
|
||||||
|
"is_confirmed" : True,
|
||||||
|
"is_exceeding_connections_limit": False,
|
||||||
|
"is_lifetime_premium": False,
|
||||||
|
"is_pending_confirmation": False,
|
||||||
|
"is_pending_reconfirmation": False,
|
||||||
|
"is_premium" : True,
|
||||||
|
"is_supporter" : False,
|
||||||
|
"last_name" : "🔐",
|
||||||
|
"metadata" : {"description": None},
|
||||||
|
"profile_id" : 188402,
|
||||||
|
"slug" : "lachie",
|
||||||
|
"username" : "lachie 🔐",
|
||||||
|
"avatar_image" : {
|
||||||
|
"display": "https://static.avatars.are.na/55241/medium_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
|
||||||
|
"thumb" : "https://static.avatars.are.na/55241/small_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.are.na/mikf/touhou-zr5p8idnkag",
|
||||||
|
"#class" : arena.ArenaChannelExtractor,
|
||||||
|
"#results" : (
|
||||||
|
"https://d2w9rnfcy7mm78.cloudfront.net/40871580/original_3fb729c818f92de4539d4ff263eb6056.png?1762357121?bc=0",
|
||||||
|
"https://d2w9rnfcy7mm78.cloudfront.net/40871591/original_91d7c8144a5ba9776118f0af6d923f94.png?1762357155?bc=0",
|
||||||
|
"https://d2w9rnfcy7mm78.cloudfront.net/40871607/original_766f89eb3b06cc84372bea9d58132c93.png?1762357207?bc=0",
|
||||||
|
"https://attachments.are.na/40873309/ebf4eae61a70773f7494e10a98b18fe3.mp4?1762359389",
|
||||||
|
"https://d2w9rnfcy7mm78.cloudfront.net/40873379/original_289824f61eade100785db100652abd9a.jpg?1762359483?bc=0",
|
||||||
|
),
|
||||||
|
|
||||||
|
"block_class" : str,
|
||||||
|
"block_title" : str,
|
||||||
|
"channel_id" : 4422732,
|
||||||
|
"channel_slug": "touhou-zr5p8idnkag",
|
||||||
|
"count" : 6,
|
||||||
|
"date" : "dt:2025-11-05 15:37:40",
|
||||||
|
"date_updated": "dt:2025-11-10 19:52:52",
|
||||||
|
"gallery_id" : "touhou-zr5p8idnkag",
|
||||||
|
"id" : int,
|
||||||
|
"title" : '''Touhou "東方"''',
|
||||||
|
"user" : {
|
||||||
|
"avatar" : "",
|
||||||
|
"badge" : None,
|
||||||
|
"base_class" : "User",
|
||||||
|
"can_index" : False,
|
||||||
|
"channel_count" : 3,
|
||||||
|
"class" : "User",
|
||||||
|
"created_at" : "2025-11-05T15:35:15.242Z",
|
||||||
|
"first_name" : "mikf",
|
||||||
|
"follower_count" : 0,
|
||||||
|
"following_count": 0,
|
||||||
|
"full_name" : "mikf .",
|
||||||
|
"id" : 1127493,
|
||||||
|
"initials" : "m.",
|
||||||
|
"is_confirmed" : True,
|
||||||
|
"is_exceeding_connections_limit": False,
|
||||||
|
"is_lifetime_premium": False,
|
||||||
|
"is_pending_confirmation": False,
|
||||||
|
"is_pending_reconfirmation": False,
|
||||||
|
"is_premium" : False,
|
||||||
|
"is_supporter" : False,
|
||||||
|
"last_name" : ".",
|
||||||
|
"metadata" : {"description": None},
|
||||||
|
"profile_id" : 4422723,
|
||||||
|
"slug" : "mikf",
|
||||||
|
"username" : "mikf .",
|
||||||
|
"avatar_image" : {
|
||||||
|
"display": "",
|
||||||
|
"thumb" : "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
)
|
||||||
Reference in New Issue
Block a user