[arena] add 'channel' extractor (#5847 #8509)

* feat: issue 5847 site support for are.na
* flake8 lint error fix
* class name error fix
* update
    - prevent unnecessary request to 'page_url'
    - fix pagination
    - simplify block extraction code
    TODO: - rewrite without GalleryExtractor
          - extractors for Blocks, Users, etc
* supportedsites
* tests
* rename to 'channel' extractor
* update site title to 'Are.na'
* prioritize attachments

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
dev-KingMaster
2025-11-11 14:44:40 -05:00
committed by GitHub
parent d75a135e03
commit 793956d2d5
5 changed files with 259 additions and 0 deletions

View File

@@ -109,6 +109,12 @@ Consider all listed sites to potentially be NSFW.
<td>Search Results, Series, Subscriptions, Tag Searches, User Profiles, User Bookmarks, User Series, User Works, Works</td>
<td>Supported</td>
</tr>
<tr id="arena" title="arena">
<td>Are.na</td>
<td>https://are.na/</td>
<td>Channels</td>
<td></td>
</tr>
<tr id="artstation" title="artstation">
<td>ArtStation</td>
<td>https://www.artstation.com/</td>

View File

@@ -26,6 +26,7 @@ modules = [
"ao3",
"arcalive",
"architizer",
"arena",
"artstation",
"aryion",
"batoto",

View File

@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractor for https://are.na/"""
from .common import GalleryExtractor
class ArenaChannelExtractor(GalleryExtractor):
    """Extractor for are.na channels"""
    category = "arena"
    subcategory = "channel"
    root = "https://are.na"
    pattern = r"(?:https?://)?(?:www\.)?are\.na/[^/?#]+/([^/?#]+)"
    example = "https://are.na/evan-collins-1522646491/cassette-futurism"

    def metadata(self, page):
        """Return channel-level metadata fetched from the are.na API

        'page' is not used here; every value is taken from the JSON
        object returned by the 'channels/<slug>' endpoint, where <slug>
        is the first capture group of 'pattern'.
        """
        slug = self.groups[0]
        channel = self.request_json(
            f"https://api.are.na/v2/channels/{slug}")

        channel_slug = channel.get("slug")
        channel_id = channel.get("id")
        created = self.parse_datetime_iso(channel.get("created_at"))
        updated = self.parse_datetime_iso(channel.get("updated_at"))

        return {
            # use the numeric ID (as string) when the API returns no slug
            "gallery_id"  : channel_slug or str(channel_id),
            "channel_id"  : channel_id,
            "channel_slug": channel_slug,
            "title"       : channel.get("title") or "",
            "count"       : channel.get("length") or 0,
            "user"        : channel.get("user"),
            "date"        : created,
            "date_updated": updated,
        }

    def images(self, page):
        """Yield a (url, metadata) pair for each downloadable block

        Walks the paginated 'channels/<slug>/contents' endpoint and
        skips every block that offers neither an attachment URL nor an
        image URL. 'page' is not used.
        """
        slug = self.groups[0]
        endpoint = f"https://api.are.na/v2/channels/{slug}/contents"
        per_page = 100
        query = {"page": 1, "per": per_page}

        while True:
            blocks = self.request_json(endpoint, params=query).get("contents")
            if not blocks:
                return

            for block in blocks:
                # An attachment (e.g. PDFs, files) takes priority.
                # Otherwise use the first available image rendition in
                # the order original > display > large.
                media = block.get("attachment")
                if not media:
                    image = block.get("image") or {}
                    media = (image.get("original") or
                             image.get("display") or
                             image.get("large"))
                url = media.get("url") if media else None

                # some Links/Channels have no downloadable media
                if not url:
                    continue

                meta = {
                    "id"         : block.get("id"),
                    "block_class": block.get("class"),
                    "block_title": (block.get("title") or
                                    block.get("generated_title") or ""),
                }

                # expose the block's source link when there is one
                source = block.get("source")
                if source:
                    meta["source_url"] = source.get("url") or ""

                yield url, meta

            # a page shorter than 'per_page' is the last one
            if len(blocks) < per_page:
                return
            query["page"] += 1

View File

@@ -30,6 +30,7 @@ CATEGORY_MAP = {
"ao3" : "Archive of Our Own",
"archivedmoe" : "Archived.Moe",
"archiveofsins" : "Archive of Sins",
"arena" : "Are.na",
"artstation" : "ArtStation",
"aryion" : "Eka's Portal",
"atfbooru" : "ATFBooru",

166
test/results/arena.py Normal file
View File

@@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import arena
# Expected results for 'arena.ArenaChannelExtractor', one dict per URL.
# Keys starting with '#' ("#url", "#class", "#pattern", "#count",
# "#results") presumably address the test runner itself, while the
# remaining keys describe expected metadata values -- confirm against
# the test harness.
__tests__ = (
# Channel whose expected "block_class" is always "Image";
# every result URL must match the cloudfront 'original_*' pattern.
{
"#url" : "https://are.na/evan-collins-1522646491/cassette-futurism",
"#class" : arena.ArenaChannelExtractor,
"#pattern" : r"https://d2w9rnfcy7mm78\.cloudfront\.net/\d+/original_\w+\.\w+\?\d+\?bc=\d",
"#count" : 160,
"block_class" : "Image",
"block_title" : str,
"channel_id" : 1102343,
"channel_slug": "cassette-futurism",
"count" : 160,
"date" : "dt:2021-05-31 20:38:28",
"date_updated": "dt:2025-10-24 15:25:40",
"gallery_id" : "cassette-futurism",
"id" : int,
# NOTE(review): range(1, 160) stops at 159, but "#count" above is 160.
# If "num" is enumerated from 1 for every result, the last file would
# fall outside this range -- confirm whether range(1, 161) is intended.
"num" : range(1, 160),
"title" : "Cassette Futurism",
# user object as returned by the API; volatile counters use ranges
"user" : {
"avatar" : "https://static.avatars.are.na/51156/small_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
"badge" : "premium",
"base_class" : "User",
"can_index" : True,
"channel_count" : range(250, 300),
"class" : "User",
"created_at" : "2018-04-02T05:21:30.282Z",
"first_name" : "Evan",
"follower_count" : range(4900, 6000),
"following_count": range(10, 20),
"full_name" : "Evan Collins",
"id" : 51156,
"initials" : "EC",
"is_confirmed" : True,
"is_exceeding_connections_limit": False,
"is_lifetime_premium": False,
"is_pending_confirmation": False,
"is_pending_reconfirmation": False,
"is_premium" : True,
"is_supporter" : False,
"last_name" : "Collins",
"metadata" : {"description": None},
"profile_id" : 171860,
"slug" : "evan-collins-1522646491",
"username" : "Evan Collins",
"avatar_image" : {
"display": "https://static.avatars.are.na/51156/medium_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
"thumb" : "https://static.avatars.are.na/51156/small_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
},
},
},
# Channel with mixed block classes. "#count" (89) is lower than the
# channel's "count" (91), presumably because blocks without a
# downloadable URL produce no result -- confirm.
# The file extension in "#pattern" is optional here.
{
"#url" : "https://are.na/lachie/transparent-tech-cobxde9pu40",
"#class" : arena.ArenaChannelExtractor,
"#pattern" : r"https://d2w9rnfcy7mm78\.cloudfront\.net/\d+/original_\w+(\.\w+)?\?\d+\?bc=\d",
"#count" : 89,
"block_class" : str,
"block_title" : str,
"channel_id" : 2599871,
"channel_slug": "transparent-tech-cobxde9pu40",
"count" : 91,
"date" : "dt:2024-01-14 02:37:22",
"date_updated": "dt:2025-10-20 20:52:09",
"gallery_id" : "transparent-tech-cobxde9pu40",
"id" : int,
"num" : int,
# the leading '?' presumably marks this key as optional -- confirm
"?source_url" : str,
"title" : "🫙 Transparent Tech",
"user" : {
"avatar" : "https://static.avatars.are.na/55241/small_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
"badge" : "premium",
"base_class" : "User",
"can_index" : True,
"channel_count" : 219,
"class" : "User",
"created_at" : "2018-05-03T07:13:39.847Z",
"first_name" : "lachie",
"follower_count" : range(80, 120),
"following_count": range(40, 80),
"full_name" : "lachie 🔐",
"id" : 55241,
"initials" : "l🔐",
"is_confirmed" : True,
"is_exceeding_connections_limit": False,
"is_lifetime_premium": False,
"is_pending_confirmation": False,
"is_pending_reconfirmation": False,
"is_premium" : True,
"is_supporter" : False,
"last_name" : "🔐",
"metadata" : {"description": None},
"profile_id" : 188402,
"slug" : "lachie",
"username" : "lachie 🔐",
"avatar_image" : {
"display": "https://static.avatars.are.na/55241/medium_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
"thumb" : "https://static.avatars.are.na/55241/small_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
},
},
},
# 'www.' URL variant with an explicit list of expected result URLs.
# The list holds 5 URLs while "count" is 6, and it includes one
# 'attachments.are.na' file (mp4) next to the cloudfront images.
{
"#url" : "https://www.are.na/mikf/touhou-zr5p8idnkag",
"#class" : arena.ArenaChannelExtractor,
"#results" : (
"https://d2w9rnfcy7mm78.cloudfront.net/40871580/original_3fb729c818f92de4539d4ff263eb6056.png?1762357121?bc=0",
"https://d2w9rnfcy7mm78.cloudfront.net/40871591/original_91d7c8144a5ba9776118f0af6d923f94.png?1762357155?bc=0",
"https://d2w9rnfcy7mm78.cloudfront.net/40871607/original_766f89eb3b06cc84372bea9d58132c93.png?1762357207?bc=0",
"https://attachments.are.na/40873309/ebf4eae61a70773f7494e10a98b18fe3.mp4?1762359389",
"https://d2w9rnfcy7mm78.cloudfront.net/40873379/original_289824f61eade100785db100652abd9a.jpg?1762359483?bc=0",
),
"block_class" : str,
"block_title" : str,
"channel_id" : 4422732,
"channel_slug": "touhou-zr5p8idnkag",
"count" : 6,
"date" : "dt:2025-11-05 15:37:40",
"date_updated": "dt:2025-11-10 19:52:52",
"gallery_id" : "touhou-zr5p8idnkag",
"id" : int,
"title" : '''Touhou "東方"''',
"user" : {
"avatar" : "",
"badge" : None,
"base_class" : "User",
"can_index" : False,
"channel_count" : 3,
"class" : "User",
"created_at" : "2025-11-05T15:35:15.242Z",
"first_name" : "mikf",
"follower_count" : 0,
"following_count": 0,
"full_name" : "mikf .",
"id" : 1127493,
"initials" : "m.",
"is_confirmed" : True,
"is_exceeding_connections_limit": False,
"is_lifetime_premium": False,
"is_pending_confirmation": False,
"is_pending_reconfirmation": False,
"is_premium" : False,
"is_supporter" : False,
"last_name" : ".",
"metadata" : {"description": None},
"profile_id" : 4422723,
"slug" : "mikf",
"username" : "mikf .",
"avatar_image" : {
"display": "",
"thumb" : "",
},
},
},
)