From 793956d2d5ba446e77de5adca94450a99ef5e7c4 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Tue, 11 Nov 2025 14:44:40 -0500 Subject: [PATCH] [arena] add 'channel' extractor (#5847 #8509) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: issue 5847 site support for are.na * flake8 lint error fix * class name error fix * update - prevent unnecessary request to 'page_url' - fix pagination - simplify block extraction code TODO: - rewrite without GalleryExtractor - extractors for Blocks, Users, etc * supportedsites * tests * rename to 'channel' extractor * update site title to 'Are.na' * prioritize attachments --------- Co-authored-by: Mike Fährmann --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/arena.py | 85 ++++++++++++++++ scripts/supportedsites.py | 1 + test/results/arena.py | 166 +++++++++++++++++++++++++++++++ 5 files changed, 259 insertions(+) create mode 100644 gallery_dl/extractor/arena.py create mode 100644 test/results/arena.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cb5049c5..f450b493 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -109,6 +109,12 @@ Consider all listed sites to potentially be NSFW. 
class ArenaChannelExtractor(GalleryExtractor):
    """Extractor for are.na channels"""
    category = "arena"
    subcategory = "channel"
    root = "https://are.na"
    pattern = r"(?:https?://)?(?:www\.)?are\.na/[^/?#]+/([^/?#]+)"
    example = "https://are.na/evan-collins-1522646491/cassette-futurism"

    def metadata(self, page):
        """Return channel-level metadata fetched from the are.na v2 API

        'page' is not used; all data comes from the
        '/v2/channels/<slug>' endpoint, where <slug> is the last path
        segment captured by 'pattern'.
        """
        info = self.request_json(
            f"https://api.are.na/v2/channels/{self.groups[0]}")

        return {
            # fall back to the numeric ID when the channel has no slug
            "gallery_id"  : info.get("slug") or str(info.get("id")),
            "channel_id"  : info.get("id"),
            "channel_slug": info.get("slug"),
            "title"       : info.get("title") or "",
            "count"       : info.get("length") or 0,
            "user"        : info.get("user"),
            "date"        : self.parse_datetime_iso(info.get("created_at")),
            "date_updated": self.parse_datetime_iso(info.get("updated_at")),
        }

    def images(self, page):
        """Yield '(url, metadata)' for every downloadable channel block

        'page' is not used. The '/contents' endpoint is requested page by
        page ('per' = 100) until it returns an empty or short page.
        Blocks without any media URL are skipped.
        """
        api = f"https://api.are.na/v2/channels/{self.groups[0]}/contents"
        limit = 100
        params = {"page": 1, "per": limit}

        while True:
            data = self.request_json(api, params=params)

            contents = data.get("contents")
            if not contents:
                return

            for block in contents:
                # Some Links/Channels may not have downloadable media
                url = self._block_url(block)
                if not url:
                    continue

                meta = {
                    "id": block.get("id"),
                    "block_class": block.get("class"),
                    "block_title": block.get("title") or block.get(
                        "generated_title") or "",
                }

                # Provide source link if it exists
                if src := block.get("source"):
                    meta["source_url"] = src.get("url") or ""

                yield url, meta

            # a page shorter than requested is treated as the last one
            if len(contents) < limit:
                return
            params["page"] += 1

    @staticmethod
    def _block_url(block):
        """Return the preferred download URL of 'block', or None

        Priority: attachment (e.g. PDFs, videos), then the image
        renditions 'original', 'display', 'large'. A candidate that is
        present but has no 'url' is passed over instead of ending the
        search, so a later candidate can still supply the URL.
        """
        attachment = block.get("attachment")
        if attachment and (url := attachment.get("url")):
            return url

        if image := block.get("image"):
            for size in ("original", "display", "large"):
                rendition = image.get(size)
                if rendition and (url := rendition.get("url")):
                    return url

        return None
from gallery_dl.extractor import arena


# Result fixtures for ArenaChannelExtractor.
# Keys starting with '#' describe the extraction run itself (input URL,
# expected extractor class, URL pattern / exact URLs, file count); all other
# keys are expected metadata values.
__tests__ = (
# Channel in which every file is expected to be an "Image" block
# served as an 'original_*' file from the cloudfront host
{
    "#url"     : "https://are.na/evan-collins-1522646491/cassette-futurism",
    "#class"   : arena.ArenaChannelExtractor,
    "#pattern" : r"https://d2w9rnfcy7mm78\.cloudfront\.net/\d+/original_\w+\.\w+\?\d+\?bc=\d",
    "#count"   : 160,

    "block_class" : "Image",
    "block_title" : str,
    "channel_id"  : 1102343,
    "channel_slug": "cassette-futurism",
    "count"       : 160,
    "date"        : "dt:2021-05-31 20:38:28",
    "date_updated": "dt:2025-10-24 15:25:40",
    "gallery_id"  : "cassette-futurism",
    "id"          : int,
    "num"         : range(1, 160),
    "title"       : "Cassette Futurism",
    "user"        : {
        "avatar"         : "https://static.avatars.are.na/51156/small_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
        "badge"          : "premium",
        "base_class"     : "User",
        "can_index"      : True,
        "channel_count"  : range(250, 300),
        "class"          : "User",
        "created_at"     : "2018-04-02T05:21:30.282Z",
        "first_name"     : "Evan",
        "follower_count" : range(4900, 6000),
        "following_count": range(10, 20),
        "full_name"      : "Evan Collins",
        "id"             : 51156,
        "initials"       : "EC",
        "is_confirmed"   : True,
        "is_exceeding_connections_limit": False,
        "is_lifetime_premium": False,
        "is_pending_confirmation": False,
        "is_pending_reconfirmation": False,
        "is_premium"     : True,
        "is_supporter"   : False,
        "last_name"      : "Collins",
        "metadata"       : {"description": None},
        "profile_id"     : 171860,
        "slug"           : "evan-collins-1522646491",
        "username"       : "Evan Collins",
        "avatar_image"   : {
            "display": "https://static.avatars.are.na/51156/medium_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
            "thumb"  : "https://static.avatars.are.na/51156/small_8c6098f64217eca6b4bcff44a7abf2d7.jpg?1563035757",
        },
    },
},

# Channel with mixed block classes: the expected file count (89) is lower
# than the channel's reported 'count' (91), and the file extension in the
# URL pattern is optional
{
    "#url"     : "https://are.na/lachie/transparent-tech-cobxde9pu40",
    "#class"   : arena.ArenaChannelExtractor,
    "#pattern" : r"https://d2w9rnfcy7mm78\.cloudfront\.net/\d+/original_\w+(\.\w+)?\?\d+\?bc=\d",
    "#count"   : 89,

    "block_class" : str,
    "block_title" : str,
    "channel_id"  : 2599871,
    "channel_slug": "transparent-tech-cobxde9pu40",
    "count"       : 91,
    "date"        : "dt:2024-01-14 02:37:22",
    "date_updated": "dt:2025-10-20 20:52:09",
    "gallery_id"  : "transparent-tech-cobxde9pu40",
    "id"          : int,
    "num"         : int,
    # NOTE(review): the '?' prefix presumably marks this key as optional
    # (not every block has a source) - confirm against the test runner
    "?source_url" : str,
    "title"       : "🫙 Transparent Tech",
    "user"        : {
        "avatar"         : "https://static.avatars.are.na/55241/small_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
        "badge"          : "premium",
        "base_class"     : "User",
        "can_index"      : True,
        "channel_count"  : 219,
        "class"          : "User",
        "created_at"     : "2018-05-03T07:13:39.847Z",
        "first_name"     : "lachie",
        "follower_count" : range(80, 120),
        "following_count": range(40, 80),
        "full_name"      : "lachie 🔐",
        "id"             : 55241,
        "initials"       : "l🔐",
        "is_confirmed"   : True,
        "is_exceeding_connections_limit": False,
        "is_lifetime_premium": False,
        "is_pending_confirmation": False,
        "is_pending_reconfirmation": False,
        "is_premium"     : True,
        "is_supporter"   : False,
        "last_name"      : "🔐",
        "metadata"       : {"description": None},
        "profile_id"     : 188402,
        "slug"           : "lachie",
        "username"       : "lachie 🔐",
        "avatar_image"   : {
            "display": "https://static.avatars.are.na/55241/medium_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
            "thumb"  : "https://static.avatars.are.na/55241/small_fdcab74d56d0d9b93930333bdea50f4a.jpg?1738898629",
        },
    },
},

# Small channel checked against exact URLs: uses the 'www.' host form and
# includes one 'attachments.are.na' file (.mp4) among the image URLs;
# 5 URLs are expected while the channel reports a 'count' of 6
{
    "#url"     : "https://www.are.na/mikf/touhou-zr5p8idnkag",
    "#class"   : arena.ArenaChannelExtractor,
    "#results" : (
        "https://d2w9rnfcy7mm78.cloudfront.net/40871580/original_3fb729c818f92de4539d4ff263eb6056.png?1762357121?bc=0",
        "https://d2w9rnfcy7mm78.cloudfront.net/40871591/original_91d7c8144a5ba9776118f0af6d923f94.png?1762357155?bc=0",
        "https://d2w9rnfcy7mm78.cloudfront.net/40871607/original_766f89eb3b06cc84372bea9d58132c93.png?1762357207?bc=0",
        "https://attachments.are.na/40873309/ebf4eae61a70773f7494e10a98b18fe3.mp4?1762359389",
        "https://d2w9rnfcy7mm78.cloudfront.net/40873379/original_289824f61eade100785db100652abd9a.jpg?1762359483?bc=0",
    ),

    "block_class" : str,
    "block_title" : str,
    "channel_id"  : 4422732,
    "channel_slug": "touhou-zr5p8idnkag",
    "count"       : 6,
    "date"        : "dt:2025-11-05 15:37:40",
    "date_updated": "dt:2025-11-10 19:52:52",
    "gallery_id"  : "touhou-zr5p8idnkag",
    "id"          : int,
    "title"       : '''Touhou "東方"''',
    "user"        : {
        "avatar"         : "",
        "badge"          : None,
        "base_class"     : "User",
        "can_index"      : False,
        "channel_count"  : 3,
        "class"          : "User",
        "created_at"     : "2025-11-05T15:35:15.242Z",
        "first_name"     : "mikf",
        "follower_count" : 0,
        "following_count": 0,
        "full_name"      : "mikf .",
        "id"             : 1127493,
        "initials"       : "m.",
        "is_confirmed"   : True,
        "is_exceeding_connections_limit": False,
        "is_lifetime_premium": False,
        "is_pending_confirmation": False,
        "is_pending_reconfirmation": False,
        "is_premium"     : False,
        "is_supporter"   : False,
        "last_name"      : ".",
        "metadata"       : {"description": None},
        "profile_id"     : 4422723,
        "slug"           : "mikf",
        "username"       : "mikf .",
        "avatar_image"   : {
            "display": "",
            "thumb"  : "",
        },
    },
},

)