Python's 'ast' module and its 'NodeVisitor' class were incredibly helpful in identifying these
90 lines
3.0 KiB
Python
90 lines
3.0 KiB
Python
# -*- coding: utf-8 -*-

# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractor for https://are.na/"""
|
|
|
|
from .common import GalleryExtractor
|
|
|
|
|
|
class ArenaChannelExtractor(GalleryExtractor):
    """Extractor for are.na channels"""
    category = "arena"
    subcategory = "channel"
    root = "https://are.na"
    directory_fmt = ("{category}", "{user[full_name]} ({user[id]})",
                     "{channel[title]} ({channel[id]})")
    filename_fmt = "{num:>03}{block[id]:? //}.{extension}"
    archive_fmt = "{channel[id]}/{block[id]}"
    pattern = r"(?:https?://)?(?:www\.)?are\.na/[^/?#]+/([^/?#]+)"
    example = "https://are.na/evan-collins-1522646491/cassette-futurism"

    def metadata(self, page):
        """Request the channel object and build the gallery metadata.

        'page' is not used by this method.

        Returns a dict with the keys "count", "user", "owner" and
        "channel". "user" and "owner" are removed from the channel object
        and exposed as top-level entries instead.
        """
        # NOTE(review): self.groups[0] is presumably the channel slug
        # captured by the single group in 'pattern' - confirm in base class
        url = f"https://api.are.na/v2/channels/{self.groups[0]}"
        channel = self.request_json(url)

        channel["date"] = self.parse_datetime_iso(
            channel["created_at"])
        channel["date_updated"] = self.parse_datetime_iso(
            channel["updated_at"])
        # Drop any embedded "contents" list from the channel metadata;
        # blocks are requested page by page in images()
        channel.pop("contents", None)

        return {
            "count"  : channel.get("length"),
            "user"   : channel.pop("user", None),
            "owner"  : channel.pop("owner", None),
            "channel": channel,
        }

    def images(self, page):
        """Yield a (url, metadata) pair for every downloadable block.

        'page' is not used by this method. The channel's contents
        endpoint is requested page by page, 100 blocks at a time, until
        a page comes back empty or holds fewer blocks than requested.
        Blocks for which no URL can be determined are skipped.
        """
        api = f"https://api.are.na/v2/channels/{self.groups[0]}/contents"
        limit = 100
        params = {"page": 1, "per": limit}

        while True:
            data = self.request_json(api, params=params)

            contents = data.get("contents")
            if not contents:
                return

            for block in contents:
                url = self._block_url(block)

                # Some Links/Channels may not have downloadable media
                if not url:
                    continue

                block["date"] = self.parse_datetime_iso(
                    block["created_at"])
                block["date_updated"] = self.parse_datetime_iso(
                    block["updated_at"])

                # "source" is popped from the very dict stored under
                # "block", so it only appears as its own top-level entry
                yield url, {
                    "block" : block,
                    "source": block.pop("source", None),
                }

            # A short page means there is nothing left to request
            if len(contents) < limit:
                return
            params["page"] += 1

    @staticmethod
    def _block_url(block):
        """Return the download URL for 'block', or a falsy value if none.

        An attachment (e.g., PDFs, files) takes priority over an image.
        For images, the "original", "display" and "large" variants are
        tried in that order, and the first one that actually carries a
        URL wins. A variant that is present but has no URL no longer
        prevents the remaining variants from being tried.
        """
        if attachment := block.get("attachment"):
            return attachment.get("url")

        if image := block.get("image"):
            for name in ("original", "display", "large"):
                variant = image.get(name)
                if variant and (url := variant.get("url")):
                    return url

        return None
|