diff --git a/docs/configuration.rst b/docs/configuration.rst
index 4c1cf631..1bf64747 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -384,6 +384,7 @@ Type
Default
* ``"0.5-1.5"``
``ao3``,
+ ``arcalive``,
``civitai``,
``[Danbooru]``,
``[E621]``,
@@ -1394,6 +1395,16 @@ Description
Format(s) to download.
+extractor.arcalive.emoticons
+----------------------------
+Type
+ ``bool``
+Default
+ ``false``
+Description
+ Download emoticon images.
+
+
extractor.artstation.external
-----------------------------
Type
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 44833760..aa92ba9c 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -99,6 +99,12 @@
"formats": ["pdf"]
},
+ "arcalive":
+ {
+ "sleep-request": "0.5-1.5",
+
+ "emoticons": false
+ },
"artstation":
{
"external" : false,
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 4046e49c..df2ee955 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -97,6 +97,12 @@ Consider all listed sites to potentially be NSFW.
Posts, Tag Searches |
|
+
+ | Arcalive |
+ https://arca.live/ |
+ Boards, Posts |
+ |
+
| Architizer |
https://architizer.com/ |
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 8208241e..8198619e 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ modules = [
"adultempire",
"agnph",
"ao3",
+ "arcalive",
"architizer",
"artstation",
"aryion",
diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py
new file mode 100644
index 00000000..db99313f
--- /dev/null
+++ b/gallery_dl/extractor/arcalive.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://arca.live/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+import re
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"  # scheme, www. optional
+
+
+class ArcaliveExtractor(Extractor):
+    """Common settings and API access shared by all arca.live extractors"""
+    root = "https://arca.live"
+    category = "arcalive"
+    request_interval = (0.5, 1.5)
+
+    def _init(self):
+        self.api = ArcaliveAPI(self)  # one API client per extractor instance
+
+
+class ArcalivePostExtractor(ArcaliveExtractor):
+    """Extractor for the images and videos of a single arca.live post"""
+    subcategory = "post"
+    directory_fmt = ("{category}", "{boardSlug}")
+    filename_fmt = "{id}_{num}{title:? //[b:230]}.{extension}"
+    archive_fmt = "{id}_{num}"
+    pattern = BASE_PATTERN + r"/b/(?:\w+)/(\d+)"
+    example = "https://arca.live/b/breaking/123456789"
+
+    def items(self):
+        """Yield a Directory message, then a Url message for each file"""
+        self.emoticons = self.config("emoticons", False)
+        post = self.api.post(self.groups[0])
+        files = self._extract_files(post)
+
+        post["count"] = len(files)
+        post["date"] = text.parse_datetime(
+            post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+        post["post_url"] = post_url = "{}/b/{}/{}".format(
+            self.root, post["boardSlug"], post["id"])
+        post["_http_headers"] = {"Referer": post_url + "?p=1"}
+
+        yield Message.Directory, post
+        for post["num"], file in enumerate(files, 1):
+            post.update(file)
+            url = file["url"]
+            yield Message.Url, url, text.nameext_from_url(url, post)
+
+    def _extract_files(self, post):
+        """Build the file list (url, width, height, _fallback) of a post"""
+        files = []
+        for media in self._extract_media(post["content"]):
+            if not self.emoticons and 'class="arca-emoticon"' in media:
+                continue
+
+            raw = (text.extr(media, 'data-originalurl="', '"') or
+                   text.extr(media, 'src="', '"'))
+            # the query string is dropped; a value like "?x" leaves nothing
+            src = text.unescape(raw.partition("?")[0]) if raw else ""
+            if not src:
+                continue
+
+            if src.startswith("//"):
+                url = "https:" + src
+            elif src.startswith("/"):
+                url = self.root + src
+            else:
+                url = src
+
+            fallback = ()
+            orig = text.extr(media, 'data-orig="', '"')
+            if orig:
+                # use the 'data-orig' extension; keep the given URL as backup
+                path, _, ext = url.rpartition(".")
+                if ext != orig:
+                    fallback = (url + "?type=orig",)
+                    url = path + "." + orig
+
+            files.append({
+                "url"   : url + "?type=orig",
+                "width" : text.parse_int(text.extr(media, 'width="', '"')),
+                "height": text.parse_int(text.extr(media, 'height="', '"')),
+                "_fallback": fallback,
+            })
+        return files
+
+    def _extract_media(self, content):
+        """Return the attribute string of every <img> and <video> tag"""
+        find = re.compile(r"<(?:img|video) ([^>]+)").findall
+        ArcalivePostExtractor._extract_media = find  # compile only once
+        return find(content)
+
+
+class ArcaliveBoardExtractor(ArcaliveExtractor):
+    """Extractor that queues every post listed on an arca.live board"""
+    subcategory = "board"
+    pattern = BASE_PATTERN + r"/b/(\w+)(?:/?\?([^#]+))?$"
+    example = "https://arca.live/b/breaking"
+
+    def items(self):
+        """Yield a Queue message for each article of the board listing"""
+        slug, query = self.groups
+        listing = self.api.board(slug, text.parse_query(query))
+
+        for entry in listing:
+            post_url = "{}/b/{}/{}".format(self.root, slug, entry["id"])
+            entry["_extractor"] = ArcalivePostExtractor
+            yield Message.Queue, post_url, entry
+
+
+class ArcaliveAPI():
+    """Client for arca.live's "/api/app" endpoints"""
+    def __init__(self, extractor):
+        self.extractor = extractor
+        self.log = extractor.log
+        self.root = extractor.root + "/api/app"
+        headers = extractor.session.headers
+        headers["User-Agent"] = "net.umanle.arca.android.playstore/0.9.75"
+        headers["X-Device-Token"] = util.generate_token(64)
+
+    def board(self, board_slug, params):
+        """Return an iterator over the articles listed for a board"""
+        endpoint = "/list/channel/" + board_slug
+        return self._pagination(endpoint, params, "articles")
+
+    def post(self, post_id):
+        """Return the data of one article (always queried as "breaking")"""
+        return self._call("/view/article/breaking/" + str(post_id))
+
+    def _call(self, endpoint, params=None):
+        """Return the decoded JSON reply; raise StopExtraction if not 200"""
+        response = self.extractor.request(self.root + endpoint, params=params)
+        data = response.json()
+        if response.status_code == 200:
+            return data
+
+        self.log.debug("Server response: %s", data)
+        msg = data.get("message")
+        raise exception.StopExtraction(
+            "API request failed%s", ": " + msg if msg else "")
+
+    def _pagination(self, endpoint, params, key):
+        """Yield the items under 'key' page by page (mutates 'params')"""
+        while True:
+            data = self._call(endpoint, params)
+            posts, cursor = data.get(key), data.get("next")
+            if not posts:
+                return
+            yield from posts
+            if not cursor:  # nothing more to request
+                return
+            params.update(cursor)
diff --git a/test/results/arcalive.py b/test/results/arcalive.py
new file mode 100644
index 00000000..8dcdc7bb
--- /dev/null
+++ b/test/results/arcalive.py
@@ -0,0 +1,130 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import arcalive
+
+
+__tests__ = (
+{
+ "#url" : "https://arca.live/b/arknights/66031722?p=1",
+ "#class" : arcalive.ArcalivePostExtractor,
+ "#urls" : "https://ac.namu.la/20221225sac2/e06dcf8edd29c597240898a6752c74dbdd0680fc932cfd0ecc898795f1db34b5.jpg?type=orig",
+
+ "isEditable": False,
+ "isDeletable": False,
+ "isReportable": False,
+ "id": 66031722,
+ "nickname": "Si리링",
+ "title": "엑샤 스작함",
+ "contentType": "html",
+ "content": r"re:^알게또 뽑으려했는데 못뽑아서 엑샤 스작함
엑샤에 보카디 3스나 와파린 2스 붙이는거 맞음.+/>
$",
+ "viewCount": range(8000, 20000),
+ "ratingUp": 0,
+ "ratingDown": 0,
+ "ratingUpIp": 0,
+ "ratingDownIp": 0,
+ "createdAt": "2022-12-25T05:16:55.000Z",
+ "updatedAt": "2022-12-25T05:16:55.000Z",
+ "lastComment": "2022-12-25T05:22:12.000Z",
+ "commentCount": range(2, 9),
+ "publicId": None,
+ "token": "44bb2dfd0bbc672e",
+ "isUser": True,
+ "gravatar": "//secure.gravatar.com/avatar/6c3fdbdeea149b29eea8d887c37fc119?d=retro&f=y",
+ "preventDelete": False,
+ "channelPermission": dict,
+ "captcha": True,
+ "isSensitive": False,
+ "categoryDisplayName": None,
+ "blockPreview": False,
+ "isSpoilerAlert": False,
+ "boardName": "명일방주 채널",
+ "boardSlug": "arknights",
+ "isBest": False,
+ "vote": [],
+ "date": "dt:2022-12-25 05:16:55",
+ "post_url": "https://arca.live/b/arknights/66031722",
+ "count": 1,
+ "num": 1,
+ "url": "https://ac.namu.la/20221225sac2/e06dcf8edd29c597240898a6752c74dbdd0680fc932cfd0ecc898795f1db34b5.jpg?type=orig",
+ "width": 3200,
+ "height": 1440,
+ "filename": "e06dcf8edd29c597240898a6752c74dbdd0680fc932cfd0ecc898795f1db34b5",
+ "extension": "jpg",
+},
+
+{
+ "#url" : "https://arca.live/b/breaking/66031722",
+ "#comment": "/b/breaking page URL",
+ "#class" : arcalive.ArcalivePostExtractor,
+ "#urls" : "https://ac.namu.la/20221225sac2/e06dcf8edd29c597240898a6752c74dbdd0680fc932cfd0ecc898795f1db34b5.jpg?type=orig",
+},
+
+{
+ "#url" : "https://arca.live/b/bluearchive/65031202",
+ "#comment": "animated gif",
+ "#class" : arcalive.ArcalivePostExtractor,
+ "#urls" : (
+ "https://ac.namu.la/20221211sac/5ea7fbca5e49ec16beb099fc6fc991690d37552e599b1de8462533908346241e.png?type=orig",
+ "https://ac.namu.la/20221211sac/7f73beefc4f18a2f986bc4c6821caba706e27f4c94cb828fc16e2af1253402d9.gif?type=orig",
+ "https://ac.namu.la/20221211sac2/3e72f9e05ca97c0c3c0fe5f25632b06eb21ab9f211e9ea22816e16468ee241ca.png?type=orig",
+ ),
+},
+
+{
+ "#url" : "https://arca.live/b/arknights/122263340",
+ "#comment": "animated webp",
+ "#class" : arcalive.ArcalivePostExtractor,
+ "#urls" : (
+ "https://ac.namu.la/20241126sac/b2175d9ef4504945d3d989526120dbb6aded501ddedfba8ecc44a64e7aae9059.gif?type=orig",
+ "https://ac.namu.la/20241126sac/bc1f3cb388a3a2d099ab67bc09b28f0a93c2c4755152b3ef9190690a9f0a28fb.webp?type=orig",
+ ),
+},
+
+{
+ "#url" : "https://arca.live/b/bluearchive/117240135",
+ "#comment": "video",
+ "#class" : arcalive.ArcalivePostExtractor,
+ "#urls" : "https://ac.namu.la/20240926sac/16f07778a97f91b935c8a3394ead01a223d96b2a619fdb25c4628ddba88b5fad.mp4?type=orig",
+},
+
+{
+ "#url" : "https://arca.live/b/bluearchive/111191955",
+ "#comment": "fake .mp4 GIF",
+ "#skip" : "not implemented",
+ "#class" : arcalive.ArcalivePostExtractor,
+ # "#urls" : "https://ac.namu.la/20240714sac/c8fcadeb0b578e5121eb7a7e8fb05984cb87c68e7a6e0481a1c8869bf0ecfd2b.gif?type=orig",
+ "#urls" : "https://ac.namu.la/20240714sac/c8fcadeb0b578e5121eb7a7e8fb05984cb87c68e7a6e0481a1c8869bf0ecfd2b.mp4?type=orig",
+},
+
+{
+ "#url" : "https://arca.live/b/arknights/49406926",
+ "#comment": "static emoticon",
+ "#class" : arcalive.ArcalivePostExtractor,
+ "#urls" : "https://ac.namu.la/20220428sac2/41f472adcea674aff75f15f146e81c27032bc4d6c8073bd7c19325bd1c97d335.png?type=orig",
+},
+
+{
+ "#url" : "https://arca.live/b/commission/63658702",
+ "#comment": "animated emoticon",
+ "#class" : arcalive.ArcalivePostExtractor,
+ "#options": {"emoticons": True},
+ "#urls" : (
+ "https://ac.namu.la/20221123sac2/14925c5e22ab9f17f2923ae60a39b7af0794c43e478ecaba054ab6131e57e022.png?type=orig",
+ "https://ac.namu.la/20221123sac2/50c385a4004bca44271a2f6133990f086cfefd29a7968514e9c14d6017d61265.png?type=orig",
+ "https://ac.namu.la/20221005sac2/28ebe073fffbb2b88f710c2d380b0fe6dd99a856070c4a836db57634a5371366.gif?type=orig",
+ ),
+},
+
+{
+ "#url" : "https://arca.live/b/arknights",
+ "#class" : arcalive.ArcaliveBoardExtractor,
+ "#pattern": arcalive.ArcalivePostExtractor.pattern,
+ "#range" : "1-100",
+ "#count" : 100,
+},
+
+)