diff --git a/docs/configuration.rst b/docs/configuration.rst index 4c1cf631..1bf64747 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -384,6 +384,7 @@ Type Default * ``"0.5-1.5"`` ``ao3``, + ``arcalive``, ``civitai``, ``[Danbooru]``, ``[E621]``, @@ -1394,6 +1395,16 @@ Description Format(s) to download. +extractor.arcalive.emoticons +---------------------------- +Type + ``bool`` +Default + ``false`` +Description + Download emoticon images. + + extractor.artstation.external ----------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 44833760..aa92ba9c 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -99,6 +99,12 @@ "formats": ["pdf"] }, + "arcalive": + { + "sleep-request": "0.5-1.5", + + "emoticons": false + }, "artstation": { "external" : false, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 4046e49c..df2ee955 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -97,6 +97,12 @@ Consider all listed sites to potentially be NSFW. Posts, Tag Searches + + Arcalive + https://arca.live/ + Boards, Posts + + Architizer https://architizer.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 8208241e..8198619e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -24,6 +24,7 @@ modules = [ "adultempire", "agnph", "ao3", + "arcalive", "architizer", "artstation", "aryion", diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py new file mode 100644 index 00000000..db99313f --- /dev/null +++ b/gallery_dl/extractor/arcalive.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://arca.live/""" + +from .common import Extractor, Message +from .. import text, util, exception +import re + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live" + + +class ArcaliveExtractor(Extractor): + """Base class for Arca.live extractors""" + category = "arcalive" + root = "https://arca.live" + request_interval = (0.5, 1.5) + + def _init(self): + self.api = ArcaliveAPI(self) + + +class ArcalivePostExtractor(ArcaliveExtractor): + """Extractor for an arca.live post""" + subcategory = "post" + directory_fmt = ("{category}", "{boardSlug}") + filename_fmt = "{id}_{num}{title:? //[b:230]}.{extension}" + archive_fmt = "{id}_{num}" + pattern = BASE_PATTERN + r"/b/(?:\w+)/(\d+)" + example = "https://arca.live/b/breaking/123456789" + + def items(self): + self.emoticons = self.config("emoticons", False) + + post = self.api.post(self.groups[0]) + files = self._extract_files(post) + + post["count"] = len(files) + post["date"] = text.parse_datetime( + post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S") + post["post_url"] = post_url = "{}/b/{}/{}".format( + self.root, post["boardSlug"], post["id"]) + post["_http_headers"] = {"Referer": post_url + "?p=1"} + + yield Message.Directory, post + for post["num"], file in enumerate(files, 1): + post.update(file) + url = file["url"] + yield Message.Url, url, text.nameext_from_url(url, post) + + def _extract_files(self, post): + files = [] + + for media in self._extract_media(post["content"]): + + if not self.emoticons and 'class="arca-emoticon"' in media: + continue + + src = (text.extr(media, 'data-originalurl="', '"') or + text.extr(media, 'src="', '"')) + if not src: + continue + + src = text.unescape(src.partition("?")[0]) + if src[0] == "/": + if src[1] == "/": + url = "https:" + src + else: + url = self.root + src + else: + url = src + + fallback = () + orig = text.extr(media, 'data-orig="', '"') + if orig: + path, _, ext = url.rpartition(".") + if ext != orig: + fallback = (url + "?type=orig",) + url = path + "." + orig + + files.append({ + "url" : url + "?type=orig", + "width" : text.parse_int(text.extr(media, 'width="', '"')), + "height": text.parse_int(text.extr(media, 'height="', '"')), + "_fallback": fallback, + }) + + return files + + def _extract_media(self, content): + ArcalivePostExtractor._extract_media = extr = re.compile( + r"<(?:img|video) ([^>]+)").findall + return extr(content) + + +class ArcaliveBoardExtractor(ArcaliveExtractor): + """Extractor for an arca.live board's posts""" + subcategory = "board" + pattern = BASE_PATTERN + r"/b/(\w+)(?:/?\?([^#]+))?$" + example = "https://arca.live/b/breaking" + + def items(self): + board, query = self.groups + params = text.parse_query(query) + articles = self.api.board(board, params) + + for article in articles: + article["_extractor"] = ArcalivePostExtractor + url = "{}/b/{}/{}".format(self.root, board, article["id"]) + yield Message.Queue, url, article + + +class ArcaliveAPI(): + + def __init__(self, extractor): + self.extractor = extractor + self.log = extractor.log + self.root = extractor.root + "/api/app" + + headers = extractor.session.headers + headers["User-Agent"] = "net.umanle.arca.android.playstore/0.9.75" + headers["X-Device-Token"] = util.generate_token(64) + + def board(self, board_slug, params): + endpoint = "/list/channel/" + board_slug + return self._pagination(endpoint, params, "articles") + + def post(self, post_id): + endpoint = "/view/article/breaking/" + str(post_id) + return self._call(endpoint) + + def _call(self, endpoint, params=None): + url = self.root + endpoint + response = self.extractor.request(url, params=params) + + data = response.json() + if response.status_code == 200: + return data + + self.log.debug("Server response: %s", data) + msg = data.get("message") + raise exception.StopExtraction( + "API request failed%s", ": " + msg if msg else "") + + def _pagination(self, endpoint, params, key): + while True: + data = self._call(endpoint, params) + + posts = data.get(key) + if not posts: + break + yield from posts + + params.update(data["next"]) diff --git a/test/results/arcalive.py b/test/results/arcalive.py new file mode 100644 index 00000000..8dcdc7bb --- /dev/null +++ b/test/results/arcalive.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import arcalive + + +__tests__ = ( +{ + "#url" : "https://arca.live/b/arknights/66031722?p=1", + "#class" : arcalive.ArcalivePostExtractor, + "#urls" : "https://ac.namu.la/20221225sac2/e06dcf8edd29c597240898a6752c74dbdd0680fc932cfd0ecc898795f1db34b5.jpg?type=orig", + + "isEditable": False, + "isDeletable": False, + "isReportable": False, + "id": 66031722, + "nickname": "Si리링", + "title": "엑샤 스작함", + "contentType": "html", + "content": r"re:^

알게또 뽑으려했는데 못뽑아서 엑샤 스작함
엑샤에 보카디 3스나 와파린 2스 붙이는거 맞음.+/>

$", + "viewCount": range(8000, 20000), + "ratingUp": 0, + "ratingDown": 0, + "ratingUpIp": 0, + "ratingDownIp": 0, + "createdAt": "2022-12-25T05:16:55.000Z", + "updatedAt": "2022-12-25T05:16:55.000Z", + "lastComment": "2022-12-25T05:22:12.000Z", + "commentCount": range(2, 9), + "publicId": None, + "token": "44bb2dfd0bbc672e", + "isUser": True, + "gravatar": "//secure.gravatar.com/avatar/6c3fdbdeea149b29eea8d887c37fc119?d=retro&f=y", + "preventDelete": False, + "channelPermission": dict, + "captcha": True, + "isSensitive": False, + "categoryDisplayName": None, + "blockPreview": False, + "isSpoilerAlert": False, + "boardName": "명일방주 채널", + "boardSlug": "arknights", + "isBest": False, + "vote": [], + "date": "dt:2022-12-25 05:16:55", + "post_url": "https://arca.live/b/arknights/66031722", + "count": 1, + "num": 1, + "url": "https://ac.namu.la/20221225sac2/e06dcf8edd29c597240898a6752c74dbdd0680fc932cfd0ecc898795f1db34b5.jpg?type=orig", + "width": 3200, + "height": 1440, + "filename": "e06dcf8edd29c597240898a6752c74dbdd0680fc932cfd0ecc898795f1db34b5", + "extension": "jpg", +}, + +{ + "#url" : "https://arca.live/b/breaking/66031722", + "#comment": "/b/breaking page URL", + "#class" : arcalive.ArcalivePostExtractor, + "#urls" : "https://ac.namu.la/20221225sac2/e06dcf8edd29c597240898a6752c74dbdd0680fc932cfd0ecc898795f1db34b5.jpg?type=orig", +}, + +{ + "#url" : "https://arca.live/b/bluearchive/65031202", + "#comment": "animated gif", + "#class" : arcalive.ArcalivePostExtractor, + "#urls" : ( + "https://ac.namu.la/20221211sac/5ea7fbca5e49ec16beb099fc6fc991690d37552e599b1de8462533908346241e.png?type=orig", + "https://ac.namu.la/20221211sac/7f73beefc4f18a2f986bc4c6821caba706e27f4c94cb828fc16e2af1253402d9.gif?type=orig", + "https://ac.namu.la/20221211sac2/3e72f9e05ca97c0c3c0fe5f25632b06eb21ab9f211e9ea22816e16468ee241ca.png?type=orig", + ), +}, + +{ + "#url" : "https://arca.live/b/arknights/122263340", + "#comment": "animated webp", + "#class" : arcalive.ArcalivePostExtractor, + "#urls" : ( + "https://ac.namu.la/20241126sac/b2175d9ef4504945d3d989526120dbb6aded501ddedfba8ecc44a64e7aae9059.gif?type=orig", + "https://ac.namu.la/20241126sac/bc1f3cb388a3a2d099ab67bc09b28f0a93c2c4755152b3ef9190690a9f0a28fb.webp?type=orig", + ), +}, + +{ + "#url" : "https://arca.live/b/bluearchive/117240135", + "#comment": "video", + "#class" : arcalive.ArcalivePostExtractor, + "#urls" : "https://ac.namu.la/20240926sac/16f07778a97f91b935c8a3394ead01a223d96b2a619fdb25c4628ddba88b5fad.mp4?type=orig", +}, + +{ + "#url" : "https://arca.live/b/bluearchive/111191955", + "#comment": "fake .mp4 GIF", + "#skip" : "not implemented", + "#class" : arcalive.ArcalivePostExtractor, + # "#urls" : "https://ac.namu.la/20240714sac/c8fcadeb0b578e5121eb7a7e8fb05984cb87c68e7a6e0481a1c8869bf0ecfd2b.gif?type=orig", + "#urls" : "https://ac.namu.la/20240714sac/c8fcadeb0b578e5121eb7a7e8fb05984cb87c68e7a6e0481a1c8869bf0ecfd2b.mp4?type=orig", +}, + +{ + "#url" : "https://arca.live/b/arknights/49406926", + "#comment": "static emoticon", + "#class" : arcalive.ArcalivePostExtractor, + "#urls" : "https://ac.namu.la/20220428sac2/41f472adcea674aff75f15f146e81c27032bc4d6c8073bd7c19325bd1c97d335.png?type=orig", +}, + +{ + "#url" : "https://arca.live/b/commission/63658702", + "#comment": "animated emoticon", + "#class" : arcalive.ArcalivePostExtractor, + "#options": {"emoticons": True}, + "#urls" : ( + "https://ac.namu.la/20221123sac2/14925c5e22ab9f17f2923ae60a39b7af0794c43e478ecaba054ab6131e57e022.png?type=orig", + "https://ac.namu.la/20221123sac2/50c385a4004bca44271a2f6133990f086cfefd29a7968514e9c14d6017d61265.png?type=orig", + "https://ac.namu.la/20221005sac2/28ebe073fffbb2b88f710c2d380b0fe6dd99a856070c4a836db57634a5371366.gif?type=orig", + ), +}, + +{ + "#url" : "https://arca.live/b/arknights", + "#class" : arcalive.ArcaliveBoardExtractor, + "#pattern": arcalive.ArcalivePostExtractor.pattern, + "#range" : "1-100", + "#count" : 100, +}, + +)