[arcalive] add 'user' extractor (#5657)

This commit is contained in:
Mike Fährmann
2025-03-14 18:58:10 +01:00
parent d900e868e4
commit 31e57bafab
4 changed files with 82 additions and 13 deletions

View File

@@ -100,7 +100,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Arcalive</td>
<td>https://arca.live/</td>
<td>Boards, Posts</td>
<td>Boards, Posts, User Posts</td>
<td></td>
</tr>
<tr>

View File

@@ -22,6 +22,13 @@ class ArcaliveExtractor(Extractor):
def _init(self):
    """Create an ArcaliveAPI instance for this extractor as 'self.api'"""
    self.api = ArcaliveAPI(self)
def items(self):
    """Yield one queue message per article returned by self.articles()

    Each article dict is tagged with ArcalivePostExtractor under
    "_extractor" and is emitted together with its post URL.
    """
    build_url = "{}/b/{}/{}".format
    for post in self.articles():
        post["_extractor"] = ArcalivePostExtractor
        # prefer the extractor's own board, then the article's
        # "boardSlug" value, and finally fall back to "breaking"
        slug = self.board or post.get("boardSlug") or "breaking"
        post_url = build_url(self.root, slug, post["id"])
        yield Message.Queue, post_url, post
class ArcalivePostExtractor(ArcaliveExtractor):
"""Extractor for an arca.live post"""
@@ -99,18 +106,26 @@ class ArcalivePostExtractor(ArcaliveExtractor):
class ArcaliveBoardExtractor(ArcaliveExtractor):
"""Extractor for an arca.live board's posts"""
subcategory = "board"
pattern = BASE_PATTERN + r"/b/(\w+)(?:/?\?([^#]+))?$"
pattern = BASE_PATTERN + r"/b/([^/?#]+)/?(?:\?([^#]+))?$"
example = "https://arca.live/b/breaking"
def items(self):
board, query = self.groups
def articles(self):
self.board, query = self.groups
params = text.parse_query(query)
articles = self.api.board(board, params)
return self.api.board(self.board, params)
for article in articles:
article["_extractor"] = ArcalivePostExtractor
url = "{}/b/{}/{}".format(self.root, board, article["id"])
yield Message.Queue, url, article
class ArcaliveUserExtractor(ArcaliveExtractor):
    """Extractor for an arca.live user's posts"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/u/@([^/?#]+)/?(?:\?([^#]+))?$"
    example = "https://arca.live/u/@USER"

    def articles(self):
        """Return the result of the API's user_posts() for the URL's user

        The user name and the optional query string are taken from
        the two groups captured by 'pattern'.
        """
        # no fixed board for this extractor
        self.board = None
        name, qs = self.groups
        query_params = text.parse_query(qs)
        # the name is percent-decoded before it is handed to the API
        username = text.unquote(name)
        return self.api.user_posts(username, query_params)
class ArcaliveAPI():
@@ -132,6 +147,12 @@ class ArcaliveAPI():
endpoint = "/view/article/breaking/" + str(post_id)
return self._call(endpoint)
def user_posts(self, username, params):
    """Request the "breaking" channel listing filtered by nickname

    Sets "target" and "keyword" in 'params' (modified in place) and
    returns whatever self._pagination() returns for that endpoint.
    """
    params.update(target="nickname", keyword=username)
    return self._pagination("/list/channel/breaking", params, "articles")
def _call(self, endpoint, params=None):
url = self.root + endpoint
response = self.extractor.request(url, params=params)

View File

@@ -201,6 +201,9 @@ SUBCATEGORY_MAP = {
"user-series" : "",
"user-bookmark": "Bookmarks",
},
"arcalive": {
"user": "User Posts",
},
"artstation": {
"artwork": "Artwork Listings",
"collections": "",

View File

@@ -120,11 +120,56 @@ __tests__ = (
},
{
"#url" : "https://arca.live/b/arknights",
"#class" : arcalive.ArcaliveBoardExtractor,
"#url" : "https://arca.live/b/arknights",
"#class" : arcalive.ArcaliveBoardExtractor,
"#pattern": arcalive.ArcalivePostExtractor.pattern,
"#range" : "1-100",
"#count" : 100,
"#range" : "1-100",
"#count" : 100,
"category" : {str, None},
"categoryDisplayName": {str, None},
"commentCount": int,
"createdAt" : str,
"id" : int,
"isUser" : bool,
"?mark" : str,
"nickname" : str,
"publicId" : {int, None},
"ratingDown" : int,
"ratingUp" : int,
"thumbnailUrl": {str, None},
"title" : str,
"viewCount" : int,
},
{
"#url" : "https://arca.live/u/@Si%EB%A6%AC%EB%A7%81",
"#class": arcalive.ArcaliveUserExtractor,
"#range": "1-5",
"#urls" : (
"https://arca.live/b/vrchat/107257886",
"https://arca.live/b/soulworkers/95371697",
"https://arca.live/b/arcalivebreverse/90843346",
"https://arca.live/b/arcalivebreverse/90841126",
"https://arca.live/b/arcalivebreverse/90769916",
),
"boardName" : str,
"boardSlug" : {"vrchat", "soulworkers", "arcalivebreverse"},
"category" : {str, None},
"categoryDisplayName": {str, None},
"commentCount": int,
"createdAt" : str,
"id" : int,
"isUser" : True,
"?mark" : "image",
"nickname" : "Si리링",
"publicId" : {int, None},
"ratingDown" : int,
"ratingUp" : int,
"thumbnailUrl": {str, None},
"title" : str,
"viewCount" : int,
},
)