[weibo:album] implement 'subalbum' support (#8792)
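Supported URL forms:
- https://weibo.com/u/USER?tabtype=album-1 (subalbum selected by its 1-based position)
- https://weibo.com/u/USER?tabtype=album-NAME (subalbum selected by its title)
- https://weibo.com/u/USER?tabtype=album-all (all subalbums, followed by the main album)
- https://weibo.com/u/USER?tabtype=album-only (all subalbums, without the main album)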
This commit is contained in:
@@ -6986,6 +6986,16 @@ Description
|
||||
Download video files.
|
||||
|
||||
|
||||
extractor.weibo.album.subalbums
|
||||
-------------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Extract subalbum media.
|
||||
|
||||
|
||||
extractor.wikimedia.format
|
||||
--------------------------
|
||||
Type
|
||||
|
||||
@@ -972,7 +972,11 @@
|
||||
"movies" : false,
|
||||
"retweets" : false,
|
||||
"text" : false,
|
||||
"videos" : true
|
||||
"videos" : true,
|
||||
|
||||
"album": {
|
||||
"subalbums": false
|
||||
}
|
||||
},
|
||||
"xfolio":
|
||||
{
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2025 Mike Fährmann
|
||||
# Copyright 2019-2026 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -29,7 +29,8 @@ class WeiboExtractor(Extractor):
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self._prefix, self.user = match.groups()
|
||||
self._prefix = match[1]
|
||||
self.user = match[2]
|
||||
|
||||
def _init(self):
|
||||
self.livephoto = self.config("livephoto", True)
|
||||
@@ -199,18 +200,23 @@ class WeiboExtractor(Extractor):
|
||||
f"?id={status_id}&isGetLongText=true")
|
||||
return self.request_json(url)
|
||||
|
||||
def _user_id(self):
|
||||
if len(self.user) >= 10 and self.user.isdecimal():
|
||||
return self.user[-10:]
|
||||
else:
|
||||
url = (f"{self.root}/ajax/profile/info?"
|
||||
f"{'screen_name' if self._prefix == 'n' else 'custom'}="
|
||||
f"{self.user}")
|
||||
return self.request_json(url)["data"]["user"]["idstr"]
|
||||
def _user(self, user):
|
||||
url = (f"{self.root}/ajax/profile/info?"
|
||||
f"{'screen_name' if self._prefix == 'n' else 'custom'}={user}")
|
||||
return self.request_json(url, interval=False)["data"]["user"]
|
||||
|
||||
def _pagination(self, endpoint, params):
|
||||
def _user_id(self):
|
||||
user = self.user
|
||||
if len(user) >= 10 and user.isdecimal():
|
||||
return user[-10:]
|
||||
else:
|
||||
return self._user(user)["idstr"]
|
||||
|
||||
def _pagination(self, endpoint, params,
|
||||
since_key="sinceid", subalbums=None):
|
||||
url = f"{self.root}/ajax{endpoint}"
|
||||
headers = {
|
||||
"Accept": "application/json, text/plain, */*",
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
"X-XSRF-TOKEN": None,
|
||||
"Referer": f"{self.root}/u/{params['uid']}",
|
||||
@@ -218,7 +224,6 @@ class WeiboExtractor(Extractor):
|
||||
|
||||
while True:
|
||||
response = self.request(url, params=params, headers=headers)
|
||||
headers["Accept"] = "application/json, text/plain, */*"
|
||||
headers["X-XSRF-TOKEN"] = response.cookies.get("XSRF-TOKEN")
|
||||
|
||||
data = response.json()
|
||||
@@ -234,6 +239,10 @@ class WeiboExtractor(Extractor):
|
||||
except KeyError:
|
||||
return
|
||||
|
||||
if subalbums is not None:
|
||||
subalbums = None
|
||||
yield data.get("album_list") or ()
|
||||
|
||||
yield from statuses
|
||||
|
||||
# videos, newvideo
|
||||
@@ -244,8 +253,10 @@ class WeiboExtractor(Extractor):
|
||||
continue
|
||||
|
||||
# album
|
||||
if since_id := data.get("since_id"):
|
||||
params["sinceid"] = since_id
|
||||
if "since_id" in data:
|
||||
params[since_key] = since_id = data["since_id"]
|
||||
if not since_id:
|
||||
return
|
||||
if "page" in params:
|
||||
params["page"] += 1
|
||||
continue
|
||||
@@ -383,9 +394,33 @@ class WeiboArticleExtractor(WeiboExtractor):
|
||||
class WeiboAlbumExtractor(WeiboExtractor):
|
||||
"""Extractor for weibo 'album' listings"""
|
||||
subcategory = "album"
|
||||
pattern = USER_PATTERN + r"\?tabtype=album"
|
||||
pattern = USER_PATTERN + r"\?tabtype=album(?:[:_-]([^&#]+))?"
|
||||
example = "https://weibo.com/USER?tabtype=album"
|
||||
|
||||
def items(self):
    """Dispatch to subalbum extraction or the regular status listing

    Subalbum mode is used when the URL carried a subalbum selector
    (third match group) or the 'subalbums' option is enabled;
    otherwise WeiboExtractor.items() handles the request unchanged.
    """
    selector = self.groups[2]

    if not (selector or self.config("subalbums", False)):
        return WeiboExtractor.items(self)

    # subalbum files use their own directory, filename,
    # and archive formats
    self.directory_fmt = (
        "{category}",
        "{user[screen_name]}",
        "Album",
        "{subalbum[pic_title]|''}",
    )
    self.filename_fmt = "{filename}.{extension}"
    self.archive_fmt = "{subalbum[pic_title]}_{pid}"
    return self.items_subalbum(selector)
|
||||
|
||||
def items_subalbum(self, subalbum):
    """Yield Directory and Url messages for the selected album(s)

    The user record from self._user() is stored in self.kwdict["user"].
    For every (metadata, files) pair returned by self.albums(), the
    metadata is stored in self.kwdict["subalbum"], one Directory
    message is emitted, and then one Url message per file.
    """
    profile = self._user(self.user)
    self.kwdict["user"] = profile
    download = f"{self.root}/ajax/common/download?pid="

    for album, media in self.albums(profile["idstr"], subalbum):
        self.kwdict["subalbum"] = album
        yield Message.Directory, "", {}

        for entry in media:
            pid = entry["pid"]
            # the picture ID doubles as filename; extension is fixed
            entry["filename"] = pid
            entry["extension"] = "jpg"
            yield Message.Url, download + pid, entry
|
||||
|
||||
def statuses(self):
|
||||
endpoint = "/profile/getImageWall"
|
||||
params = {"uid": self._user_id()}
|
||||
@@ -401,6 +436,51 @@ class WeiboAlbumExtractor(WeiboExtractor):
|
||||
else:
|
||||
yield status
|
||||
|
||||
def albums(self, uid, subalbum):
    """Select the (metadata, media iterator) pairs for 'subalbum'

    Requests '/profile/getImageWall' with 'has_album' enabled and takes
    the first item of that pagination as the list of subalbums; the
    remaining items are the main album's media.

    'subalbum' selects what is returned:
      None, '' or '0'  the main album only, with empty metadata
      'all'            every subalbum, followed by the main album
      'only'           every subalbum
      decimal number   the subalbum at that 1-based position
      anything else    the subalbum whose 'pic_title' equals the
                       unquoted value

    Raises exception.NotFoundError when a position or title does not
    match any subalbum.
    """
    endpoint = "/profile/getImageWall"
    params = {
        "uid"      : uid,
        "sinceid"  : "0",
        "has_album": "true",
    }
    album = self._pagination(endpoint, params, subalbums=True)
    subalbums = next(album, ())

    if not subalbum or subalbum == "0":
        return (({}, album),)

    if subalbum in ("all", "only"):
        results = [
            (sub, self._pagination_subalbum(uid, sub))
            for sub in subalbums
        ]
        if subalbum == "all":
            results.append(({}, album))
        return results

    if subalbum.isdecimal():
        index = int(subalbum) - 1
        # values such as "00" pass the '== "0"' check above but would
        # turn into index -1 and silently select the last subalbum
        if index < 0:
            raise exception.NotFoundError("subalbum")
        try:
            sub = subalbums[index]
        except (IndexError, KeyError, TypeError):
            raise exception.NotFoundError("subalbum") from None
    else:
        title = text.unquote(subalbum)
        sub = next(
            (sub for sub in subalbums if sub["pic_title"] == title), None)
        if sub is None:
            raise exception.NotFoundError("subalbum")

    return ((sub, self._pagination_subalbum(uid, sub)),)
|
||||
|
||||
def _pagination_subalbum(self, uid, sub):
    """Return the pagination over the media of subalbum 'sub'

    Uses '/profile/getAlbumDetail' with the unquoted 'containerid'
    of 'sub' and 'since_id' as the name of the continuation parameter.
    """
    container = text.unquote(sub["containerid"])
    params = {
        "uid"        : uid,
        "containerid": container,
    }
    return self._pagination(
        "/profile/getAlbumDetail", params, "since_id")
|
||||
|
||||
|
||||
class WeiboStatusExtractor(WeiboExtractor):
|
||||
"""Extractor for a weibo status"""
|
||||
|
||||
@@ -26,6 +26,7 @@ __tests__ = (
|
||||
"https://weibo.com/u/1758989602?tabtype=feed",
|
||||
"https://weibo.com/u/1758989602?tabtype=video",
|
||||
"https://weibo.com/u/1758989602?tabtype=newVideo",
|
||||
"https://weibo.com/u/1758989602?tabtype=article",
|
||||
"https://weibo.com/u/1758989602?tabtype=album",
|
||||
),
|
||||
},
|
||||
@@ -160,6 +161,34 @@ __tests__ = (
|
||||
"#count" : 3,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://weibo.com/u/2142058927?tabtype=album-头像",
|
||||
"#comment" : "subalbum",
|
||||
"#class" : weibo.WeiboAlbumExtractor,
|
||||
"#range" : "1-3",
|
||||
"#results" : (
|
||||
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i5b4anvvxbj60u00u078w02",
|
||||
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i2b7u68bfhj60u00u0dl002",
|
||||
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i2b6rmr1trj60rs0rstdn02",
|
||||
),
|
||||
|
||||
"extension": "jpg",
|
||||
"pid" : str,
|
||||
"type" : "pic",
|
||||
"subalbum" : {
|
||||
"containerid": "2318262142058927_-_pc_profile_album_-_photo_-_avatar_-_35046512_-_%E5%A4%B4%E5%83%8F",
|
||||
"pic" : "https://wx1.sinaimg.cn/webp720/002kXRnxly8i5b4anvvxbj60u00u078w02.jpg",
|
||||
"pic_title" : "头像",
|
||||
},
|
||||
"user" : {
|
||||
"id" : 2142058927,
|
||||
"idstr" : "2142058927",
|
||||
"location" : "上海 黄浦区",
|
||||
"profile_url": "/u/2142058927",
|
||||
"screen_name": "吴磊LEO",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://m.weibo.cn/detail/4323047042991618",
|
||||
"#category": ("", "weibo", "status"),
|
||||
|
||||
Reference in New Issue
Block a user