[weibo:album] implement 'subalbum' support (#8792)
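Supported URL forms:
- https://weibo.com/u/USER?tabtype=album-1 (subalbum selected by its 1-based position)
- https://weibo.com/u/USER?tabtype=album-NAME (subalbum selected by its title)
- https://weibo.com/u/USER?tabtype=album-all (every subalbum, followed by the main album)
- https://weibo.com/u/USER?tabtype=album-only (every subalbum, without the main album)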
This commit is contained in:
@@ -6986,6 +6986,16 @@ Description
|
|||||||
Download video files.
|
Download video files.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.weibo.album.subalbums
|
||||||
|
-------------------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Description
|
||||||
|
Extract subalbum media.
|
||||||
|
|
||||||
|
|
||||||
extractor.wikimedia.format
|
extractor.wikimedia.format
|
||||||
--------------------------
|
--------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -972,7 +972,11 @@
|
|||||||
"movies" : false,
|
"movies" : false,
|
||||||
"retweets" : false,
|
"retweets" : false,
|
||||||
"text" : false,
|
"text" : false,
|
||||||
"videos" : true
|
"videos" : true,
|
||||||
|
|
||||||
|
"album": {
|
||||||
|
"subalbums": false
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"xfolio":
|
"xfolio":
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2019-2025 Mike Fährmann
|
# Copyright 2019-2026 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -29,7 +29,8 @@ class WeiboExtractor(Extractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
Extractor.__init__(self, match)
|
Extractor.__init__(self, match)
|
||||||
self._prefix, self.user = match.groups()
|
self._prefix = match[1]
|
||||||
|
self.user = match[2]
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.livephoto = self.config("livephoto", True)
|
self.livephoto = self.config("livephoto", True)
|
||||||
@@ -199,18 +200,23 @@ class WeiboExtractor(Extractor):
|
|||||||
f"?id={status_id}&isGetLongText=true")
|
f"?id={status_id}&isGetLongText=true")
|
||||||
return self.request_json(url)
|
return self.request_json(url)
|
||||||
|
|
||||||
def _user_id(self):
|
def _user(self, user):
|
||||||
if len(self.user) >= 10 and self.user.isdecimal():
|
url = (f"{self.root}/ajax/profile/info?"
|
||||||
return self.user[-10:]
|
f"{'screen_name' if self._prefix == 'n' else 'custom'}={user}")
|
||||||
else:
|
return self.request_json(url, interval=False)["data"]["user"]
|
||||||
url = (f"{self.root}/ajax/profile/info?"
|
|
||||||
f"{'screen_name' if self._prefix == 'n' else 'custom'}="
|
|
||||||
f"{self.user}")
|
|
||||||
return self.request_json(url)["data"]["user"]["idstr"]
|
|
||||||
|
|
||||||
def _pagination(self, endpoint, params):
|
def _user_id(self):
|
||||||
|
user = self.user
|
||||||
|
if len(user) >= 10 and user.isdecimal():
|
||||||
|
return user[-10:]
|
||||||
|
else:
|
||||||
|
return self._user(user)["idstr"]
|
||||||
|
|
||||||
|
def _pagination(self, endpoint, params,
|
||||||
|
since_key="sinceid", subalbums=None):
|
||||||
url = f"{self.root}/ajax{endpoint}"
|
url = f"{self.root}/ajax{endpoint}"
|
||||||
headers = {
|
headers = {
|
||||||
|
"Accept": "application/json, text/plain, */*",
|
||||||
"X-Requested-With": "XMLHttpRequest",
|
"X-Requested-With": "XMLHttpRequest",
|
||||||
"X-XSRF-TOKEN": None,
|
"X-XSRF-TOKEN": None,
|
||||||
"Referer": f"{self.root}/u/{params['uid']}",
|
"Referer": f"{self.root}/u/{params['uid']}",
|
||||||
@@ -218,7 +224,6 @@ class WeiboExtractor(Extractor):
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
response = self.request(url, params=params, headers=headers)
|
response = self.request(url, params=params, headers=headers)
|
||||||
headers["Accept"] = "application/json, text/plain, */*"
|
|
||||||
headers["X-XSRF-TOKEN"] = response.cookies.get("XSRF-TOKEN")
|
headers["X-XSRF-TOKEN"] = response.cookies.get("XSRF-TOKEN")
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
@@ -234,6 +239,10 @@ class WeiboExtractor(Extractor):
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if subalbums is not None:
|
||||||
|
subalbums = None
|
||||||
|
yield data.get("album_list") or ()
|
||||||
|
|
||||||
yield from statuses
|
yield from statuses
|
||||||
|
|
||||||
# videos, newvideo
|
# videos, newvideo
|
||||||
@@ -244,8 +253,10 @@ class WeiboExtractor(Extractor):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# album
|
# album
|
||||||
if since_id := data.get("since_id"):
|
if "since_id" in data:
|
||||||
params["sinceid"] = since_id
|
params[since_key] = since_id = data["since_id"]
|
||||||
|
if not since_id:
|
||||||
|
return
|
||||||
if "page" in params:
|
if "page" in params:
|
||||||
params["page"] += 1
|
params["page"] += 1
|
||||||
continue
|
continue
|
||||||
@@ -383,9 +394,33 @@ class WeiboArticleExtractor(WeiboExtractor):
|
|||||||
class WeiboAlbumExtractor(WeiboExtractor):
|
class WeiboAlbumExtractor(WeiboExtractor):
|
||||||
"""Extractor for weibo 'album' listings"""
|
"""Extractor for weibo 'album' listings"""
|
||||||
subcategory = "album"
|
subcategory = "album"
|
||||||
pattern = USER_PATTERN + r"\?tabtype=album"
|
pattern = USER_PATTERN + r"\?tabtype=album(?:[:_-]([^&#]+))?"
|
||||||
example = "https://weibo.com/USER?tabtype=album"
|
example = "https://weibo.com/USER?tabtype=album"
|
||||||
|
|
||||||
|
def items(self):
    """Dispatch between the regular album listing and subalbum extraction.

    The third match group holds the optional selector that follows
    'tabtype=album' in the URL. Subalbum extraction is used when that
    selector is present or the 'subalbums' option is enabled; otherwise
    the call is forwarded to WeiboExtractor.items().
    """
    selector = self.groups[2]

    if selector or self.config("subalbums", False):
        # subalbum results are plain files, so they get their own
        # directory, filename and archive formats
        self.directory_fmt = (
            "{category}",
            "{user[screen_name]}",
            "Album",
            "{subalbum[pic_title]|''}",
        )
        self.filename_fmt = "{filename}.{extension}"
        self.archive_fmt = "{subalbum[pic_title]}_{pid}"
        return self.items_subalbum(selector)

    return WeiboExtractor.items(self)
|
||||||
|
|
||||||
|
def items_subalbum(self, subalbum):
    """Yield directory and URL messages for the selected (sub)albums.

    The user profile returned by self._user() is stored in
    self.kwdict["user"]. For every (metadata, files) pair produced by
    self.albums(), the metadata is stored in self.kwdict["subalbum"],
    one Directory message is emitted, and each file is emitted as a
    download URL built from its "pid".
    """
    profile = self._user(self.user)
    self.kwdict["user"] = profile
    download_url = self.root + "/ajax/common/download?pid="

    for album_info, entries in self.albums(profile["idstr"], subalbum):
        self.kwdict["subalbum"] = album_info
        yield Message.Directory, "", {}

        for entry in entries:
            pid = entry["pid"]
            entry["filename"] = pid
            # every entry is saved with a fixed 'jpg' extension
            entry["extension"] = "jpg"
            yield Message.Url, download_url + pid, entry
|
||||||
|
|
||||||
def statuses(self):
|
def statuses(self):
|
||||||
endpoint = "/profile/getImageWall"
|
endpoint = "/profile/getImageWall"
|
||||||
params = {"uid": self._user_id()}
|
params = {"uid": self._user_id()}
|
||||||
@@ -401,6 +436,51 @@ class WeiboAlbumExtractor(WeiboExtractor):
|
|||||||
else:
|
else:
|
||||||
yield status
|
yield status
|
||||||
|
|
||||||
|
def albums(self, uid, subalbum):
    """Resolve a subalbum selector into (metadata, files) pairs.

    Parameters:
        uid: user ID sent as the 'uid' request parameter.
        subalbum: selector taken from the URL, one of
            - None / empty / a decimal zero: the main album only
            - "all": every subalbum, followed by the main album
            - "only": every subalbum, without the main album
            - a decimal number N: the N-th listed subalbum (1-based)
            - anything else: the subalbum whose "pic_title" equals the
              unquoted selector

    Returns:
        A sequence of (metadata, files) tuples. 'metadata' is the
        subalbum's entry from the album list, or an empty dict for the
        main album; 'files' is the iterable yielding that album's entries.

    Raises:
        exception.NotFoundError: no subalbum matches the selector.
    """
    endpoint = "/profile/getImageWall"
    params = {
        "uid"      : uid,
        "sinceid"  : "0",
        "has_album": "true",
    }
    album = self._pagination(endpoint, params, subalbums=True)
    # With 'subalbums' set, the first item produced by _pagination() is
    # the subalbum list. It has to be consumed here in every case so that
    # 'album' afterwards yields only the main album's entries.
    subalbums = next(album, ())

    if not subalbum:
        return (({}, album),)

    if subalbum in ("all", "only"):
        results = [
            (sub, self._pagination_subalbum(uid, sub))
            for sub in subalbums
        ]
        if subalbum == "all":
            results.append(({}, album))
        return results

    if subalbum.isdecimal():
        index = int(subalbum)
        if not index:
            # Any spelling of zero ("0", "00", ...) means the main album.
            # Without this check, "00" would become index -1 and silently
            # select the last subalbum.
            return (({}, album),)
        try:
            sub = subalbums[index - 1]
        except (LookupError, TypeError):
            # position out of range, or the album list is not a sequence
            raise exception.NotFoundError("subalbum")
    else:
        title = text.unquote(subalbum)
        for sub in subalbums:
            if sub["pic_title"] == title:
                break
        else:
            raise exception.NotFoundError("subalbum")

    return ((sub, self._pagination_subalbum(uid, sub)),)
|
||||||
|
|
||||||
|
def _pagination_subalbum(self, uid, sub):
    """Return the paginated entries of one subalbum.

    'sub' is a subalbum entry; its "containerid" is unquoted before being
    sent. The third positional argument makes _pagination() store the
    continuation value under "since_id" instead of its default key.
    """
    container_id = text.unquote(sub["containerid"])
    return self._pagination(
        "/profile/getAlbumDetail",
        {"uid": uid, "containerid": container_id},
        "since_id",
    )
|
||||||
|
|
||||||
|
|
||||||
class WeiboStatusExtractor(WeiboExtractor):
|
class WeiboStatusExtractor(WeiboExtractor):
|
||||||
"""Extractor for a weibo status"""
|
"""Extractor for a weibo status"""
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ __tests__ = (
|
|||||||
"https://weibo.com/u/1758989602?tabtype=feed",
|
"https://weibo.com/u/1758989602?tabtype=feed",
|
||||||
"https://weibo.com/u/1758989602?tabtype=video",
|
"https://weibo.com/u/1758989602?tabtype=video",
|
||||||
"https://weibo.com/u/1758989602?tabtype=newVideo",
|
"https://weibo.com/u/1758989602?tabtype=newVideo",
|
||||||
|
"https://weibo.com/u/1758989602?tabtype=article",
|
||||||
"https://weibo.com/u/1758989602?tabtype=album",
|
"https://weibo.com/u/1758989602?tabtype=album",
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
@@ -160,6 +161,34 @@ __tests__ = (
|
|||||||
"#count" : 3,
|
"#count" : 3,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://weibo.com/u/2142058927?tabtype=album-头像",
|
||||||
|
"#comment" : "subalbum",
|
||||||
|
"#class" : weibo.WeiboAlbumExtractor,
|
||||||
|
"#range" : "1-3",
|
||||||
|
"#results" : (
|
||||||
|
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i5b4anvvxbj60u00u078w02",
|
||||||
|
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i2b7u68bfhj60u00u0dl002",
|
||||||
|
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i2b6rmr1trj60rs0rstdn02",
|
||||||
|
),
|
||||||
|
|
||||||
|
"extension": "jpg",
|
||||||
|
"pid" : str,
|
||||||
|
"type" : "pic",
|
||||||
|
"subalbum" : {
|
||||||
|
"containerid": "2318262142058927_-_pc_profile_album_-_photo_-_avatar_-_35046512_-_%E5%A4%B4%E5%83%8F",
|
||||||
|
"pic" : "https://wx1.sinaimg.cn/webp720/002kXRnxly8i5b4anvvxbj60u00u078w02.jpg",
|
||||||
|
"pic_title" : "头像",
|
||||||
|
},
|
||||||
|
"user" : {
|
||||||
|
"id" : 2142058927,
|
||||||
|
"idstr" : "2142058927",
|
||||||
|
"location" : "上海 黄浦区",
|
||||||
|
"profile_url": "/u/2142058927",
|
||||||
|
"screen_name": "吴磊LEO",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://m.weibo.cn/detail/4323047042991618",
|
"#url" : "https://m.weibo.cn/detail/4323047042991618",
|
||||||
"#category": ("", "weibo", "status"),
|
"#category": ("", "weibo", "status"),
|
||||||
|
|||||||
Reference in New Issue
Block a user