Mike Fährmann
2026-01-07 18:09:57 +01:00
parent d491ed8ed8
commit b1a708beb9
4 changed files with 139 additions and 16 deletions

View File

@@ -6986,6 +6986,16 @@ Description
Download video files.
extractor.weibo.album.subalbums
-------------------------------
Type
``bool``
Default
``false``
Description
Extract subalbum media.
extractor.wikimedia.format
--------------------------
Type

View File

@@ -972,7 +972,11 @@
"movies" : false,
"retweets" : false,
"text" : false,
"videos" : true
"videos" : true,
"album": {
"subalbums": false
}
},
"xfolio":
{

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2025 Mike Fährmann
# Copyright 2019-2026 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -29,7 +29,8 @@ class WeiboExtractor(Extractor):
def __init__(self, match):
    """Initialize the extractor from a URL pattern match.

    Stores the first capture group as 'self._prefix' and the second
    one as 'self.user'.
    """
    Extractor.__init__(self, match)
    # Read the groups by index instead of unpacking match.groups()
    # into two names: patterns with additional capture groups (such as
    # the optional subalbum group of the album pattern) would make the
    # unpacking fail with "too many values to unpack".
    self._prefix = match[1]
    self.user = match[2]
def _init(self):
self.livephoto = self.config("livephoto", True)
@@ -199,18 +200,23 @@ class WeiboExtractor(Extractor):
f"?id={status_id}&isGetLongText=true")
return self.request_json(url)
def _user_id(self):
    """Return the user ID belonging to 'self.user'.

    A purely decimal value of ten or more characters is taken to be
    the ID itself, reduced to its last ten characters. Any other value
    is resolved with a request to the 'profile/info' endpoint.
    """
    # NOTE(review): a second '_user_id' definition appears further down
    # in this view -- confirm which of the two is meant to stay.
    name = self.user
    if len(name) < 10 or not name.isdecimal():
        param = "screen_name" if self._prefix == "n" else "custom"
        url = f"{self.root}/ajax/profile/info?{param}={name}"
        return self.request_json(url)["data"]["user"]["idstr"]
    return name[-10:]
def _user(self, user):
    """Request the profile data of 'user' from the 'profile/info' endpoint.

    The query parameter is 'screen_name' when 'self._prefix' is 'n'
    and 'custom' otherwise. Returns the 'user' object stored under
    'data' in the JSON response.
    """
    lookup = "screen_name" if self._prefix == "n" else "custom"
    endpoint = f"{self.root}/ajax/profile/info?{lookup}={user}"
    response = self.request_json(endpoint, interval=False)
    return response["data"]["user"]
def _pagination(self, endpoint, params):
def _user_id(self):
    """Return the user ID belonging to 'self.user'.

    A purely decimal value of ten or more characters is taken to be
    the ID itself, reduced to its last ten characters. Any other value
    is resolved through self._user() and its 'idstr' field.
    """
    ident = self.user
    looks_like_id = len(ident) >= 10 and ident.isdecimal()
    if not looks_like_id:
        return self._user(ident)["idstr"]
    return ident[-10:]
def _pagination(self, endpoint, params,
since_key="sinceid", subalbums=None):
url = f"{self.root}/ajax{endpoint}"
headers = {
"Accept": "application/json, text/plain, */*",
"X-Requested-With": "XMLHttpRequest",
"X-XSRF-TOKEN": None,
"Referer": f"{self.root}/u/{params['uid']}",
@@ -218,7 +224,6 @@ class WeiboExtractor(Extractor):
while True:
response = self.request(url, params=params, headers=headers)
headers["Accept"] = "application/json, text/plain, */*"
headers["X-XSRF-TOKEN"] = response.cookies.get("XSRF-TOKEN")
data = response.json()
@@ -234,6 +239,10 @@ class WeiboExtractor(Extractor):
except KeyError:
return
if subalbums is not None:
subalbums = None
yield data.get("album_list") or ()
yield from statuses
# videos, newvideo
@@ -244,8 +253,10 @@ class WeiboExtractor(Extractor):
continue
# album
if since_id := data.get("since_id"):
params["sinceid"] = since_id
if "since_id" in data:
params[since_key] = since_id = data["since_id"]
if not since_id:
return
if "page" in params:
params["page"] += 1
continue
@@ -383,9 +394,33 @@ class WeiboArticleExtractor(WeiboExtractor):
class WeiboAlbumExtractor(WeiboExtractor):
"""Extractor for weibo 'album' listings"""
subcategory = "album"
pattern = USER_PATTERN + r"\?tabtype=album"
pattern = USER_PATTERN + r"\?tabtype=album(?:[:_-]([^&#]+))?"
example = "https://weibo.com/USER?tabtype=album"
def items(self):
    """Select the regular or the subalbum extraction path.

    The subalbum path is taken when the URL carries a subalbum
    selector (third match group) or when the 'subalbums' option is
    enabled. In that case the directory, filename and archive formats
    are replaced before delegating to items_subalbum(). Otherwise the
    base class implementation is used unchanged.
    """
    subalbum = self.groups[2]

    if subalbum or self.config("subalbums", False):
        self.directory_fmt = (
            "{category}",
            "{user[screen_name]}",
            "Album",
            "{subalbum[pic_title]|''}",
        )
        self.filename_fmt = "{filename}.{extension}"
        self.archive_fmt = "{subalbum[pic_title]}_{pid}"
        return self.items_subalbum(subalbum)

    return WeiboExtractor.items(self)
def items_subalbum(self, subalbum):
    """Yield directory and URL messages for the selected album(s).

    The user's profile data is stored in 'self.kwdict["user"]'. For
    every (subalbum data, files) pair returned by self.albums(), the
    subalbum data is stored in 'self.kwdict["subalbum"]', a Directory
    message is emitted, and each file is emitted as a download URL
    built from its 'pid'. Every file dict gets 'filename' set to its
    'pid' and 'extension' set to 'jpg'.
    """
    profile = self._user(self.user)
    self.kwdict["user"] = profile
    download_url = self.root + "/ajax/common/download?pid="

    for info, entries in self.albums(profile["idstr"], subalbum):
        self.kwdict["subalbum"] = info
        yield Message.Directory, "", {}

        for entry in entries:
            pid = entry["pid"]
            entry["filename"] = pid
            entry["extension"] = "jpg"
            yield Message.Url, download_url + pid, entry
def statuses(self):
endpoint = "/profile/getImageWall"
params = {"uid": self._user_id()}
@@ -401,6 +436,51 @@ class WeiboAlbumExtractor(WeiboExtractor):
else:
yield status
def albums(self, uid, subalbum):
    """Return (subalbum data, file iterator) pairs for the selection.

    'subalbum' selects what is returned:
      - empty value or "0": only the main image wall, paired with {}
      - "all": every listed subalbum, followed by the main image wall
      - "only": every listed subalbum
      - decimal string: the subalbum at that 1-based list position
      - anything else: the subalbum whose 'pic_title' equals the value
        after it has been passed through text.unquote()

    Raises exception.NotFoundError when a position or title does not
    match any listed subalbum.
    """
    wall = self._pagination(
        "/profile/getImageWall",
        {
            "uid"      : uid,
            "sinceid"  : "0",
            "has_album": "true",
        },
        subalbums=True,
    )
    # with 'subalbums' set, the first value produced by the paginator
    # is the subalbum list; everything after it are the wall entries
    listing = next(wall, ())

    if not subalbum or subalbum == "0":
        return (({}, wall),)

    if subalbum in ("all", "only"):
        pairs = [
            (entry, self._pagination_subalbum(uid, entry))
            for entry in listing
        ]
        if subalbum == "all":
            pairs.append(({}, wall))
        return pairs

    if subalbum.isdecimal():
        position = int(subalbum) - 1
        try:
            chosen = listing[position]
        except Exception:
            raise exception.NotFoundError("subalbum")
    else:
        title = text.unquote(subalbum)
        for chosen in listing:
            if chosen["pic_title"] == title:
                break
        else:
            raise exception.NotFoundError("subalbum")

    return ((chosen, self._pagination_subalbum(uid, chosen)),)
def _pagination_subalbum(self, uid, sub):
    """Return a paginator over the contents of subalbum 'sub'.

    Queries '/profile/getAlbumDetail' with the user ID and the
    subalbum's 'containerid' (passed through text.unquote()), and
    uses 'since_id' as name of the continuation parameter.
    """
    container = text.unquote(sub["containerid"])
    query = {"uid": uid, "containerid": container}
    return self._pagination("/profile/getAlbumDetail", query, "since_id")
class WeiboStatusExtractor(WeiboExtractor):
"""Extractor for a weibo status"""

View File

@@ -26,6 +26,7 @@ __tests__ = (
"https://weibo.com/u/1758989602?tabtype=feed",
"https://weibo.com/u/1758989602?tabtype=video",
"https://weibo.com/u/1758989602?tabtype=newVideo",
"https://weibo.com/u/1758989602?tabtype=article",
"https://weibo.com/u/1758989602?tabtype=album",
),
},
@@ -160,6 +161,34 @@ __tests__ = (
"#count" : 3,
},
{
"#url" : "https://weibo.com/u/2142058927?tabtype=album-头像",
"#comment" : "subalbum",
"#class" : weibo.WeiboAlbumExtractor,
"#range" : "1-3",
"#results" : (
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i5b4anvvxbj60u00u078w02",
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i2b7u68bfhj60u00u0dl002",
"https://weibo.com/ajax/common/download?pid=002kXRnxly8i2b6rmr1trj60rs0rstdn02",
),
"extension": "jpg",
"pid" : str,
"type" : "pic",
"subalbum" : {
"containerid": "2318262142058927_-_pc_profile_album_-_photo_-_avatar_-_35046512_-_%E5%A4%B4%E5%83%8F",
"pic" : "https://wx1.sinaimg.cn/webp720/002kXRnxly8i5b4anvvxbj60u00u078w02.jpg",
"pic_title" : "头像",
},
"user" : {
"id" : 2142058927,
"idstr" : "2142058927",
"location" : "上海 黄浦区",
"profile_url": "/u/2142058927",
"screen_name": "吴磊LEO",
},
},
{
"#url" : "https://m.weibo.cn/detail/4323047042991618",
"#category": ("", "weibo", "status"),