From b1a708beb96f4695b3008920875a863d70a8471a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 7 Jan 2026 18:09:57 +0100 Subject: [PATCH] [weibo:album] implement 'subalbum' support (#8792) https://weibo.com/u/USER?tabtype=album-1 https://weibo.com/u/USER?tabtype=album-NAME https://weibo.com/u/USER?tabtype=album-all https://weibo.com/u/USER?tabtype=album-only --- docs/configuration.rst | 10 ++++ docs/gallery-dl.conf | 6 +- gallery_dl/extractor/weibo.py | 110 +++++++++++++++++++++++++++++----- test/results/weibo.py | 29 +++++++++ 4 files changed, 139 insertions(+), 16 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 2bea9101..c93e933c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -6986,6 +6986,16 @@ Description Download video files. +extractor.weibo.album.subalbums +------------------------------- +Type + ``bool`` +Default + ``false`` +Description + Extract subalbum media. + + extractor.wikimedia.format -------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 202e2985..16cce081 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -972,7 +972,11 @@ "movies" : false, "retweets" : false, "text" : false, - "videos" : true + "videos" : true, + + "album": { + "subalbums": false + } }, "xfolio": { diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index ef50b9d9..7c544212 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2025 Mike Fährmann +# Copyright 2019-2026 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -29,7 +29,8 @@ class WeiboExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self._prefix, self.user = match.groups() + self._prefix = match[1] + self.user = match[2] def _init(self): self.livephoto = self.config("livephoto", True) @@ -199,18 +200,23 @@ class WeiboExtractor(Extractor): f"?id={status_id}&isGetLongText=true") return self.request_json(url) - def _user_id(self): - if len(self.user) >= 10 and self.user.isdecimal(): - return self.user[-10:] - else: - url = (f"{self.root}/ajax/profile/info?" - f"{'screen_name' if self._prefix == 'n' else 'custom'}=" - f"{self.user}") - return self.request_json(url)["data"]["user"]["idstr"] + def _user(self, user): + url = (f"{self.root}/ajax/profile/info?" + f"{'screen_name' if self._prefix == 'n' else 'custom'}={user}") + return self.request_json(url, interval=False)["data"]["user"] - def _pagination(self, endpoint, params): + def _user_id(self): + user = self.user + if len(user) >= 10 and user.isdecimal(): + return user[-10:] + else: + return self._user(user)["idstr"] + + def _pagination(self, endpoint, params, + since_key="sinceid", subalbums=None): url = f"{self.root}/ajax{endpoint}" headers = { + "Accept": "application/json, text/plain, */*", "X-Requested-With": "XMLHttpRequest", "X-XSRF-TOKEN": None, "Referer": f"{self.root}/u/{params['uid']}", @@ -218,7 +224,6 @@ class WeiboExtractor(Extractor): while True: response = self.request(url, params=params, headers=headers) - headers["Accept"] = "application/json, text/plain, */*" headers["X-XSRF-TOKEN"] = response.cookies.get("XSRF-TOKEN") data = response.json() @@ -234,6 +239,10 @@ class WeiboExtractor(Extractor): except KeyError: return + if subalbums is not None: + subalbums = None + yield data.get("album_list") or () + yield from statuses # videos, newvideo @@ -244,8 +253,10 @@ class WeiboExtractor(Extractor): continue # album - if since_id := data.get("since_id"): - params["sinceid"] = since_id + if "since_id" in data: + params[since_key] = since_id = data["since_id"] + if not since_id: + return if "page" in params: params["page"] += 1 continue @@ -383,9 +394,33 @@ class WeiboArticleExtractor(WeiboExtractor): class WeiboAlbumExtractor(WeiboExtractor): """Extractor for weibo 'album' listings""" subcategory = "album" - pattern = USER_PATTERN + r"\?tabtype=album" + pattern = USER_PATTERN + r"\?tabtype=album(?:[:_-]([^&#]+))?" example = "https://weibo.com/USER?tabtype=album" + def items(self): + subalbum = self.groups[2] + + if not subalbum and not self.config("subalbums", False): + return WeiboExtractor.items(self) + + self.directory_fmt = ("{category}", "{user[screen_name]}", + "Album", "{subalbum[pic_title]|''}") + self.filename_fmt = "{filename}.{extension}" + self.archive_fmt = "{subalbum[pic_title]}_{pid}" + return self.items_subalbum(subalbum) + + def items_subalbum(self, subalbum): + user = self.kwdict["user"] = self._user(self.user) + base = self.root + "/ajax/common/download?pid=" + + for data, files in self.albums(user["idstr"], subalbum): + self.kwdict["subalbum"] = data + yield Message.Directory, "", {} + for file in files: + file["filename"] = file["pid"] + file["extension"] = "jpg" + yield Message.Url, base + file["pid"], file + def statuses(self): endpoint = "/profile/getImageWall" params = {"uid": self._user_id()} @@ -401,6 +436,51 @@ class WeiboAlbumExtractor(WeiboExtractor): else: yield status + def albums(self, uid, subalbum): + endpoint = "/profile/getImageWall" + params = { + "uid" : uid, + "sinceid" : "0", + "has_album": "true", + } + album = self._pagination(endpoint, params, subalbums=True) + subalbums = next(album, ()) + + if not subalbum or subalbum == "0": + return (({}, album),) + + if subalbum == "all": + results = [ + (sub, self._pagination_subalbum(uid, sub)) + for sub in subalbums + ] + results.append(({}, album)) + return results + + if subalbum == "only": + return [ + (sub, self._pagination_subalbum(uid, sub)) + for sub in subalbums + ] + + if subalbum.isdecimal(): + try: + sub = subalbums[int(subalbum)-1] + except Exception: + raise exception.NotFoundError("subalbum") + else: + subalbum = text.unquote(subalbum) + for sub in subalbums: + if sub["pic_title"] == subalbum: + break + else: + raise exception.NotFoundError("subalbum") + return ((sub, self._pagination_subalbum(uid, sub)),) + + def _pagination_subalbum(self, uid, sub): + params = {"uid": uid, "containerid": text.unquote(sub["containerid"])} + return self._pagination("/profile/getAlbumDetail", params, "since_id") + class WeiboStatusExtractor(WeiboExtractor): """Extractor for a weibo status""" diff --git a/test/results/weibo.py b/test/results/weibo.py index 3b3759ab..efefd0b2 100644 --- a/test/results/weibo.py +++ b/test/results/weibo.py @@ -26,6 +26,7 @@ __tests__ = ( "https://weibo.com/u/1758989602?tabtype=feed", "https://weibo.com/u/1758989602?tabtype=video", "https://weibo.com/u/1758989602?tabtype=newVideo", + "https://weibo.com/u/1758989602?tabtype=article", "https://weibo.com/u/1758989602?tabtype=album", ), }, @@ -160,6 +161,34 @@ __tests__ = ( "#count" : 3, }, +{ + "#url" : "https://weibo.com/u/2142058927?tabtype=album-头像", + "#comment" : "subalbum", + "#class" : weibo.WeiboAlbumExtractor, + "#range" : "1-3", + "#results" : ( + "https://weibo.com/ajax/common/download?pid=002kXRnxly8i5b4anvvxbj60u00u078w02", + "https://weibo.com/ajax/common/download?pid=002kXRnxly8i2b7u68bfhj60u00u0dl002", + "https://weibo.com/ajax/common/download?pid=002kXRnxly8i2b6rmr1trj60rs0rstdn02", + ), + + "extension": "jpg", + "pid" : str, + "type" : "pic", + "subalbum" : { + "containerid": "2318262142058927_-_pc_profile_album_-_photo_-_avatar_-_35046512_-_%E5%A4%B4%E5%83%8F", + "pic" : "https://wx1.sinaimg.cn/webp720/002kXRnxly8i5b4anvvxbj60u00u078w02.jpg", + "pic_title" : "头像", + }, + "user" : { + "id" : 2142058927, + "idstr" : "2142058927", + "location" : "上海 黄浦区", + "profile_url": "/u/2142058927", + "screen_name": "吴磊LEO", + }, +}, + { "#url" : "https://m.weibo.cn/detail/4323047042991618", "#category": ("", "weibo", "status"),