From ab54f71511a734d7defd4ed43334699598c2fae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 12 Aug 2025 18:43:11 +0200 Subject: [PATCH] [skeb] implement 'include' option (#6558 #7267) - split 'user' extractor into 'works' & 'sent-requests' extractors - use BASE_PATTERN & USER_PATTERN - use self.groups --- docs/configuration.rst | 22 +++++++ docs/gallery-dl.conf | 1 + docs/supportedsites.md | 2 +- gallery_dl/extractor/skeb.py | 86 +++++++++++++++---------- scripts/supportedsites.py | 1 + test/results/skeb.py | 121 +++++++++++++++++++++++++++++++++-- 6 files changed, 191 insertions(+), 42 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 1f3d3428..d7234bd0 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -4746,6 +4746,28 @@ Description Download article images. +extractor.skeb.include +---------------------- +Type + * ``string`` + * ``list`` of ``strings`` +Default + ``"works"`` +Example + * ``"works,sent-requests"`` + * ``["works", "sent-requests"]`` +Description + A (comma-separated) list of subcategories to include + when processing a user profile. + + Possible values are + + * ``"works"`` + * ``"sent-requests"`` + + It is possible to use ``"all"`` instead of listing all values separately. + + extractor.skeb.sent-requests ---------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 687155e4..3587eb98 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -675,6 +675,7 @@ "skeb": { "article" : false, + "include" : ["works"], "sent-requests": false, "thumbnails" : false, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3c4aa68e..2c3b18db 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -952,7 +952,7 @@ Consider all listed sites to potentially be NSFW. Skeb https://skeb.jp/ - Followed Creators, Followed Users, Posts, Search Results, User Profiles + Followed Creators, Followed Users, Posts, Search Results, Sent Requests, User Profiles, Works diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py index 1caafd1f..a902532b 100644 --- a/gallery_dl/extractor/skeb.py +++ b/gallery_dl/extractor/skeb.py @@ -6,9 +6,11 @@ """Extractors for https://skeb.jp/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. import text -import itertools + +BASE_PATTERN = r"(?:https?://)?skeb\.jp" +USER_PATTERN = BASE_PATTERN + r"/@([^/?#]+)" class SkebExtractor(Extractor): @@ -19,10 +21,6 @@ class SkebExtractor(Extractor): archive_fmt = "{post_num}_{_file_id}_{content_category}" root = "https://skeb.jp" - def __init__(self, match): - Extractor.__init__(self, match) - self.user_name = match[1] - def _init(self): self.thumbnails = self.config("thumbnails", False) self.article = self.config("article", False) @@ -65,7 +63,7 @@ class SkebExtractor(Extractor): url = file["file_url"] yield Message.Url, url, text.nameext_from_url(url, post) - def _items_users(self): + def items_users(self): base = self.root + "/@" for user in self.users(): user["_extractor"] = SkebUserExtractor @@ -196,44 +194,63 @@ class SkebExtractor(Extractor): class SkebPostExtractor(SkebExtractor): """Extractor for a single skeb post""" subcategory = "post" - pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)" + pattern = USER_PATTERN + r"/works/(\d+)" example = "https://skeb.jp/@USER/works/123" - def __init__(self, match): - SkebExtractor.__init__(self, match) - self.post_num = match[2] + def posts(self): + return (self.groups,) + + +class SkebWorksExtractor(SkebExtractor): + """Extractor for a skeb user's works""" + subcategory = "works" + pattern = USER_PATTERN + r"/works" + example = "https://skeb.jp/@USER/works" def posts(self): - return ((self.user_name, self.post_num),) + url = f"{self.root}/api/users/{self.groups[0]}/works" + params = {"role": "creator", "sort": "date"} + return self._pagination(url, params) -class SkebUserExtractor(SkebExtractor): - """Extractor for all posts from a skeb user""" - subcategory = "user" - pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/?$" +class SkebSentRequestsExtractor(SkebExtractor): + """Extractor for a skeb user's sent requests""" + subcategory = "sent-requests" + pattern = USER_PATTERN + r"/sent[ _-]?requests" + example = "https://skeb.jp/@USER/sent-requests" + + def posts(self): + url = f"{self.root}/api/users/{self.groups[0]}/works" + params = {"role": "client", "sort": "date"} + return self._pagination(url, params) + + +class SkebUserExtractor(Dispatch, SkebExtractor): + """Extractor for a skeb user profile""" + pattern = USER_PATTERN + r"/?$" example = "https://skeb.jp/@USER" - def posts(self): - url = f"{self.root}/api/users/{self.user_name}/works" - - params = {"role": "creator", "sort": "date"} - posts = self._pagination(url, params) - + def items(self): if self.config("sent-requests", False): - params = {"role": "client", "sort": "date"} - posts = itertools.chain(posts, self._pagination(url, params)) + default = ("works", "sent-requests") + else: + default = ("works",) - return posts + base = f"{self.root}/@{self.groups[0]}/" + return self._dispatch_extractors(( + (SkebWorksExtractor , base + "works"), + (SkebSentRequestsExtractor, base + "sent-requests"), + ), default) class SkebSearchExtractor(SkebExtractor): """Extractor for skeb search results""" subcategory = "search" - pattern = r"(?:https?://)?skeb\.jp/search\?q=([^&#]+)" + pattern = BASE_PATTERN + r"/search\?q=([^&#]+)" example = "https://skeb.jp/search?q=QUERY" def metadata(self): - return {"search_tags": text.unquote(self.user_name)} + return {"search_tags": text.unquote(self.groups[0])} def posts(self): url = "https://hb1jt3kre9-2.algolianet.com/1/indexes/*/queries" @@ -258,7 +275,7 @@ class SkebSearchExtractor(SkebExtractor): request = { "indexName": "Request", - "query": text.unquote(self.user_name), + "query": text.unquote(self.groups[0]), "params": pams + str(page), } data = {"requests": (request,)} @@ -281,13 +298,13 @@ class SkebSearchExtractor(SkebExtractor): class SkebFollowingExtractor(SkebExtractor): """Extractor for all creators followed by a skeb user""" subcategory = "following" - pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators" + pattern = USER_PATTERN + r"/following_creators" example = "https://skeb.jp/@USER/following_creators" - items = SkebExtractor._items_users + items = SkebExtractor.items_users def users(self): - endpoint = f"/users/{self.user_name}/following_creators" + endpoint = f"/users/{self.groups[0]}/following_creators" params = {"sort": "date"} return self._pagination_users(endpoint, params) @@ -295,12 +312,11 @@ class SkebFollowingExtractor(SkebExtractor): class SkebFollowingUsersExtractor(SkebExtractor): """Extractor for your followed users""" subcategory = "following-users" - pattern = r"(?:https?://)?skeb\.jp/following_users()" + pattern = BASE_PATTERN + r"/following_users" example = "https://skeb.jp/following_users" - items = SkebExtractor._items_users + items = SkebExtractor.items_users def users(self): endpoint = "/following_users" - params = {} - return self._pagination_users(endpoint, params) + return self._pagination_users(endpoint, {}) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index a6010304..c904a55b 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -380,6 +380,7 @@ SUBCATEGORY_MAP = { "skeb": { "following" : "Followed Creators", "following-users": "Followed Users", + "sent-requests" : "Sent Requests", }, "smugmug": { "path": "Images from Users and Folders", diff --git a/test/results/skeb.py b/test/results/skeb.py index 4aa8691d..6087939e 100644 --- a/test/results/skeb.py +++ b/test/results/skeb.py @@ -10,7 +10,6 @@ from gallery_dl.extractor import skeb __tests__ = ( { "#url" : "https://skeb.jp/@kanade_cocotte/works/38", - "#category": ("", "skeb", "post"), "#class" : skeb.SkebPostExtractor, "#count" : 2, @@ -20,7 +19,7 @@ __tests__ = ( "num" : range(1, 2), "client" : { "avatar_url" : r"re:https://pbs.twimg.com/profile_images/\d+/\w+\.jpg", - "header_url" : r"re:https://pbs.twimg.com/profile_banners/1375007870291300358/\d+/1500x500", + "header_url" : None, "id" : 1196514, "name" : str, "screen_name": "minato_ragi", @@ -58,8 +57,35 @@ __tests__ = ( { "#url" : "https://skeb.jp/@kanade_cocotte", - "#category": ("", "skeb", "user"), "#class" : skeb.SkebUserExtractor, + "#results" : ( + "https://skeb.jp/@kanade_cocotte/works", + ), +}, + +{ + "#url" : "https://skeb.jp/@kanade_cocotte", + "#class" : skeb.SkebUserExtractor, + "#options" : {"include": "all"}, + "#results" : ( + "https://skeb.jp/@kanade_cocotte/works", + "https://skeb.jp/@kanade_cocotte/sent-requests", + ), +}, + +{ + "#url" : "https://skeb.jp/@kanade_cocotte", + "#class" : skeb.SkebUserExtractor, + "#options" : {"sent-requests": True}, + "#results" : ( + "https://skeb.jp/@kanade_cocotte/works", + "https://skeb.jp/@kanade_cocotte/sent-requests", + ), +}, + +{ + "#url" : "https://skeb.jp/@kanade_cocotte/works", + "#class" : skeb.SkebWorksExtractor, "#pattern" : r"https://si\.imgix\.net/\w+/uploads/origins/[\w-]+", "#range" : "1-5", @@ -67,9 +93,94 @@ __tests__ = ( "num" : int, }, +{ + "#url" : "https://skeb.jp/@kanade_cocotte/works", + "#class" : skeb.SkebWorksExtractor, + "#pattern" : r"https://si\.imgix\.net/\w+/uploads/origins/[\w-]+", + "#range" : "1-5", + + "count": int, + "num" : int, +}, + +{ + "#url" : "https://skeb.jp/@kanade_cocotte/sent-requests", + "#class" : skeb.SkebSentRequestsExtractor, +}, + +{ + "#url" : "https://skeb.jp/@4ra_su4/sentrequests", + "#class" : skeb.SkebSentRequestsExtractor, + "#pattern" : ( + r"https://si.imgix.net/4e44b668/uploads/origins/e42cbd8e-44af-4aaa-a11b-6a174f42202c\?bg=%23fff&auto=format&fm=webp&w=800&s=\w+", + r"https://si.imgix.net/4d30e75e/uploads/origins/6d3bb612-3f45-4d8e-9d31-49dceb3dab11\?bg=%23fff&auto=format&fm=webp&w=800&s=\w+", + ), + + "anonymous" : False, + "body" : """\ +リクエスト失礼致します。 +うちの子の福良ことりちゃん(https://twitter.com/sousaku_suru/status/1404393369564946432)(https://twitter.com/sousaku_suru/status/1523336440062820354)がナース衣装のコスプレをしている作品をご依頼したいです!コス衣装にカチューシャについているクローバーが反映されていると嬉しいです。ご検討よろしくお願い致します! + +https://twitter.com/sousaku_suru/status/1404393369564946432\ +""", + "content_category": "preview", + "count" : 2, + "extension" : "", + "file_id" : {950467, 950468}, + "file_url" : r"re:https://si.imgix.net/.+", + "filename" : str, + "genre" : "art", + "nsfw" : False, + "num" : range(1, 2), + "post_id" : 802511, + "post_num" : "2", + "post_url" : "https://skeb.jp/@okonimi_hyu/works/2", + "source_body" : None, + "source_thanks" : None, + "thanks" : None, + "translated_body" : False, + "translated_thanks": None, + "tags" : [ + "よろしく", + "お願い", + "作品", + "嬉しい", + "うちの子", + "コスプレ", + "カチューシャ", + "ナース", + "クローバー", + "ことりちゃん", + ], + "client" : { + "avatar_url" : "https://pbs.twimg.com/profile_images/1916152385107632128/pygB7-jf.jpg", + "header_url" : "https://pbs.twimg.com/profile_banners/1134460426006159360/1717082866/1500x500", + "id" : 2017625, + "name" : "しろえ", + "screen_name": "4ra_su4", + }, + "creator" : { + "avatar_url" : "https://pbs.twimg.com/profile_images/1943287378149543937/EaUIMtnM.jpg", + "header_url" : "https://pbs.twimg.com/profile_banners/2931377426/1523678757/1500x500", + "id" : 341737, + "name" : "Hyu@はゆ〜", + "screen_name": "okonimi_hyu", + }, + "original" : { + "byte_size" : {18463023, 793631}, + "duration" : None, + "extension" : {"psd", "png"}, + "frame_rate": None, + "height" : 1754, + "is_movie" : False, + "software" : None, + "transcoder": "image", + "width" : 1275, + }, +}, + { "#url" : "https://skeb.jp/search?q=bunny%20tree&t=works", - "#category": ("", "skeb", "search"), "#class" : skeb.SkebSearchExtractor, "#count" : ">= 18", @@ -78,13 +189,11 @@ __tests__ = ( { "#url" : "https://skeb.jp/@user/following_creators", - "#category": ("", "skeb", "following"), "#class" : skeb.SkebFollowingExtractor, }, { "#url" : "https://skeb.jp/following_users", - "#category": ("", "skeb", "following-users"), "#class" : skeb.SkebFollowingUsersExtractor, "#pattern" : skeb.SkebUserExtractor.pattern, "#auth" : True,