[skeb] implement 'include' option (#6558 #7267)

- split 'user' extractor into 'works' & 'sent-requests' extractors
- use BASE_PATTERN & USER_PATTERN
- use self.groups
This commit is contained in:
Mike Fährmann
2025-08-12 18:43:11 +02:00
parent a143e12c87
commit ab54f71511
6 changed files with 191 additions and 42 deletions

View File

@@ -4746,6 +4746,28 @@ Description
Download article images.
extractor.skeb.include
----------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``"works"``
Example
* ``"works,sent-requests"``
* ``["works", "sent-requests"]``
Description
A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
* ``"works"``
* ``"sent-requests"``
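It is possible to use ``"all"`` instead of listing all values separately.
If this option is not set and ``sent-requests`` is enabled, both ``"works"`` and ``"sent-requests"`` are processed.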
extractor.skeb.sent-requests
----------------------------
Type

View File

@@ -675,6 +675,7 @@
"skeb":
{
"article" : false,
"include" : ["works"],
"sent-requests": false,
"thumbnails" : false,

View File

@@ -952,7 +952,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Skeb</td>
<td>https://skeb.jp/</td>
<td>Followed Creators, Followed Users, Posts, Search Results, User Profiles</td>
<td>Followed Creators, Followed Users, Posts, Search Results, Sent Requests, User Profiles, Works</td>
<td></td>
</tr>
<tr>

View File

@@ -6,9 +6,11 @@
"""Extractors for https://skeb.jp/"""
from .common import Extractor, Message
from .common import Extractor, Message, Dispatch
from .. import text
import itertools
BASE_PATTERN = r"(?:https?://)?skeb\.jp"
USER_PATTERN = BASE_PATTERN + r"/@([^/?#]+)"
class SkebExtractor(Extractor):
@@ -19,10 +21,6 @@ class SkebExtractor(Extractor):
archive_fmt = "{post_num}_{_file_id}_{content_category}"
root = "https://skeb.jp"
def __init__(self, match):
Extractor.__init__(self, match)
self.user_name = match[1]
def _init(self):
self.thumbnails = self.config("thumbnails", False)
self.article = self.config("article", False)
@@ -65,7 +63,7 @@ class SkebExtractor(Extractor):
url = file["file_url"]
yield Message.Url, url, text.nameext_from_url(url, post)
def _items_users(self):
def items_users(self):
base = self.root + "/@"
for user in self.users():
user["_extractor"] = SkebUserExtractor
@@ -196,44 +194,63 @@ class SkebExtractor(Extractor):
class SkebPostExtractor(SkebExtractor):
"""Extractor for a single skeb post"""
subcategory = "post"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"
pattern = USER_PATTERN + r"/works/(\d+)"
example = "https://skeb.jp/@USER/works/123"
def __init__(self, match):
SkebExtractor.__init__(self, match)
self.post_num = match[2]
def posts(self):
return (self.groups,)
class SkebWorksExtractor(SkebExtractor):
"""Extractor for a skeb user's works"""
subcategory = "works"
pattern = USER_PATTERN + r"/works"
example = "https://skeb.jp/@USER/works"
def posts(self):
return ((self.user_name, self.post_num),)
url = f"{self.root}/api/users/{self.groups[0]}/works"
params = {"role": "creator", "sort": "date"}
return self._pagination(url, params)
class SkebUserExtractor(SkebExtractor):
"""Extractor for all posts from a skeb user"""
subcategory = "user"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/?$"
class SkebSentRequestsExtractor(SkebExtractor):
"""Extractor for a skeb user's sent requests"""
subcategory = "sent-requests"
pattern = USER_PATTERN + r"/sent[ _-]?requests"
example = "https://skeb.jp/@USER/sent-requests"
def posts(self):
url = f"{self.root}/api/users/{self.groups[0]}/works"
params = {"role": "client", "sort": "date"}
return self._pagination(url, params)
class SkebUserExtractor(Dispatch, SkebExtractor):
"""Extractor for a skeb user profile"""
pattern = USER_PATTERN + r"/?$"
example = "https://skeb.jp/@USER"
def posts(self):
url = f"{self.root}/api/users/{self.user_name}/works"
params = {"role": "creator", "sort": "date"}
posts = self._pagination(url, params)
def items(self):
if self.config("sent-requests", False):
params = {"role": "client", "sort": "date"}
posts = itertools.chain(posts, self._pagination(url, params))
default = ("works", "sent-requests")
else:
default = ("works",)
return posts
base = f"{self.root}/@{self.groups[0]}/"
return self._dispatch_extractors((
(SkebWorksExtractor , base + "works"),
(SkebSentRequestsExtractor, base + "sent-requests"),
), default)
class SkebSearchExtractor(SkebExtractor):
"""Extractor for skeb search results"""
subcategory = "search"
pattern = r"(?:https?://)?skeb\.jp/search\?q=([^&#]+)"
pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
example = "https://skeb.jp/search?q=QUERY"
def metadata(self):
return {"search_tags": text.unquote(self.user_name)}
return {"search_tags": text.unquote(self.groups[0])}
def posts(self):
url = "https://hb1jt3kre9-2.algolianet.com/1/indexes/*/queries"
@@ -258,7 +275,7 @@ class SkebSearchExtractor(SkebExtractor):
request = {
"indexName": "Request",
"query": text.unquote(self.user_name),
"query": text.unquote(self.groups[0]),
"params": pams + str(page),
}
data = {"requests": (request,)}
@@ -281,13 +298,13 @@ class SkebSearchExtractor(SkebExtractor):
class SkebFollowingExtractor(SkebExtractor):
"""Extractor for all creators followed by a skeb user"""
subcategory = "following"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
pattern = USER_PATTERN + r"/following_creators"
example = "https://skeb.jp/@USER/following_creators"
items = SkebExtractor._items_users
items = SkebExtractor.items_users
def users(self):
endpoint = f"/users/{self.user_name}/following_creators"
endpoint = f"/users/{self.groups[0]}/following_creators"
params = {"sort": "date"}
return self._pagination_users(endpoint, params)
@@ -295,12 +312,11 @@ class SkebFollowingExtractor(SkebExtractor):
class SkebFollowingUsersExtractor(SkebExtractor):
"""Extractor for your followed users"""
subcategory = "following-users"
pattern = r"(?:https?://)?skeb\.jp/following_users()"
pattern = BASE_PATTERN + r"/following_users"
example = "https://skeb.jp/following_users"
items = SkebExtractor._items_users
items = SkebExtractor.items_users
def users(self):
endpoint = "/following_users"
params = {}
return self._pagination_users(endpoint, params)
return self._pagination_users(endpoint, {})

View File

@@ -380,6 +380,7 @@ SUBCATEGORY_MAP = {
"skeb": {
"following" : "Followed Creators",
"following-users": "Followed Users",
"sent-requests" : "Sent Requests",
},
"smugmug": {
"path": "Images from Users and Folders",

View File

@@ -10,7 +10,6 @@ from gallery_dl.extractor import skeb
__tests__ = (
{
"#url" : "https://skeb.jp/@kanade_cocotte/works/38",
"#category": ("", "skeb", "post"),
"#class" : skeb.SkebPostExtractor,
"#count" : 2,
@@ -20,7 +19,7 @@ __tests__ = (
"num" : range(1, 2),
"client" : {
"avatar_url" : r"re:https://pbs.twimg.com/profile_images/\d+/\w+\.jpg",
"header_url" : r"re:https://pbs.twimg.com/profile_banners/1375007870291300358/\d+/1500x500",
"header_url" : None,
"id" : 1196514,
"name" : str,
"screen_name": "minato_ragi",
@@ -58,8 +57,35 @@ __tests__ = (
{
"#url" : "https://skeb.jp/@kanade_cocotte",
"#category": ("", "skeb", "user"),
"#class" : skeb.SkebUserExtractor,
"#results" : (
"https://skeb.jp/@kanade_cocotte/works",
),
},
{
"#url" : "https://skeb.jp/@kanade_cocotte",
"#class" : skeb.SkebUserExtractor,
"#options" : {"include": "all"},
"#results" : (
"https://skeb.jp/@kanade_cocotte/works",
"https://skeb.jp/@kanade_cocotte/sent-requests",
),
},
{
"#url" : "https://skeb.jp/@kanade_cocotte",
"#class" : skeb.SkebUserExtractor,
"#options" : {"sent-requests": True},
"#results" : (
"https://skeb.jp/@kanade_cocotte/works",
"https://skeb.jp/@kanade_cocotte/sent-requests",
),
},
{
"#url" : "https://skeb.jp/@kanade_cocotte/works",
"#class" : skeb.SkebWorksExtractor,
"#pattern" : r"https://si\.imgix\.net/\w+/uploads/origins/[\w-]+",
"#range" : "1-5",
@@ -67,9 +93,94 @@ __tests__ = (
"num" : int,
},
{
"#url" : "https://skeb.jp/@kanade_cocotte/works",
"#class" : skeb.SkebWorksExtractor,
"#pattern" : r"https://si\.imgix\.net/\w+/uploads/origins/[\w-]+",
"#range" : "1-5",
"count": int,
"num" : int,
},
{
"#url" : "https://skeb.jp/@kanade_cocotte/sent-requests",
"#class" : skeb.SkebSentRequestsExtractor,
},
{
"#url" : "https://skeb.jp/@4ra_su4/sentrequests",
"#class" : skeb.SkebSentRequestsExtractor,
"#pattern" : (
r"https://si.imgix.net/4e44b668/uploads/origins/e42cbd8e-44af-4aaa-a11b-6a174f42202c\?bg=%23fff&auto=format&fm=webp&w=800&s=\w+",
r"https://si.imgix.net/4d30e75e/uploads/origins/6d3bb612-3f45-4d8e-9d31-49dceb3dab11\?bg=%23fff&auto=format&fm=webp&w=800&s=\w+",
),
"anonymous" : False,
"body" : """\
リクエスト失礼致します。
うちの子の福良ことりちゃんhttps://twitter.com/sousaku_suru/status/1404393369564946432https://twitter.com/sousaku_suru/status/1523336440062820354がナース衣装のコスプレをしている作品をご依頼したいですコス衣装にカチューシャについているクローバーが反映されていると嬉しいです。ご検討よろしくお願い致します
https://twitter.com/sousaku_suru/status/1404393369564946432\
""",
"content_category": "preview",
"count" : 2,
"extension" : "",
"file_id" : {950467, 950468},
"file_url" : r"re:https://si.imgix.net/.+",
"filename" : str,
"genre" : "art",
"nsfw" : False,
"num" : range(1, 2),
"post_id" : 802511,
"post_num" : "2",
"post_url" : "https://skeb.jp/@okonimi_hyu/works/2",
"source_body" : None,
"source_thanks" : None,
"thanks" : None,
"translated_body" : False,
"translated_thanks": None,
"tags" : [
"よろしく",
"お願い",
"作品",
"嬉しい",
"うちの子",
"コスプレ",
"カチューシャ",
"ナース",
"クローバー",
"ことりちゃん",
],
"client" : {
"avatar_url" : "https://pbs.twimg.com/profile_images/1916152385107632128/pygB7-jf.jpg",
"header_url" : "https://pbs.twimg.com/profile_banners/1134460426006159360/1717082866/1500x500",
"id" : 2017625,
"name" : "しろえ",
"screen_name": "4ra_su4",
},
"creator" : {
"avatar_url" : "https://pbs.twimg.com/profile_images/1943287378149543937/EaUIMtnM.jpg",
"header_url" : "https://pbs.twimg.com/profile_banners/2931377426/1523678757/1500x500",
"id" : 341737,
"name" : "Hyu@はゆ〜",
"screen_name": "okonimi_hyu",
},
"original" : {
"byte_size" : {18463023, 793631},
"duration" : None,
"extension" : {"psd", "png"},
"frame_rate": None,
"height" : 1754,
"is_movie" : False,
"software" : None,
"transcoder": "image",
"width" : 1275,
},
},
{
"#url" : "https://skeb.jp/search?q=bunny%20tree&t=works",
"#category": ("", "skeb", "search"),
"#class" : skeb.SkebSearchExtractor,
"#count" : ">= 18",
@@ -78,13 +189,11 @@ __tests__ = (
{
"#url" : "https://skeb.jp/@user/following_creators",
"#category": ("", "skeb", "following"),
"#class" : skeb.SkebFollowingExtractor,
},
{
"#url" : "https://skeb.jp/following_users",
"#category": ("", "skeb", "following-users"),
"#class" : skeb.SkebFollowingUsersExtractor,
"#pattern" : skeb.SkebUserExtractor.pattern,
"#auth" : True,