[subscribestar] add 'user-tag' extractor (#8737)

This commit is contained in:
Mike Fährmann
2025-12-21 22:10:17 +01:00
parent 375ab4084b
commit 609e19273d
3 changed files with 54 additions and 17 deletions

View File

@@ -27,12 +27,12 @@ class SubscribestarExtractor(Extractor):
_warning = True
def __init__(self, match):
tld, self.item = match.groups()
if tld == "adult":
if match[1] == "adult":
self.root = "https://subscribestar.adult"
self.cookies_domain = ".subscribestar.adult"
self.subcategory += "-adult"
Extractor.__init__(self, match)
self.item = match[2]
def items(self):
self.login()
@@ -148,6 +148,21 @@ class SubscribestarExtractor(Extractor):
for cookie in response.cookies
}
def _pagination(self, url, params=None):
needle_next_page = 'data-role="infinite_scroll-next_page" href="'
page = self.request(url, params=params).text
while True:
posts = page.split('<div class="post ')[1:]
if not posts:
return
yield from posts
url = text.extr(posts[-1], needle_next_page, '"')
if not url:
return
page = self.request_json(self.root + text.unescape(url))["html"]
def _media_from_post(self, html):
media = []
@@ -218,6 +233,20 @@ class SubscribestarExtractor(Extractor):
self._warn_preview = util.noop
class SubscribestarUserTagExtractor(SubscribestarExtractor):
"""Extractor for a subscribestar user's tagged posts"""
subcategory = "user-tag"
pattern = rf"{BASE_PATTERN}/(?!posts/)([^/?#]+)\?tag=([^#]+)"
example = "https://www.subscribestar.com/USER?tag=TAG"
def posts(self):
_, user, tag = self.groups
self.kwdict["search_tags"] = tag = text.unquote(tag)
url = f"{self.root}/{user}"
params = {"tag": tag}
return self._pagination(url, params)
class SubscribestarUserExtractor(SubscribestarExtractor):
"""Extractor for media from a subscribestar user"""
subcategory = "user"
@@ -225,19 +254,7 @@ class SubscribestarUserExtractor(SubscribestarExtractor):
example = "https://www.subscribestar.com/USER"
def posts(self):
needle_next_page = 'data-role="infinite_scroll-next_page" href="'
page = self.request(f"{self.root}/{self.item}").text
while True:
posts = page.split('<div class="post ')[1:]
if not posts:
return
yield from posts
url = text.extr(posts[-1], needle_next_page, '"')
if not url:
return
page = self.request_json(self.root + text.unescape(url))["html"]
return self._pagination(f"{self.root}/{self.item}")
class SubscribestarPostExtractor(SubscribestarExtractor):