350 lines
10 KiB
Python
350 lines
10 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2024-2025 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
"""Extractors for https://scrolller.com/"""
|
|
|
|
from .common import Extractor, Message
|
|
from .. import text, util, exception
|
|
from ..cache import cache
|
|
|
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
|
|
|
|
|
|
class ScrolllerExtractor(Extractor):
|
|
"""Base class for scrolller extractors"""
|
|
category = "scrolller"
|
|
root = "https://scrolller.com"
|
|
directory_fmt = ("{category}", "{subredditTitle}")
|
|
filename_fmt = "{id}{num:?_//>03}{title:? //[:230]}.{extension}"
|
|
archive_fmt = "{id}_{num}"
|
|
request_interval = (0.5, 1.5)
|
|
|
|
def _init(self):
|
|
self.auth_token = None
|
|
|
|
def items(self):
|
|
self.login()
|
|
|
|
for post in self.posts():
|
|
files = self._extract_files(post)
|
|
post["count"] = len(files)
|
|
|
|
yield Message.Directory, "", post
|
|
for file in files:
|
|
url = file["url"]
|
|
post.update(file)
|
|
yield Message.Url, url, text.nameext_from_url(url, post)
|
|
|
|
def posts(self):
|
|
return ()
|
|
|
|
def _extract_files(self, post):
|
|
album = post.pop("albumContent", None)
|
|
if not album:
|
|
sources = post.get("mediaSources")
|
|
if not sources:
|
|
self.log.warning("%s: No media files", post.get("id"))
|
|
return ()
|
|
src = max(sources, key=self._sort_key)
|
|
src["num"] = 0
|
|
return (src,)
|
|
|
|
files = []
|
|
for num, media in enumerate(album, 1):
|
|
sources = media.get("mediaSources")
|
|
if not sources:
|
|
self.log.warning("%s/%s: Missing media file",
|
|
post.get("id"), num)
|
|
continue
|
|
src = max(sources, key=self._sort_key)
|
|
src["num"] = num
|
|
files.append(src)
|
|
return files
|
|
|
|
def login(self):
|
|
username, password = self._get_auth_info()
|
|
if username:
|
|
self.auth_token = self._login_impl(username, password)
|
|
|
|
@cache(maxage=28*86400, keyarg=1)
|
|
def _login_impl(self, username, password):
|
|
self.log.info("Logging in as %s", username)
|
|
|
|
variables = {
|
|
"username": username,
|
|
"password": password,
|
|
}
|
|
|
|
try:
|
|
data = self._request_graphql("LoginQuery", variables, False)
|
|
except exception.HttpError as exc:
|
|
if exc.status == 403:
|
|
raise exception.AuthenticationError()
|
|
raise
|
|
|
|
return data["login"]["token"]
|
|
|
|
def _request_graphql(self, opname, variables, admin=True):
|
|
headers = {
|
|
"Content-Type" : None,
|
|
"Origin" : self.root,
|
|
"Sec-Fetch-Dest": "empty",
|
|
"Sec-Fetch-Mode": "cors",
|
|
"Sec-Fetch-Site": "same-site",
|
|
}
|
|
data = {
|
|
"query" : QUERIES[opname],
|
|
"variables" : variables,
|
|
"authorization": self.auth_token,
|
|
}
|
|
|
|
if admin:
|
|
url = "https://api.scrolller.com/admin"
|
|
headers["Content-Type"] = "application/json"
|
|
else:
|
|
url = "https://api.scrolller.com/api/v2/graphql"
|
|
headers["Content-Type"] = "text/plain;charset=UTF-8"
|
|
|
|
return self.request_json(
|
|
url, method="POST", headers=headers, data=util.json_dumps(data),
|
|
)["data"]
|
|
|
|
def _pagination(self, opname, variables, data=None):
|
|
if data is None or not data.get("items"):
|
|
data = self._request_graphql(opname, variables)
|
|
|
|
while True:
|
|
while "items" not in data:
|
|
data = data.popitem()[1]
|
|
yield from data["items"]
|
|
|
|
if not data["iterator"]:
|
|
return
|
|
variables["iterator"] = data["iterator"]
|
|
|
|
data = self._request_graphql(opname, variables)
|
|
|
|
def _sort_key(self, src):
|
|
return src["width"], not src["isOptimized"]
|
|
|
|
|
|
class ScrolllerSubredditExtractor(ScrolllerExtractor):
|
|
"""Extractor for media from a scrolller subreddit"""
|
|
subcategory = "subreddit"
|
|
pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?"
|
|
example = "https://scrolller.com/r/SUBREDDIT"
|
|
|
|
def posts(self):
|
|
url, query = self.groups
|
|
filter = None
|
|
sort = "RANDOM"
|
|
|
|
if query:
|
|
params = text.parse_query(query)
|
|
if "filter" in params:
|
|
filter = params["filter"].upper().rstrip("S")
|
|
|
|
variables = {
|
|
"url" : url,
|
|
"filter": filter,
|
|
"sortBy": sort,
|
|
"limit" : 50,
|
|
}
|
|
subreddit = self._request_graphql(
|
|
"SubredditQuery", variables)["getSubreddit"]
|
|
|
|
variables = {
|
|
"subredditId": subreddit["id"],
|
|
"iterator": None,
|
|
"filter" : filter,
|
|
"sortBy" : sort,
|
|
"limit" : 50,
|
|
"isNsfw" : subreddit["isNsfw"],
|
|
}
|
|
return self._pagination(
|
|
"SubredditChildrenQuery", variables, subreddit["children"])
|
|
|
|
|
|
class ScrolllerFollowingExtractor(ScrolllerExtractor):
|
|
"""Extractor for followed scrolller subreddits"""
|
|
subcategory = "following"
|
|
pattern = BASE_PATTERN + r"/following"
|
|
example = "https://scrolller.com/following"
|
|
|
|
def items(self):
|
|
self.login()
|
|
|
|
if not self.auth_token:
|
|
raise exception.AuthorizationError("Login required")
|
|
|
|
variables = {
|
|
"iterator": None,
|
|
"filter" : None,
|
|
"limit" : 10,
|
|
"isNsfw" : False,
|
|
"sortBy" : "RANDOM",
|
|
}
|
|
|
|
for subreddit in self._pagination("GetFollowingSubreddits", variables):
|
|
url = self.root + subreddit["url"]
|
|
subreddit["_extractor"] = ScrolllerSubredditExtractor
|
|
yield Message.Queue, url, subreddit
|
|
|
|
|
|
class ScrolllerPostExtractor(ScrolllerExtractor):
|
|
"""Extractor for media from a single scrolller post"""
|
|
subcategory = "post"
|
|
pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
|
|
example = "https://scrolller.com/TITLE-SLUG-a1b2c3d4f5"
|
|
|
|
def posts(self):
|
|
variables = {"url": "/" + self.groups[0]}
|
|
data = self._request_graphql("SubredditPostQuery", variables)
|
|
return (data["getPost"],)
|
|
|
|
|
|
QUERIES = {
|
|
|
|
"SubredditPostQuery": """\
|
|
query SubredditPostQuery(
|
|
$url: String!
|
|
) {
|
|
getPost(
|
|
data: { url: $url }
|
|
) {
|
|
__typename id url title subredditId subredditTitle subredditUrl
|
|
redditPath isNsfw hasAudio fullLengthSource gfycatSource redgifsSource
|
|
ownerAvatar username displayName favoriteCount isPaid tags
|
|
commentsCount commentsRepliesCount isFavorite
|
|
albumContent { mediaSources { url width height isOptimized } }
|
|
mediaSources { url width height isOptimized }
|
|
blurredMediaSources { url width height isOptimized }
|
|
}
|
|
}
|
|
""",
|
|
|
|
"SubredditQuery": """\
|
|
query SubredditQuery(
|
|
$url: String!
|
|
$iterator: String
|
|
$sortBy: GallerySortBy
|
|
$filter: GalleryFilter
|
|
$limit: Int!
|
|
) {
|
|
getSubreddit(
|
|
data: {
|
|
url: $url,
|
|
iterator: $iterator,
|
|
filter: $filter,
|
|
limit: $limit,
|
|
sortBy: $sortBy
|
|
}
|
|
) {
|
|
__typename id url title secondaryTitle description createdAt isNsfw
|
|
subscribers isComplete itemCount videoCount pictureCount albumCount
|
|
isPaid username tags isFollowing
|
|
banner { url width height isOptimized }
|
|
children {
|
|
iterator items {
|
|
__typename id url title subredditId subredditTitle subredditUrl
|
|
redditPath isNsfw hasAudio fullLengthSource gfycatSource
|
|
redgifsSource ownerAvatar username displayName favoriteCount
|
|
isPaid tags commentsCount commentsRepliesCount isFavorite
|
|
albumContent { mediaSources { url width height isOptimized } }
|
|
mediaSources { url width height isOptimized }
|
|
blurredMediaSources { url width height isOptimized }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
""",
|
|
|
|
"SubredditChildrenQuery": """\
|
|
query SubredditChildrenQuery(
|
|
$subredditId: Int!
|
|
$iterator: String
|
|
$filter: GalleryFilter
|
|
$sortBy: GallerySortBy
|
|
$limit: Int!
|
|
$isNsfw: Boolean
|
|
) {
|
|
getSubredditChildren(
|
|
data: {
|
|
subredditId: $subredditId,
|
|
iterator: $iterator,
|
|
filter: $filter,
|
|
sortBy: $sortBy,
|
|
limit: $limit,
|
|
isNsfw: $isNsfw
|
|
},
|
|
) {
|
|
iterator items {
|
|
__typename id url title subredditId subredditTitle subredditUrl
|
|
redditPath isNsfw hasAudio fullLengthSource gfycatSource
|
|
redgifsSource ownerAvatar username displayName favoriteCount isPaid
|
|
tags commentsCount commentsRepliesCount isFavorite
|
|
albumContent { mediaSources { url width height isOptimized } }
|
|
mediaSources { url width height isOptimized }
|
|
blurredMediaSources { url width height isOptimized }
|
|
}
|
|
}
|
|
}
|
|
""",
|
|
|
|
"GetFollowingSubreddits": """\
|
|
query GetFollowingSubreddits(
|
|
$iterator: String,
|
|
$limit: Int!,
|
|
$filter: GalleryFilter,
|
|
$isNsfw: Boolean,
|
|
$sortBy: GallerySortBy
|
|
) {
|
|
getFollowingSubreddits(
|
|
data: {
|
|
isNsfw: $isNsfw
|
|
limit: $limit
|
|
filter: $filter
|
|
iterator: $iterator
|
|
sortBy: $sortBy
|
|
}
|
|
) {
|
|
iterator items {
|
|
__typename id url title secondaryTitle description createdAt isNsfw
|
|
subscribers isComplete itemCount videoCount pictureCount albumCount
|
|
isFollowing
|
|
}
|
|
}
|
|
}
|
|
""",
|
|
|
|
"LoginQuery": """\
|
|
query LoginQuery(
|
|
$username: String!,
|
|
$password: String!
|
|
) {
|
|
login(
|
|
username: $username,
|
|
password: $password
|
|
) {
|
|
username token expiresAt isAdmin status isPremium
|
|
}
|
|
}
|
|
""",
|
|
|
|
"ItemTypeQuery": """\
|
|
query ItemTypeQuery(
|
|
$url: String!
|
|
) {
|
|
getItemType(
|
|
url: $url
|
|
)
|
|
}
|
|
""",
|
|
|
|
}
|