# -*- coding: utf-8 -*-

# Copyright 2024-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://scrolller.com/"""

from .common import Extractor, Message
from .. import text, util
from ..cache import cache

BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"


class ScrolllerExtractor(Extractor):
    """Base class for scrolller extractors"""
    category = "scrolller"
    root = "https://scrolller.com"
    directory_fmt = ("{category}", "{subredditTitle}")
    filename_fmt = "{id}{num:?_//>03}{title:? //[:230]}.{extension}"
    archive_fmt = "{id}_{num}"
    request_interval = (0.5, 1.5)

    def _init(self):
        """Start without an auth token; login() may set one later"""
        self.auth_token = None

    def items(self):
        """Yield one Directory message per post and one Url message per file

        Logs in first (a no-op without credentials), then walks the posts
        returned by posts(). Each post dict gets a "count" entry holding
        the number of files selected for it.
        """
        self.login()

        for post in self.posts():
            files = self._extract_files(post)
            post["count"] = len(files)

            yield Message.Directory, "", post
            for file in files:
                url = file["url"]
                # merge the file's fields (including "num") into the
                # post dict that is passed along with the Url message
                post.update(file)
                yield Message.Url, url, text.nameext_from_url(url, post)

    def posts(self):
        """Return an iterable of post dicts; overridden by subclasses"""
        return ()

    def _extract_files(self, post):
        """Select the best media source for every file of 'post'

        Removes "albumContent" from 'post'. Without album content the
        post's own "mediaSources" are used and the single result gets
        num=0; album entries are numbered starting at 1. Entries without
        any media sources are skipped with a warning.

        Returns a sequence of source dicts (possibly empty).
        """
        album = post.pop("albumContent", None)
        if not album:
            sources = post.get("mediaSources")
            if not sources:
                self.log.warning("%s: No media files", post.get("id"))
                return ()
            src = max(sources, key=self._sort_key)
            src["num"] = 0
            return (src,)

        files = []
        for num, media in enumerate(album, 1):
            sources = media.get("mediaSources")
            if not sources:
                self.log.warning("%s/%s: Missing media file",
                                 post.get("id"), num)
                continue
            src = max(sources, key=self._sort_key)
            src["num"] = num
            files.append(src)
        return files

    def login(self):
        """Obtain and store an auth token if a username is configured"""
        username, password = self._get_auth_info()
        if username:
            self.auth_token = self._login_impl(username, password)

    # NOTE(review): presumably caches the token for 28 days keyed on
    # 'username' (positional argument 1) -- confirm against ..cache
    @cache(maxage=28*86400, keyarg=1)
    def _login_impl(self, username, password):
        """Send a "LoginQuery" request and return the login token

        Raises AuthenticationError when the request fails with HTTP
        status 403; any other HttpError is re-raised unchanged.
        """
        self.log.info("Logging in as %s", username)

        variables = {
            "username": username,
            "password": password,
        }

        try:
            data = self._request_graphql("LoginQuery", variables, False)
        except self.exc.HttpError as exc:
            if exc.status == 403:
                raise self.exc.AuthenticationError()
            raise

        return data["login"]["token"]

    def _request_graphql(self, opname, variables, admin=True):
        """POST a GraphQL operation and return the response's "data" value

        opname:    name of the operation; the query text is looked up
                   through self.utils("graphql", opname)
        variables: dict sent as the operation's variables
        admin:     selects the ".../admin" endpoint with a JSON content
                   type (True) or the ".../api/v2/graphql" endpoint with
                   a plain-text content type (False)
        """
        headers = {
            # placeholder; the actual value is set per endpoint below
            "Content-Type"  : None,
            "Origin"        : self.root,
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
        }
        data = {
            "query"        : self.utils("graphql", opname),
            "variables"    : variables,
            "authorization": self.auth_token,
        }

        if admin:
            url = "https://api.scrolller.com/admin"
            headers["Content-Type"] = "application/json"
        else:
            url = "https://api.scrolller.com/api/v2/graphql"
            headers["Content-Type"] = "text/plain;charset=UTF-8"

        return self.request_json(
            url, method="POST", headers=headers, data=util.json_dumps(data),
        )["data"]

    def _pagination(self, opname, variables, data=None):
        """Yield every item of a paginated GraphQL operation

        opname:    name of the operation to (re-)request
        variables: request variables; its "iterator" entry is updated
                   in place before each follow-up request
        data:      optional, already fetched first page. It is ignored
                   and re-requested when it is None or has no items.

        Stops when a page has no iterator value, or, with a warning,
        when a response contains no "items" container at all.
        """
        if data is None or not data.get("items"):
            data = self._request_graphql(opname, variables)

        while True:
            # Responses wrap the page in one or more single-entry dicts
            # (e.g. {"getX": {...}}); descend until "items" shows up.
            # Only descend through non-empty dicts so that a null or
            # empty payload cannot raise TypeError/KeyError here.
            while isinstance(data, dict) and data and "items" not in data:
                data = data.popitem()[1]

            if not isinstance(data, dict) or "items" not in data:
                self.log.warning(
                    "%s: Unexpected response without 'items'", opname)
                return

            items = data["items"]
            if items:
                yield from items

            cursor = data.get("iterator")
            if not cursor:
                return
            variables["iterator"] = cursor

            data = self._request_graphql(opname, variables)

    def _sort_key(self, src):
        """Rank a media source: widest first, non-optimized wins ties"""
        return src["width"], not src["isOptimized"]


class ScrolllerSubredditExtractor(ScrolllerExtractor):
    """Extractor for media from a scrolller subreddit"""
    subcategory = "subreddit"
    pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?"
    example = "https://scrolller.com/r/SUBREDDIT"

    def posts(self):
        """Return an iterator over all posts of the matched subreddit

        A "filter" URL query parameter is upper-cased and stripped of
        trailing "S" characters (e.g. "pictures" -> "PICTURE") before
        being sent along; the sort order is always "RANDOM".
        """
        url, query = self.groups
        # named 'media_filter' to avoid shadowing the builtin filter()
        media_filter = None
        sort = "RANDOM"

        if query:
            params = text.parse_query(query)
            if "filter" in params:
                media_filter = params["filter"].upper().rstrip("S")

        variables = {
            "url"   : url,
            "filter": media_filter,
            "sortBy": sort,
            "limit" : 50,
        }
        subreddit = self._request_graphql(
            "SubredditQuery", variables)["getSubreddit"]

        variables = {
            "subredditId": subreddit["id"],
            "iterator": None,
            "filter"  : media_filter,
            "sortBy"  : sort,
            "limit"   : 50,
            "isNsfw"  : subreddit["isNsfw"],
        }
        # the subreddit response already carries the first page
        return self._pagination(
            "SubredditChildrenQuery", variables, subreddit["children"])


class ScrolllerUserExtractor(ScrolllerExtractor):
    """Extractor for media from a scrolller Reddit user"""
    subcategory = "user"
    directory_fmt = ("{category}", "User", "{posted_by}")
    pattern = BASE_PATTERN + r"/reddit-user/([^/?#]+)(?:/?\?([^#]+))?"
    example = "https://scrolller.com/reddit-user/USER"

    def posts(self):
        """Return an iterator over all posts of the matched Reddit user

        The first request is made with isNsfw=True. When it yields no
        items, isNsfw is switched to False and pagination starts over
        with a fresh request.
        """
        query = "UserPostsQuery"
        variables = {
            "username": text.unquote(self.groups[0]),
            "iterator": None,
            "limit"   : 40,
            "filter"  : None,
            "sortBy"  : "RANDOM",
            "isNsfw"  : True,
        }

        posts = self._request_graphql(query, variables)["getUserPosts"]
        if not posts.get("items"):
            # discard the empty page so _pagination() re-requests it
            posts = None
            variables["isNsfw"] = False

        return self._pagination(query, variables, posts)


class ScrolllerFollowingExtractor(ScrolllerExtractor):
    """Extractor for followed scrolller subreddits"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/following"
    example = "https://scrolller.com/following"

    def items(self):
        """Yield a Queue message for every followed subreddit

        Raises AuthorizationError when no auth token is available
        after the login attempt.
        """
        self.login()
        if not self.auth_token:
            raise self.exc.AuthorizationError("Login required")

        variables = {
            "iterator": None,
            "filter"  : None,
            "limit"   : 10,
            "isNsfw"  : False,
            "sortBy"  : "RANDOM",
        }

        for subreddit in self._pagination("GetFollowingSubreddits", variables):
            url = self.root + subreddit["url"]
            # hand each subreddit over to the subreddit extractor
            subreddit["_extractor"] = ScrolllerSubredditExtractor
            yield Message.Queue, url, subreddit


class ScrolllerPostExtractor(ScrolllerExtractor):
    """Extractor for media from a single scrolller post"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
    example = "https://scrolller.com/TITLE-SLUG-a1b2c3d4f5"

    def posts(self):
        """Return a 1-tuple with the post for the matched URL slug"""
        variables = {"url": "/" + self.groups[0]}
        data = self._request_graphql("SubredditPostQuery", variables)
        return (data["getPost"],)