438 lines
14 KiB
Python
438 lines
14 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright 2025 Mike Fährmann
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
|
|
"""Extractors for https://fansly.com/"""
|
|
|
|
from .common import Extractor, Message
|
|
from .. import text, util, exception
|
|
import time
|
|
|
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fansly\.com"
|
|
|
|
|
|
class FanslyExtractor(Extractor):
|
|
"""Base class for fansly extractors"""
|
|
category = "fansly"
|
|
root = "https://fansly.com"
|
|
directory_fmt = ("{category}", "{account[username]} ({account[id]})")
|
|
filename_fmt = "{id}_{num}_{file[id]}.{extension}"
|
|
archive_fmt = "{file[id]}"
|
|
|
|
def _init(self):
|
|
self.api = FanslyAPI(self)
|
|
self.previews = self.config("previews", True)
|
|
|
|
if fmts := self.config("formats"):
|
|
self.formats = set(fmts)
|
|
else:
|
|
self.formats = None
|
|
|
|
def items(self):
|
|
for post in self.posts():
|
|
files = self._extract_files(post)
|
|
post["count"] = len(files)
|
|
post["date"] = self.parse_timestamp(post["createdAt"])
|
|
|
|
yield Message.Directory, "", post
|
|
for post["num"], file in enumerate(files, 1):
|
|
post.update(file)
|
|
url = file["url"]
|
|
yield Message.Url, url, text.nameext_from_url(url, post)
|
|
|
|
def posts(self):
|
|
creator, wall_id = self.groups
|
|
account = self.api.account(creator)
|
|
walls = account["walls"]
|
|
|
|
if wall_id:
|
|
for wall in walls:
|
|
if wall["id"] == wall_id:
|
|
break
|
|
else:
|
|
raise exception.NotFoundError("wall")
|
|
walls = (wall,)
|
|
|
|
for wall in walls:
|
|
self.kwdict["wall"] = wall
|
|
yield from self.posts_wall(account, wall)
|
|
|
|
def _extract_files(self, post):
|
|
if "attachments" not in post:
|
|
return ()
|
|
|
|
if "_extra" in post:
|
|
extra = post.pop("_extra", ())
|
|
media = {
|
|
media["id"]: media
|
|
for media in self.api.account_media(extra)
|
|
}
|
|
post["attachments"].extend(
|
|
media[mid]
|
|
for mid in extra
|
|
if mid in media
|
|
)
|
|
|
|
files = []
|
|
for attachment in post.pop("attachments"):
|
|
try:
|
|
self._extract_attachment(files, post, attachment)
|
|
except Exception as exc:
|
|
self.log.traceback(exc)
|
|
self.log.error(
|
|
"%s/%s, Failed to extract media (%s: %s)",
|
|
post["id"], attachment.get("id"),
|
|
exc.__class__.__name__, exc)
|
|
return files
|
|
|
|
def _extract_attachment(self, files, post, attachment, preview=False):
|
|
media = attachment["preview" if preview else "media"]
|
|
|
|
variants = media.pop("variants") or []
|
|
if media.get("locations"):
|
|
variants.append(media)
|
|
|
|
fmts = self.formats
|
|
formats = [
|
|
(variant["width"], (type-500 if type > 256 else type), variant)
|
|
for variant in variants
|
|
if variant.get("locations") and
|
|
(type := variant["type"]) and
|
|
(fmts is None or type in fmts)
|
|
]
|
|
|
|
try:
|
|
variant = max(formats)[-1]
|
|
except Exception:
|
|
if self.previews and "preview" in attachment and not preview:
|
|
self.log.info("%s/%s: Downloading Preview",
|
|
post["id"], attachment["id"])
|
|
return self._extract_attachment(files, post, attachment, True)
|
|
return self.log.warning("%s/%s: No format available",
|
|
post["id"], attachment["id"])
|
|
|
|
mime = variant["mimetype"]
|
|
location = variant.pop("locations")[0]
|
|
if "metadata" in variant:
|
|
try:
|
|
variant.update(util.json_loads(variant.pop("metadata")))
|
|
except Exception:
|
|
pass
|
|
|
|
file = {
|
|
**variant,
|
|
"preview": preview,
|
|
"format": variant["type"],
|
|
"date": self.parse_timestamp(media["createdAt"]),
|
|
"date_updated": self.parse_timestamp(media["updatedAt"]),
|
|
}
|
|
|
|
if "metadata" in location:
|
|
# manifest
|
|
meta = location["metadata"]
|
|
file["type"] = "video"
|
|
|
|
try:
|
|
fallback = (media["locations"][0]["location"],)
|
|
except Exception:
|
|
fallback = ()
|
|
|
|
files.append({
|
|
"file": file,
|
|
"url": "ytdl:" + location["location"],
|
|
"_fallback": fallback,
|
|
"_ytdl_manifest":
|
|
"dash" if mime == "application/dash+xml" else "hls",
|
|
"_ytdl_manifest_cookies": (
|
|
("CloudFront-Key-Pair-Id", meta["Key-Pair-Id"]),
|
|
("CloudFront-Signature" , meta["Signature"]),
|
|
("CloudFront-Policy" , meta["Policy"]),
|
|
),
|
|
})
|
|
else:
|
|
file["type"] = "image" if mime.startswith("image/") else "video"
|
|
files.append({
|
|
"file": file,
|
|
"url" : location["location"],
|
|
})
|
|
|
|
|
|
class FanslyPostExtractor(FanslyExtractor):
|
|
subcategory = "post"
|
|
pattern = BASE_PATTERN + r"/post/(\d+)"
|
|
example = "https://fansly.com/post/1234567890"
|
|
|
|
def posts(self):
|
|
return self.api.post(self.groups[0])
|
|
|
|
|
|
class FanslyHomeExtractor(FanslyExtractor):
|
|
subcategory = "home"
|
|
pattern = BASE_PATTERN + r"/home(?:/(?:subscribed()|list/(\d+)))?"
|
|
example = "https://fansly.com/home"
|
|
|
|
def posts(self):
|
|
subscribed, list_id = self.groups
|
|
if subscribed is not None:
|
|
mode = "1"
|
|
elif list_id is not None:
|
|
mode = None
|
|
else:
|
|
mode = "0"
|
|
return self.api.timeline_home(mode, list_id)
|
|
|
|
|
|
class FanslyListExtractor(FanslyExtractor):
|
|
subcategory = "list"
|
|
pattern = BASE_PATTERN + r"/lists/(\d+)"
|
|
example = "https://fansly.com/lists/1234567890"
|
|
|
|
def items(self):
|
|
base = self.root + "/"
|
|
for account in self.api.lists_itemsnew(self.groups[0]):
|
|
account["_extractor"] = FanslyCreatorPostsExtractor
|
|
url = f"{base}{account['username']}/posts"
|
|
yield Message.Queue, url, account
|
|
|
|
|
|
class FanslyListsExtractor(FanslyExtractor):
|
|
subcategory = "lists"
|
|
pattern = BASE_PATTERN + r"/lists"
|
|
example = "https://fansly.com/lists"
|
|
|
|
def items(self):
|
|
base = self.root + "/lists/"
|
|
for list in self.api.lists_account():
|
|
list["_extractor"] = FanslyListExtractor
|
|
url = f"{base}{list['id']}#{list['label']}"
|
|
yield Message.Queue, url, list
|
|
|
|
|
|
class FanslyCreatorPostsExtractor(FanslyExtractor):
|
|
subcategory = "creator-posts"
|
|
pattern = BASE_PATTERN + r"/([^/?#]+)/posts(?:/wall/(\d+))?"
|
|
example = "https://fansly.com/CREATOR/posts"
|
|
|
|
def posts_wall(self, account, wall):
|
|
return self.api.timeline_new(account["id"], wall["id"])
|
|
|
|
|
|
class FanslyCreatorMediaExtractor(FanslyExtractor):
|
|
subcategory = "creator-media"
|
|
pattern = BASE_PATTERN + r"/([^/?#]+)/media(?:/wall/(\d+))?"
|
|
example = "https://fansly.com/CREATOR/media"
|
|
|
|
def posts_wall(self, account, wall):
|
|
return self.api.mediaoffers_location(account["id"], wall["id"])
|
|
|
|
|
|
class FanslyAPI():
|
|
ROOT = "https://apiv3.fansly.com"
|
|
|
|
def __init__(self, extractor):
|
|
self.extractor = extractor
|
|
self.headers = {
|
|
"fansly-client-ts": None,
|
|
"Origin" : extractor.root,
|
|
}
|
|
|
|
if token := extractor.config("token"):
|
|
self.headers["authorization"] = token
|
|
self.extractor.log.debug(
|
|
"Using authorization 'token' %.5s...", token)
|
|
else:
|
|
self.extractor.log.warning("No 'token' provided")
|
|
|
|
def account(self, creator):
|
|
if creator.startswith("id:"):
|
|
return self.account_by_id(creator[3:])
|
|
return self.account_by_username(creator)
|
|
|
|
def account_by_username(self, username):
|
|
endpoint = "/v1/account"
|
|
params = {"usernames": username}
|
|
return self._call(endpoint, params)[0]
|
|
|
|
def account_by_id(self, account_id):
|
|
endpoint = "/v1/account"
|
|
params = {"ids": account_id}
|
|
return self._call(endpoint, params)[0]
|
|
|
|
def accounts_by_id(self, account_ids):
|
|
endpoint = "/v1/account"
|
|
params = {"ids": ",".join(map(str, account_ids))}
|
|
return self._call(endpoint, params)
|
|
|
|
def account_media(self, media_ids):
|
|
endpoint = "/v1/account/media"
|
|
params = {"ids": ",".join(map(str, media_ids))}
|
|
return self._call(endpoint, params)
|
|
|
|
def lists_account(self):
|
|
endpoint = "/v1/lists/account"
|
|
params = {"itemId": ""}
|
|
return self._call(endpoint, params)
|
|
|
|
def lists_itemsnew(self, list_id, sort="3"):
|
|
endpoint = "/v1/lists/itemsnew"
|
|
params = {
|
|
"listId" : list_id,
|
|
"limit" : 50,
|
|
"after" : None,
|
|
"sortMode": sort,
|
|
}
|
|
return self._pagination_list(endpoint, params)
|
|
|
|
def mediaoffers_location(self, account_id, wall_id):
|
|
endpoint = "/v1/mediaoffers/location"
|
|
params = {
|
|
"locationId": wall_id,
|
|
"locationType": "1002",
|
|
"accountId": account_id,
|
|
"mediaType": "",
|
|
"before": "",
|
|
"after" : "0",
|
|
"limit" : "30",
|
|
"offset": "0",
|
|
}
|
|
return self._pagination_media(endpoint, params)
|
|
|
|
def post(self, post_id):
|
|
endpoint = "/v1/post"
|
|
params = {"ids": post_id}
|
|
return self._update_posts(self._call(endpoint, params))
|
|
|
|
def timeline_home(self, mode="0", list_id=None):
|
|
endpoint = "/v1/timeline/home"
|
|
params = {"before": "0", "after": "0"}
|
|
if list_id is None:
|
|
params["mode"] = mode
|
|
else:
|
|
params["listId"] = list_id
|
|
return self._pagination(endpoint, params)
|
|
|
|
def timeline_new(self, account_id, wall_id):
|
|
endpoint = "/v1/timelinenew/" + str(account_id)
|
|
params = {
|
|
"before" : "0",
|
|
"after" : "0",
|
|
"wallId" : wall_id,
|
|
"contentSearch": "",
|
|
}
|
|
return self._pagination(endpoint, params)
|
|
|
|
def _update_posts(self, response):
|
|
accounts = {
|
|
account["id"]: account
|
|
for account in response["accounts"]
|
|
}
|
|
media = {
|
|
media["id"]: media
|
|
for media in response["accountMedia"]
|
|
}
|
|
bundles = {
|
|
bundle["id"]: bundle
|
|
for bundle in response["accountMediaBundles"]
|
|
}
|
|
|
|
posts = response["posts"]
|
|
for post in posts:
|
|
try:
|
|
post["account"] = accounts[post.pop("accountId")]
|
|
except KeyError:
|
|
pass
|
|
|
|
extra = None
|
|
attachments = []
|
|
for attachment in post["attachments"]:
|
|
try:
|
|
cid = attachment["contentId"]
|
|
except KeyError:
|
|
attachments.append(attachment)
|
|
continue
|
|
|
|
if cid in media:
|
|
attachments.append(media[cid])
|
|
elif cid in bundles:
|
|
bundle = bundles[cid]["bundleContent"]
|
|
bundle.sort(key=lambda c: c["pos"])
|
|
for c in bundle:
|
|
mid = c["accountMediaId"]
|
|
if mid in media:
|
|
attachments.append(media[mid])
|
|
else:
|
|
if extra is None:
|
|
post["_extra"] = extra = []
|
|
extra.append(mid)
|
|
else:
|
|
self.extractor.log.warning(
|
|
"%s: Unhandled 'contentId' %s",
|
|
post["id"], cid)
|
|
post["attachments"] = attachments
|
|
|
|
return posts
|
|
|
|
def _update_media(self, items, response):
|
|
posts = {
|
|
post["id"]: post
|
|
for post in response["posts"]
|
|
}
|
|
|
|
response["posts"] = [
|
|
posts[item["correlationId"]]
|
|
for item in items
|
|
]
|
|
|
|
return self._update_posts(response)
|
|
|
|
def _update_items(self, items):
|
|
ids = [item["id"] for item in items]
|
|
accounts = {
|
|
account["id"]: account
|
|
for account in self.accounts_by_id(ids)
|
|
}
|
|
return [accounts[id] for id in ids]
|
|
|
|
def _call(self, endpoint, params):
|
|
url = f"{self.ROOT}/api{endpoint}"
|
|
params["ngsw-bypass"] = "true"
|
|
headers = self.headers.copy()
|
|
headers["fansly-client-ts"] = str(int(time.time() * 1000))
|
|
|
|
data = self.extractor.request_json(
|
|
url, params=params, headers=headers)
|
|
return data["response"]
|
|
|
|
def _pagination(self, endpoint, params):
|
|
while True:
|
|
response = self._call(endpoint, params)
|
|
|
|
if not response.get("posts"):
|
|
return
|
|
posts = self._update_posts(response)
|
|
yield from posts
|
|
params["before"] = min(p["id"] for p in posts)
|
|
|
|
def _pagination_list(self, endpoint, params):
|
|
while True:
|
|
response = self._call(endpoint, params)
|
|
|
|
if not response:
|
|
return
|
|
yield from self._update_items(response)
|
|
params["after"] = response[-1]["sortId"]
|
|
|
|
def _pagination_media(self, endpoint, params):
|
|
while True:
|
|
response = self._call(endpoint, params)
|
|
|
|
data = response["data"]
|
|
if not data:
|
|
return
|
|
yield from self._update_media(data, response["aggregationData"])
|
|
params["before"] = data[-1]["id"]
|