[fansly] improve file extraction code (#4401)
- add 'format' option
- handle exceptions
This commit is contained in:
@@ -2811,6 +2811,18 @@ Description
|
|||||||
`fanbox.comments <extractor.fanbox.comments_>`__
|
`fanbox.comments <extractor.fanbox.comments_>`__
|
||||||
|
|
||||||
|
|
||||||
|
extractor.fansly.format
|
||||||
|
-----------------------
|
||||||
|
Type
|
||||||
|
``list`` of ``integers``
|
||||||
|
Default
|
||||||
|
``[303, 302, 1, 2, 4]``
|
||||||
|
Description
|
||||||
|
Selects the file format to extract.
|
||||||
|
|
||||||
|
When more than one format is given, the first available one is selected.
|
||||||
|
|
||||||
|
|
||||||
extractor.fansly.token
|
extractor.fansly.token
|
||||||
----------------------
|
----------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -330,7 +330,9 @@
|
|||||||
},
|
},
|
||||||
"fansly":
|
"fansly":
|
||||||
{
|
{
|
||||||
"token": ""
|
"token": "",
|
||||||
|
|
||||||
|
"format": [303, 302, 1, 2, 4]
|
||||||
},
|
},
|
||||||
"flickr":
|
"flickr":
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://fansly.com/"""
|
"""Extractors for https://fansly.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text, util
|
||||||
import time
|
import time
|
||||||
|
|
||||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fansly\.com"
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fansly\.com"
|
||||||
@@ -25,6 +25,7 @@ class FanslyExtractor(Extractor):
|
|||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.api = FanslyAPI(self)
|
self.api = FanslyAPI(self)
|
||||||
|
self.formats = self.config("format") or (303, 302, 1, 2, 4)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
for post in self.posts():
|
for post in self.posts():
|
||||||
@@ -40,49 +41,71 @@ class FanslyExtractor(Extractor):
|
|||||||
|
|
||||||
def _extract_files(self, post):
|
def _extract_files(self, post):
|
||||||
files = []
|
files = []
|
||||||
|
|
||||||
for attachment in post.pop("attachments"):
|
for attachment in post.pop("attachments"):
|
||||||
media = attachment["media"]
|
try:
|
||||||
file = {
|
self._extract_attachment(files, post, attachment)
|
||||||
**media,
|
except Exception as exc:
|
||||||
"date": text.parse_timestamp(media["createdAt"]),
|
self.log.debug("", exc_info=exc)
|
||||||
"date_updated": text.parse_timestamp(media["updatedAt"]),
|
self.log.error(
|
||||||
}
|
"%s/%s, Failed to extract media (%s: %s)",
|
||||||
|
post["id"], attachment.get("id"),
|
||||||
|
exc.__class__.__name__, exc)
|
||||||
|
return files
|
||||||
|
|
||||||
width = 0
|
def _extract_attachment(self, files, post, attachment):
|
||||||
for variant in media["variants"]:
|
media = attachment["media"]
|
||||||
if variant["width"] > width:
|
variants = {
|
||||||
width = variant["width"]
|
variant["type"]: variant
|
||||||
variant_max = variant
|
for variant in media.pop("variants", ())
|
||||||
if variant["type"] == 303:
|
}
|
||||||
break
|
variants[media["type"]] = media
|
||||||
else:
|
|
||||||
# image
|
|
||||||
file["type"] = "image"
|
|
||||||
files.append({
|
|
||||||
"file": file,
|
|
||||||
"url" : variant_max["locations"][0]["location"],
|
|
||||||
})
|
|
||||||
continue
|
|
||||||
|
|
||||||
# video
|
for fmt in self.formats:
|
||||||
location = variant["locations"][0]
|
if fmt in variants and (variant := variants[fmt]).get("locations"):
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
return self.log.warning(
|
||||||
|
"%s/%s: Requested format not available",
|
||||||
|
post["id"], attachment["id"])
|
||||||
|
|
||||||
|
mime = variant["mimetype"]
|
||||||
|
location = variant.pop("locations")[0]
|
||||||
|
if "metadata" in variant:
|
||||||
|
try:
|
||||||
|
variant.update(util.json_loads(variant.pop("metadata")))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
file = {
|
||||||
|
**variant,
|
||||||
|
"format": fmt,
|
||||||
|
"date": text.parse_timestamp(media["createdAt"]),
|
||||||
|
"date_updated": text.parse_timestamp(media["updatedAt"]),
|
||||||
|
}
|
||||||
|
|
||||||
|
if "metadata" in location:
|
||||||
|
# manifest
|
||||||
meta = location["metadata"]
|
meta = location["metadata"]
|
||||||
|
|
||||||
file["type"] = "video"
|
file["type"] = "video"
|
||||||
files.append({
|
files.append({
|
||||||
"file": file,
|
"file": file,
|
||||||
"url": f"ytdl:{location['location']}",
|
"url": f"ytdl:{location['location']}",
|
||||||
"_fallback": (media["locations"][0]["location"],),
|
# "_fallback": (media["locations"][0]["location"],),
|
||||||
"_ytdl_manifest": "dash",
|
"_ytdl_manifest":
|
||||||
|
"dash" if mime == "application/dash+xml" else "hls",
|
||||||
"_ytdl_manifest_cookies": (
|
"_ytdl_manifest_cookies": (
|
||||||
("CloudFront-Key-Pair-Id", meta["Key-Pair-Id"]),
|
("CloudFront-Key-Pair-Id", meta["Key-Pair-Id"]),
|
||||||
("CloudFront-Signature" , meta["Signature"]),
|
("CloudFront-Signature" , meta["Signature"]),
|
||||||
("CloudFront-Policy" , meta["Policy"]),
|
("CloudFront-Policy" , meta["Policy"]),
|
||||||
),
|
),
|
||||||
})
|
})
|
||||||
|
else:
|
||||||
return files
|
file["type"] = "image" if mime.startswith("image/") else "video"
|
||||||
|
files.append({
|
||||||
|
"file": file,
|
||||||
|
"url" : location["location"],
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
class FanslyPostExtractor(FanslyExtractor):
|
class FanslyPostExtractor(FanslyExtractor):
|
||||||
@@ -250,6 +273,7 @@ class FanslyAPI():
|
|||||||
attachments.extend(
|
attachments.extend(
|
||||||
media[m["accountMediaId"]]
|
media[m["accountMediaId"]]
|
||||||
for m in bundle
|
for m in bundle
|
||||||
|
if m["accountMediaId"] in media
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self.extractor.log.warning(
|
self.extractor.log.warning(
|
||||||
|
|||||||
@@ -20,6 +20,40 @@ __tests__ = (
|
|||||||
"#class" : fansly.FanslyPostExtractor,
|
"#class" : fansly.FanslyPostExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://fansly.com/post/545313467469410305",
|
||||||
|
"#comment" : "'This post does not exist or has been deleted.'",
|
||||||
|
"#class" : fansly.FanslyPostExtractor,
|
||||||
|
"#count" : 0,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://fansly.com/post/543835794918354944",
|
||||||
|
"#comment" : "one locked image",
|
||||||
|
"#class" : fansly.FanslyPostExtractor,
|
||||||
|
"#pattern" : r"https://cdn3.fansly.com/364164066794549248/542559086856646656.jpeg\?.+",
|
||||||
|
"#count" : 1,
|
||||||
|
"#auth" : False,
|
||||||
|
"#log" : (
|
||||||
|
"No 'token' provided",
|
||||||
|
"543835794918354944/542560754868432896: Requested format not available",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://fansly.com/post/451349524175138816",
|
||||||
|
"#comment" : "locked image + 2 locked videos",
|
||||||
|
"#class" : fansly.FanslyPostExtractor,
|
||||||
|
"#count" : 0,
|
||||||
|
"#auth" : False,
|
||||||
|
"#log" : (
|
||||||
|
"No 'token' provided",
|
||||||
|
"451349524175138816/451349523013316609: Requested format not available",
|
||||||
|
"451349524175138816/451349523000729600: Requested format not available",
|
||||||
|
"451349524175138816/451349523025899520: Requested format not available",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://fansly.com/Oliviaus/posts",
|
"#url" : "https://fansly.com/Oliviaus/posts",
|
||||||
"#class" : fansly.FanslyCreatorPostsExtractor,
|
"#class" : fansly.FanslyCreatorPostsExtractor,
|
||||||
|
|||||||
Reference in New Issue
Block a user