[civitai] fix/improve video downloads (#7502)

- add 'quality-videos' option
- fix downloading static .webp files instead of video
This commit is contained in:
Mike Fährmann
2025-05-10 16:13:48 +02:00
parent 3799369248
commit f343c8c0b6
4 changed files with 118 additions and 8 deletions

View File

@@ -1895,6 +1895,26 @@ Description
to download images in JPEG format at their original resolution.
extractor.civitai.quality-videos
--------------------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``"quality=100"``
Example
* ``"+transcode=true,quality=100"``
* ``["+", "transcode=true", "quality=100"]``
Description
A (comma-separated) list of video quality options
to pass with every video URL.
Known available options include ``original``, ``quality``, and ``transcode``.
Use ``+`` as the first character to `add` the given options to the
`quality <extractor.civitai.quality_>`__ ones.
extractor.cyberdrop.domain
--------------------------
Type

View File

@@ -188,7 +188,8 @@
"include" : ["user-models", "user-posts"],
"metadata": false,
"nsfw" : true,
"quality" : "original=true"
"quality" : "original=true",
"quality-videos": "quality=100"
},
"coomerparty":
{

View File

@@ -45,6 +45,20 @@ class CivitaiExtractor(Extractor):
self._image_quality = "original=true"
self._image_ext = "png"
quality_video = self.config("quality-videos")
if quality_video:
if not isinstance(quality_video, str):
quality_video = ",".join(quality_video)
if quality_video[0] == "+":
quality_video = (self._image_quality + "," +
quality_video.lstrip("+,"))
self._video_quality = quality_video
elif quality_video is not None and quality:
self._video_quality = self._image_quality
else:
self._video_quality = "quality=100"
self._video_ext = "webm"
metadata = self.config("metadata")
if metadata:
if isinstance(metadata, str):
@@ -114,7 +128,10 @@ class CivitaiExtractor(Extractor):
image["date"] = text.parse_datetime(
image["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
text.nameext_from_url(url, image)
image["extension"] = self._image_ext
if not image["extension"]:
image["extension"] = (
self._video_ext if image.get("type") == "video" else
self._image_ext)
yield Message.Directory, image
yield Message.Url, url, image
return
@@ -130,10 +147,13 @@ class CivitaiExtractor(Extractor):
def _url(self, image):
url = image["url"]
video = image.get("type") == "video"
quality = self._video_quality if video else self._image_quality
if "/" in url:
parts = url.rsplit("/", 3)
image["uuid"] = parts[1]
parts[2] = self._image_quality
parts[2] = quality
return "/".join(parts)
image["uuid"] = url
@@ -143,7 +163,7 @@ class CivitaiExtractor(Extractor):
name = "{}.{}".format(image.get("id"), mime.rpartition("/")[2])
return (
"https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/{}/{}/{}".format(
url, self._image_quality, name)
url, quality, name)
)
def _image_results(self, images):
@@ -154,7 +174,9 @@ class CivitaiExtractor(Extractor):
"url" : self._url(file),
})
if not data["extension"]:
data["extension"] = self._image_ext
data["extension"] = (
self._video_ext if file.get("type") == "video" else
self._image_ext)
if "id" not in file and data["filename"].isdecimal():
file["id"] = text.parse_int(data["filename"])
if self._meta_generation:

View File

@@ -86,9 +86,10 @@ __tests__ = (
"createdAt": "2024-08-31T01:11:47.021Z",
"date" : "dt:2024-08-31 01:11:47",
"extension": "jpg",
"extension": "png",
"filename" : "00014-3150861441",
"hash" : "ULN0-w?b4nRjxGM{-;t7M_t7NGae~qRjMyt7",
"width" : 1152,
"height" : 1536,
"id" : 26962948,
"nsfwLevel": 1,
@@ -96,14 +97,12 @@ __tests__ = (
"stats" : dict,
"url" : "69bf3279-df2c-4ec8-b795-479e9cd3db1b",
"uuid" : "69bf3279-df2c-4ec8-b795-479e9cd3db1b",
"width" : 1152,
"user" : {
"username": "bolero537",
},
"generation": {
"canRemix" : True,
"external" : None,
"generationProcess": "img2img",
"resources" : list,
"techniques": [],
"tools" : [],
@@ -136,6 +135,74 @@ __tests__ = (
},
},
{
"#url" : "https://civitai.com/images/44789630",
"#comment": "video",
"#class" : civitai.CivitaiImageExtractor,
"#urls" : "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/6a09ec54-6de4-4af1-b11d-2d0d8a66d651/original=true,transcode=true,quality=100/copy_C6C532CE-EC47-4A52-9138-AEF1D7756F16.Mp4",
"date" : "dt:2024-12-10 19:19:14",
"extension": "mp4",
"filename" : "copy_C6C532CE-EC47-4A52-9138-AEF1D7756F16",
"hash" : "U9D8%cIU03Rk02?F$$WE0gs,?GSg~B9ut6sl",
"width" : 1080,
"height" : 1920,
"id" : 44789630,
"mimeType" : "video/mp4",
"nsfwLevel": 2,
"postId" : 10151863,
"stats" : dict,
"type" : "video",
"url" : "6a09ec54-6de4-4af1-b11d-2d0d8a66d651",
"uuid" : "6a09ec54-6de4-4af1-b11d-2d0d8a66d651",
"metadata" : {
"audio" : True,
"duration": 15.033,
"hash" : "U9D8%cIU03Rk02?F$$WE0gs,?GSg~B9ut6sl",
"height" : 1920,
"size" : 23984479,
"width" : 1080,
},
"user" : {
"username": "jboogx_creative",
},
},
{
"#url" : "https://civitai.com/images/74353746",
"#comment": "video, rated 'R', WebP download (#7502)",
"#class": civitai.CivitaiImageExtractor,
"#urls" : "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/c7e3744b-8f0d-4124-94c1-75e2af00431d/original=true,transcode=true,quality=100/2025-04-25-23h40m21s_seed665048144_A man appears from off screen and spanks her butto_2.webm",
"date" : "dt:2025-05-05 12:27:28",
"extension": "webm",
"filename" : "2025-04-25-23h40m21s_seed665048144_A man appears from off screen and spanks her butto_2",
"hash" : "UMCsEoRPivxY~VjuWBoenMWBx]WrxvV?xvbb",
"width" : 512,
"height" : 752,
"id" : 74353746,
"mimeType" : "video/webm",
"nsfwLevel": 4,
"postId" : 16509805,
"stats" : dict,
"type" : "video",
"url" : "c7e3744b-8f0d-4124-94c1-75e2af00431d",
"uuid" : "c7e3744b-8f0d-4124-94c1-75e2af00431d",
"metadata" : {
"audio" : False,
"duration": 5.016,
"hash" : "UMCsEoRPivxY~VjuWBoenMWBx]WrxvV?xvbb",
"height" : 752,
"size" : 6011344,
"skipScannedAtReassignment": True,
"width" : 512,
},
"user" : {
"id" : 4856161,
"username": "VlrgRomNS",
},
},
{
"#url" : "https://civitai.com/posts/6877551",
"#class" : civitai.CivitaiPostExtractor,