[turbo] update 'saint' extractors (#8893 #8896)

* Implement turbo.py & remove domain pattern from saints.py
* Remove leftover commented pattern from saints.py
* Make turbo.py comply with flake8
* Add album support
* Improved metadata extraction for albums and single files & created turbo.py tests using saints.py test
* Align turbo.py extractor with flake8 rules
* Fix #class name on turbo.py tests
* Fix #category test
* Fix #category test x2
* Fix #category tests
* Fix #category tests
* Fix TurboMediaExtractor self.groups unpacking

* update basic module formatting
* replace 'saint' with 'turbo' in modules list
* remove saint extractors and tests
* update & simplify 'media' extractor
* update & simplify 'album' extractor
* update tests
* update supportedsites
* update 'category-map' & 'config-map'

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
brerk
2026-01-19 15:20:13 -06:00
committed by GitHub
parent cc5bfa6eb0
commit e00c717b15
11 changed files with 230 additions and 282 deletions

View File

@@ -9260,6 +9260,7 @@ Special Values
{
"coomer" : "coomerparty",
"kemono" : "kemonoparty",
"turbo" : "saint",
"schalenetwork": "koharu",
"naver-chzzk" : "chzzk",
"naver-blog" : "naver",
@@ -9287,7 +9288,8 @@ Default
"chzzk" : "naver-chzzk",
"naver" : "naver-blog",
"naverwebtoon" : "naver-webtoon",
"pixiv" : "pixiv-novel"
"pixiv" : "pixiv-novel",
"saint" : "turbo"
}
Description
Duplicate the configuration settings of extractor `categories`

View File

@@ -102,7 +102,8 @@
"chzzk" : "naver-chzzk",
"naver" : "naver-blog",
"naverwebtoon" : "naver-webtoon",
"pixiv" : "pixiv-novel"
"pixiv" : "pixiv-novel",
"saint" : "turbo"
},

View File

@@ -949,12 +949,6 @@ Consider all listed sites to potentially be NSFW.
<td>Posts, Search Results, User Profiles</td>
<td></td>
</tr>
<tr id="saint" title="saint">
<td>Saint</td>
<td>https://saint2.su/</td>
<td>Albums, Media Files</td>
<td></td>
</tr>
<tr id="sankaku" title="sankaku">
<td>Sankaku Channel</td>
<td>https://sankaku.app/</td>
@@ -1135,9 +1129,9 @@ Consider all listed sites to potentially be NSFW.
<td>Models, Posts, User Profiles</td>
<td></td>
</tr>
<tr id="turbovid" title="turbovid">
<td>turbovid.cr</td>
<td>https://turbovid.cr/</td>
<tr id="turbo" title="turbo">
<td>turbo.cr</td>
<td>https://turbo.cr/</td>
<td>Albums, Media Files</td>
<td></td>
</tr>

View File

@@ -321,6 +321,7 @@ def main():
catmap = {
"coomer" : "coomerparty",
"kemono" : "kemonoparty",
"turbo" : "saint",
"schalenetwork": "koharu",
"naver-blog" : "naver",
"naver-chzzk" : "chzzk",

View File

@@ -175,6 +175,7 @@ def remap_categories():
("chzzk" , "naver-chzzk"),
("naverwebtoon", "naver-webtoon"),
("pixiv" , "pixiv-novel"),
("saint" , "turbo"),
)
elif not cmap:
return

View File

@@ -181,7 +181,6 @@ modules = [
"rule34vault",
"rule34xyz",
"s3ndpics",
"saint",
"sankaku",
"sankakucomplex",
"schalenetwork",
@@ -215,6 +214,7 @@ modules = [
"tumblr",
"tumblrgallery",
"tungsten",
"turbo",
"twibooru",
"twitter",
"urlgalleries",

View File

@@ -1,119 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2024-2026 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://saint2.su/ and https://turbovid.cr/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
# URL prefix shared by the extractor patterns in this module:
# an optional "http://" or "https://" scheme followed by either
# turbo.cr / turbovid.cr or saint<optional digits>.(su|pk|cr|to)
BASE_PATTERN = r"(?:https?://)?(?:turbo(?:vid)?\.cr|saint\d*\.(?:su|pk|cr|to))"
class SaintAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for saint album pages"""
    category = "saint"
    root = "https://saint2.su"
    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
    example = "https://saint2.su/a/ID"

    def fetch_album(self, album_id):
        """Scrape the album page of 'album_id'

        Returns a 2-tuple: the list of file dicts found on the page
        and a dict with album-level metadata.
        """
        album_url = self.root + "/a/" + album_id
        response = self.request(album_url)
        extr = text.extract_from(response.text)

        # album metadata
        title = extr("<title>", "</title")
        descr = extr('name="description" content="', '"/>')

        files = []
        total_size = 0
        while True:
            thumb = extr("/thumbs/", '"')
            if not thumb:
                # no further thumbnail -> no further file entries
                break

            # drop the extension, then split "<id2>-<timestamp>"
            stem = thumb.rpartition(".")[0]
            id2, sep, ts = stem.rpartition("-")
            if not sep:
                # no "-" present: rpartition() leaves the whole stem
                # in its last slot, so use that as ID without a date
                id2, date = ts, None
            else:
                date = self.parse_timestamp(ts)

            # these lookups must stay in exactly this order
            embed_id = extr("/embed/", '"')
            size = text.parse_int(extr('data="', '"'))
            file_url = text.unescape(
                extr("onclick=\"play(", ")").strip("\"'"))
            id_dl = extr("/d/", ")").rstrip("\"'")

            total_size += size
            files.append({
                "id"   : embed_id,
                "id2"  : id2,
                "date" : date,
                "size" : size,
                "file" : file_url,
                "id_dl": id_dl,
            })

        album_name = text.unescape(title.rpartition(" - ")[0])
        metadata = {
            "album_id"     : album_id,
            "album_name"   : album_name,
            "album_size"   : total_size,
            "description"  : text.unescape(descr),
            "count"        : len(files),
            "_http_headers": {"Referer": response.url}
        }
        return files, metadata
class SaintMediaExtractor(SaintAlbumExtractor):
    """Extractor for individual saint media pages"""
    subcategory = "media"
    directory_fmt = ("{category}",)
    pattern = BASE_PATTERN + r"(/(embe)?d/([^/?#]+))"
    example = "https://saint2.su/embed/ID"

    def fetch_album(self, album_id):
        """Return a one-element 'album' for a single media URL

        Any exception raised while fetching or parsing the page is
        logged and results in an empty ((), {}) return value.
        """
        try:
            # groups: full path, "embe" for /embed/ URLs, media ID
            path, embed, _ = self.groups

            response = self.request(self.root + path)
            extr = text.extract_from(response.text)

            if not embed:  # /d/
                file = {
                    "file"     : text.unescape(extr('<a href="', '"')),
                    "id"       : album_id,
                    "id_dl"    : album_id,
                    "name"     : album_id,
                    "filename" : album_id,
                    "extension": "mp4",
                }
            else:  # /embed/
                # drop the extension, then split "<id2>-<timestamp>"
                stem = extr("/thumbs/", '"').rpartition(".")[0]
                id2, sep, ts = stem.rpartition("-")
                if not sep:
                    # no "-" present: the whole stem is the ID
                    id2, date = ts, None
                else:
                    date = self.parse_timestamp(ts)

                # these lookups must stay in exactly this order
                source = text.unescape(extr('<source src="', '"'))
                id_dl = extr("/d/", "'")
                file = {
                    "id"   : album_id,
                    "id2"  : id2,
                    "date" : date,
                    "file" : source,
                    "id_dl": id_dl,
                }

            file["_http_headers"] = {"Referer": response.url}
        except Exception as exc:
            self.log.error("%s: %s", exc.__class__.__name__, exc)
            return (), {}

        # placeholder album metadata for a standalone file
        metadata = {
            "album_id"   : "",
            "album_name" : "",
            "album_size" : -1,
            "description": "",
            "count"      : 1,
        }
        return (file,), metadata

View File

@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
# Copyright 2024-2026 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://turbo.cr/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
# URL prefix shared by the extractor patterns in this module:
# an optional "http://" or "https://" scheme followed by either
# turbo.cr / turbovid.cr (with an optional "www." prefix)
# or saint<optional digits>.(su|pk|cr|to)
BASE_PATTERN = (r"(?:https?://)?(?:"
r"(?:www\.)?turbo(?:vid)?\.cr|"
r"saint\d*\.(?:su|pk|cr|to))")
class TurboAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for album pages on turbo.cr"""
    category = "turbo"
    root = "https://turbo.cr"
    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
    example = "https://turbo.cr/a/ID"

    def fetch_album(self, album_id):
        """Return a (files, metadata) pair for the album 'album_id'

        'files' is the generator created by _extract_files();
        'metadata' is a dict describing the album as a whole.
        """
        album_url = f"{self.root}/a/{album_id}"
        page = self.request(album_url).text

        # keep these three lookups in this order
        extr = text.extract_from(page)
        heading = extr("<h1 ", "<")
        paragraph = extr("<p ", "<")
        rows = extr('id="fileTbody"', '</tbody>')

        referer = {"Referer": album_url}
        files = self._extract_files(rows, referer)

        # both captures start inside the opening tag; keep only what
        # follows its first ">" (or everything if there is no ">")
        album_name = text.unescape(heading.split(">", 1)[-1])
        description = text.unescape(paragraph.split(">", 1)[-1])

        album_size = sum(
            text.parse_int(value)
            for value in text.extract_iter(rows, 'data-size="', '"')
        )

        return files, {
            "album_id"     : album_id,
            "album_name"   : album_name,
            "description"  : description,
            "album_size"   : album_size,
            "count"        : rows.count("data-id="),
            "_http_headers": referer,
        }

    def _extract_files(self, body, headers):
        """Yield one file dict for every '<tr' ... '</tr>' span in 'body'

        Each row's 'data-id' value is sent to the '/api/sign' endpoint,
        using 'headers' as request headers, to get its URL and filename.
        """
        for row in text.extract_iter(body, "<tr", "</tr>"):
            file_id = text.extr(row, 'data-id="', '"')
            signed = self.request_json(
                f"{self.root}/api/sign?v={file_id}", headers=headers)

            # prefer the original filename when it is present and non-empty
            name = signed.get("original_filename")
            if not name:
                name = signed.get("filename")

            yield text.nameext_from_name(name, {
                "id"  : file_id,
                "file": signed.get("url"),
                "size": text.parse_int(
                    text.extr(row, 'data-size="', '"')),
                "_http_headers": headers,
            })
class TurboMediaExtractor(TurboAlbumExtractor):
    """Extractor for individual media pages on turbo.cr"""
    subcategory = "media"
    directory_fmt = ("{category}",)
    pattern = BASE_PATTERN + r"/(?:embe)?[dv]/([^/?#]+)"
    example = "https://turbo.cr/embed/ID"

    def fetch_album(self, album_id):
        """Return a one-element 'album' for the media ID 'album_id'

        Any exception raised by _extract_file() is logged and results
        in an empty ((), {}) return value.
        """
        try:
            file = self._extract_file(album_id)
        except Exception as exc:
            self.log.error("%s: %s", exc.__class__.__name__, exc)
            return (), {}

        # placeholder album metadata for a standalone file
        metadata = {
            "album_id"   : "",
            "album_name" : "",
            "album_size" : -1,
            "description": "",
            "count"      : 1,
        }
        return (file,), metadata

    def _extract_file(self, data_id):
        """Build the file dict for 'data_id'

        Size and date are read from the '/d/<data_id>' page;
        URL and filename come from the '/api/sign' endpoint.
        """
        page_url = f"{self.root}/d/{data_id}"
        referer = {"Referer": page_url}
        page = self.request(page_url).text

        size = text.extr(page, 'id="fileSizeBytes">', '<')
        # first "<span>" content located after the "File ID:" label
        date, _ = text.extract(
            page, "<span>", "<", page.find("File ID:"))

        signed = self.request_json(
            f"{self.root}/api/sign?v={data_id}", headers=referer)

        # prefer the original filename when it is present and non-empty
        name = signed.get("original_filename")
        if not name:
            name = signed.get("filename")

        return text.nameext_from_name(name, {
            "id"  : data_id,
            "file": signed.get("url"),
            # "&#43;" is the HTML entity for "+"
            "size": int(text.parse_float(size.replace("&#43;", "+"))),
            "date": self.parse_datetime_iso(date),
            "_http_headers": referer,
        })

View File

@@ -200,7 +200,7 @@ CATEGORY_MAP = {
"tmohentai" : "TMOHentai",
"tumblrgallery" : "TumblrGallery",
"turboimagehost" : "TurboImageHost.com",
"turbovid" : "turbovid.cr",
"turbo" : "turbo.cr",
"vanillarock" : "もえぴりあ",
"vidyart2" : "/v/idyart2",
"vidyapics" : "Vidya Booru",
@@ -709,11 +709,6 @@ def build_extractor_list():
default["wikifeetx"] = default["wikifeet"]
domains["wikifeetx"] = "https://www.wikifeetx.com/"
# turbovid
default["turbovid"] = default["saint"]
domains["turbovid"] = "https://turbovid.cr/"
domains["saint"] = "https://saint2.su/"
# imgdrive / imgtaxi / imgwallet
base = categories["imagehost"]
base["imgtaxi"] = base["imgdrive"]

View File

@@ -1,145 +0,0 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import saint
# Expected results for the extractors in the 'saint' module.
# Every entry names an input URL ("#url") and the extractor class
# expected for it ("#class"); all other keys describe expected output.
# NOTE(review): the exact meaning of each "#..." key is defined by the
# test runner, which is not part of this file -- confirm there.
__tests__ = (
# saint album
{
"#url" : "https://saint2.su/a/2c5iuWHTumH",
"#class": saint.SaintAlbumExtractor,
"#results": (
"https://data.saint2.cr/data/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
"https://data.saint2.cr/data/3b125e3fb4b98693f17d85cb53590215.mp4",
),
"album_id" : "2c5iuWHTumH",
"album_name" : "animations",
"album_size" : 37083862,
"count" : 2,
"date" : "type:datetime",
"description": "Descriptions can contain only alphanumeric ASCII characters",
"extension" : "mp4",
"file" : r"re:https://...",
"filename" : {"3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
"3b125e3fb4b98693f17d85cb53590215-ze10Ohbpoy5"},
"id" : {"6lC7mKrJst8",
"ze10Ohbpoy5"},
"id2" : {"6712834015d67",
"671284a627e0e"},
"id_dl" : {"M2IxY2NlYmYzNTc2ZjhkNWFhYzNlZTBlNWExMmRhOTUubXA0",
"M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0"},
"name" : {"3b1ccebf3576f8d5aac3ee0e5a12da95",
"3b125e3fb4b98693f17d85cb53590215"},
"num" : {1, 2},
},
# album URLs on the turbovid.cr / turbo.cr domains
{
"#url" : "https://turbovid.cr/a/FiphGijfJoR",
"#comment" : "'turbovid' album (#8851)",
"#category": ("lolisafe", "saint", "album"),
"#class" : saint.SaintAlbumExtractor,
"#results" : (
"https://data.saint2.cr/data/jZqe1xxqw9bX7.mp4",
"https://data.saint2.cr/data/eJ9fLurGdaHqS.mp4",
"https://data.saint2.cr/data/WkD7hRaHdBpBI.mp4",
),
"album_id" : "FiphGijfJoR",
"album_name" : """test-???-"&> album""",
"album_size" : 37165256,
"count" : 3,
"num" : range(1, 3),
"date" : None,
"description": """test-???-"&> description""",
"extension" : "mp4",
"file" : r"re:https://data.saint2.cr/data/\w+.mp4",
"filename" : str,
"id" : str,
"id2" : str,
"id_dl" : str,
"name" : str,
"size" : int,
},
{
"#url" : "https://turbo.cr/a/FiphGijfJoR",
"#comment" : "'turbo' album (#8888)",
"#class" : saint.SaintAlbumExtractor,
},
# single media: /embed/ URL
{
"#url" : "https://saint2.su/embed/6lC7mKrJst8",
"#class": saint.SaintMediaExtractor,
"#results" : "https://data.saint2.cr/data/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
"#sha1_content": "39037a029b3fe96f838b4545316caaa545c84075",
"count" : 1,
"date" : "dt:2024-10-18 15:48:16",
"extension": "mp4",
"file" : str,
"filename" : "3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
"id" : "6lC7mKrJst8",
"id2" : "6712834015d67",
"id_dl" : "M2IxY2NlYmYzNTc2ZjhkNWFhYzNlZTBlNWExMmRhOTUubXA0",
"name" : "3b1ccebf3576f8d5aac3ee0e5a12da95",
"num" : 1,
},
# single media: /d/ URL
{
"#url" : "https://saint2.su/d/M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"#class": saint.SaintMediaExtractor,
"#results": "https://data.saint2.cr/data/3b125e3fb4b98693f17d85cb53590215.mp4",
"count" : 1,
"extension": "mp4",
"file" : str,
"filename" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"id" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"id_dl" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"name" : "M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"num" : 1,
},
# further saint domains (.pk, .cr, .to): URL and class only
{
"#url" : "https://saint2.pk/embed/6lC7mKrJst8",
"#class": saint.SaintMediaExtractor,
},
{
"#url" : "https://saint2.cr/embed/6lC7mKrJst8",
"#class": saint.SaintMediaExtractor,
},
{
"#url" : "https://saint.to/embed/6lC7mKrJst8",
"#class": saint.SaintMediaExtractor,
},
# media URLs on the turbovid.cr / turbo.cr domains
{
"#url" : "https://turbovid.cr/embed/WkD7hRaHdBpBI",
"#comment" : "'turbovid' URL/video",
"#category": ("lolisafe", "saint", "media"),
"#class" : saint.SaintMediaExtractor,
"#results" : "https://data.saint2.cr/data/WkD7hRaHdBpBI.mp4",
"date" : None,
"extension" : "mp4",
"file" : "https://data.saint2.cr/data/WkD7hRaHdBpBI.mp4",
"filename" : "WkD7hRaHdBpBI",
"id" : "WkD7hRaHdBpBI",
"id2" : "WkD7hRaHdBpBI",
"id_dl" : "V2tEN2hSYUhkQnBCSS5tcDQ=",
"name" : "WkD7hRaHdBpBI",
},
{
"#url" : "https://turbo.cr/embed/WkD7hRaHdBpBI",
"#comment" : "'turbo' URL/video",
"#class" : saint.SaintMediaExtractor,
},
)

124
test/results/turbo.py Normal file
View File

@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import turbo
# Expected results for the extractors in the 'turbo' module.
# Every entry names an input URL ("#url") and the extractor class
# expected for it ("#class"); all other keys describe expected output.
# NOTE(review): the exact meaning of each "#..." key is defined by the
# test runner, which is not part of this file -- confirm there.
__tests__ = (
# turbo.cr album
{
"#url" : "https://turbo.cr/a/2c5iuWHTumH",
"#class": turbo.TurboAlbumExtractor,
"#pattern": (
r"https://dl\d+.turbocdn.st/data/3b125e3fb4b98693f17d85cb53590215.mp4\?exp=\d+&token=\w+&fn=3b125e3fb4b98693f17d85cb53590215.mp4",
r"https://dl\d+.turbocdn.st/data/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4\?exp=\d+&token=\w+&fn=3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
),
"album_id" : "2c5iuWHTumH",
"album_name" : "animations",
"album_size" : 37083862,
"count" : 2,
"description": "Descriptions can contain only alphanumeric ASCII characters",
"extension" : "mp4",
"file" : r"re:https://...",
"filename" : {"3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
"3b125e3fb4b98693f17d85cb53590215-ze10Ohbpoy5"},
"id" : {"6lC7mKrJst8",
"ze10Ohbpoy5"},
"name" : {"3b1ccebf3576f8d5aac3ee0e5a12da95",
"3b125e3fb4b98693f17d85cb53590215"},
"num" : {1, 2},
},
# album URLs on the legacy turbovid.cr / saint2.su domains
{
"#url" : "https://turbovid.cr/a/FiphGijfJoR",
"#comment" : "'turbovid' album (#8851)",
"#category": ("lolisafe", "turbo", "album"),
"#class" : turbo.TurboAlbumExtractor,
"#pattern" : (
r"https://dl\d+.turbocdn.st/data/WkD7hRaHdBpBI.mp4\?exp=\d+&token=\w+&fn=3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8.mp4",
r"https://dl\d+.turbocdn.st/data/eJ9fLurGdaHqS.mp4\?exp=\d+&token=\w+&fn=3b125e3fb4b98693f17d85cb53590215-ze10Ohbpoy5.mp4",
r"https://dl\d+.turbocdn.st/data/jZqe1xxqw9bX7.mp4\?exp=\d+&token=\w+&fn=test-%E3%83%86%E3%82%B9%E3%83%88-%2522%26%3E.mp4",
),
"album_id" : "FiphGijfJoR",
"album_name" : """test-テスト-"&> album""",
"album_size" : 37165256,
"count" : 3,
"num" : range(1, 3),
"description": """test-テスト-"&> description""",
"extension" : "mp4",
"file" : r"re:https://dl\d+.turbocdn.st/data/.+",
"filename" : str,
"id" : str,
"name" : str,
"size" : int,
},
{
"#url" : "https://saint2.su/a/FiphGijfJoR",
"#comment" : "'saint' album (#8888)",
"#class" : turbo.TurboAlbumExtractor,
},
# single media: /embed/ URL
{
"#url" : "https://turbo.cr/embed/6lC7mKrJst8",
"#class": turbo.TurboMediaExtractor,
"#pattern" : r"https://dl\d+.turbocdn.st/data/3b1ccebf3576f8d5aac3ee0e5a12da95.mp4",
"#sha1_content": "39037a029b3fe96f838b4545316caaa545c84075",
"count" : 1,
"date" : "dt:2024-10-18 00:00:00",
"extension": "mp4",
"file" : str,
"filename" : "3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
"id" : "6lC7mKrJst8",
"name" : "3b1ccebf3576f8d5aac3ee0e5a12da95",
"num" : 1,
},
# single media: /d/ URL for which zero results are expected
{
"#url" : "https://turbo.cr/d/M2IxMjVlM2ZiNGI5ODY5M2YxN2Q4NWNiNTM1OTAyMTUubXA0",
"#comment": "'Page not found'",
"#class" : turbo.TurboMediaExtractor,
"#count" : 0,
},
# legacy saint domains (.pk, .cr, .to): URL and class only
{
"#url" : "https://saint2.pk/embed/6lC7mKrJst8",
"#class": turbo.TurboMediaExtractor,
},
{
"#url" : "https://saint2.cr/embed/6lC7mKrJst8",
"#class": turbo.TurboMediaExtractor,
},
{
"#url" : "https://saint.to/embed/6lC7mKrJst8",
"#class": turbo.TurboMediaExtractor,
},
# media URLs on the legacy turbovid.cr / saint2.su domains
{
"#url" : "https://turbovid.cr/embed/WkD7hRaHdBpBI",
"#comment" : "'turbovid' URL/video",
"#category": ("lolisafe", "turbo", "media"),
"#class" : turbo.TurboMediaExtractor,
"#pattern" : r"https://dl\d+.turbocdn.st/data/\w+.mp4",
"extension" : "mp4",
"file" : str,
"filename" : "3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8-WkD7hRaHdBpBI",
"id" : "WkD7hRaHdBpBI",
"name" : "3b1ccebf3576f8d5aac3ee0e5a12da95-6lC7mKrJst8",
},
{
"#url" : "https://saint2.su/embed/WkD7hRaHdBpBI",
"#comment" : "'saint' URL/video",
"#class" : turbo.TurboMediaExtractor,
},
)