* Implements turbo.py & remove from domain pattern from saints.py * Remove leftover commented pattern from saints.py * Make turbo.py comply with flake8 * Add album support * Improved metadata extraction for albums and single files & created turbo.py tests using saints.py test * Align turbo.py extractor with flake8 rules * Fix #class name on turbo.py tests * Fix #category test * Fix #category test x2 * Fix #category tests * Fix #category tests * Fix TurboMediaExtractor self.groups unpacking * update basic module formatting * replace 'saint' with 'turbo' in modules list * remove saint extractors and tests * update & simplify 'media' extractor * update & simplify 'album' extractor * update tests * update supportedsites * update 'category-map' & 'config-map' --------- Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -321,6 +321,7 @@ def main():
|
||||
catmap = {
|
||||
"coomer" : "coomerparty",
|
||||
"kemono" : "kemonoparty",
|
||||
"turbo" : "saint",
|
||||
"schalenetwork": "koharu",
|
||||
"naver-blog" : "naver",
|
||||
"naver-chzzk" : "chzzk",
|
||||
|
||||
@@ -175,6 +175,7 @@ def remap_categories():
|
||||
("chzzk" , "naver-chzzk"),
|
||||
("naverwebtoon", "naver-webtoon"),
|
||||
("pixiv" , "pixiv-novel"),
|
||||
("saint" , "turbo"),
|
||||
)
|
||||
elif not cmap:
|
||||
return
|
||||
|
||||
@@ -181,7 +181,6 @@ modules = [
|
||||
"rule34vault",
|
||||
"rule34xyz",
|
||||
"s3ndpics",
|
||||
"saint",
|
||||
"sankaku",
|
||||
"sankakucomplex",
|
||||
"schalenetwork",
|
||||
@@ -215,6 +214,7 @@ modules = [
|
||||
"tumblr",
|
||||
"tumblrgallery",
|
||||
"tungsten",
|
||||
"turbo",
|
||||
"twibooru",
|
||||
"twitter",
|
||||
"urlgalleries",
|
||||
|
||||
@@ -1,119 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024-2026 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://saint2.su/ and https://turbovid.cr/"""
|
||||
|
||||
from .lolisafe import LolisafeAlbumExtractor
|
||||
from .. import text
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:turbo(?:vid)?\.cr|saint\d*\.(?:su|pk|cr|to))"
|
||||
|
||||
|
||||
class SaintAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for saint albums

    Scrapes the album page at /a/<album_id> and builds one file dict
    per embedded media entry found in the page.
    """
    category = "saint"
    root = "https://saint2.su"
    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
    example = "https://saint2.su/a/ID"

    def fetch_album(self, album_id):
        """Return (files, album_metadata) for the given album ID

        NOTE: extraction is sequential via text.extract_from(); the
        order of the extr() calls mirrors the page markup and must not
        be rearranged.
        """
        # album metadata
        response = self.request(self.root + "/a/" + album_id)
        extr = text.extract_from(response.text)

        title = extr("<title>", "</title")
        descr = extr('name="description" content="', '"/>')
        files = []

        # one iteration per "/thumbs/..." thumbnail reference in the page
        while True:
            id2 = extr("/thumbs/", '"')
            if not id2:
                break

            # thumbnail basename looks like "<id2>-<ts>"; entries
            # without a "-" separator carry no timestamp part
            id2, sep, ts = id2.rpartition(".")[0].rpartition("-")
            if sep:
                date = self.parse_timestamp(ts)
            else:
                date = None
                id2 = ts

            files.append({
                "id"   : extr("/embed/", '"'),
                "id2"  : id2,
                "date" : date,
                # "extension": extr("<td>", "</"),
                "size" : text.parse_int(extr('data="', '"')),
                "file" : text.unescape(extr(
                    "onclick=\"play(", ")").strip("\"'")),
                "id_dl": extr("/d/", ")").rstrip("\"'"),
            })

        return files, {
            "album_id"    : album_id,
            # <title> is "<album name> - <site name>"; keep the left part
            "album_name"  : text.unescape(title.rpartition(" - ")[0]),
            "album_size"  : sum(file["size"] for file in files),
            "description" : text.unescape(descr),
            "count"       : len(files),
            # pass the album page URL as Referer for file downloads
            # (presumably required by the site -- TODO confirm)
            "_http_headers": {"Referer": response.url}
        }
|
||||
|
||||
|
||||
class SaintMediaExtractor(SaintAlbumExtractor):
    """Extractor for saint media links"""
    subcategory = "media"
    directory_fmt = ("{category}",)
    # matches both "/embed/<id>" and "/d/<id>"; the second group
    # ("embe") distinguishes embed pages from direct-download pages
    pattern = BASE_PATTERN + r"(/(embe)?d/([^/?#]+))"
    example = "https://saint2.su/embed/ID"

    def fetch_album(self, album_id):
        """Return a 1-element file tuple plus placeholder album metadata

        Any exception is logged and swallowed so a single broken link
        does not abort the whole job.
        """
        try:
            # groups from 'pattern': URL path, optional "embe", media ID
            path, embed, _ = self.groups

            url = self.root + path
            response = self.request(url)
            extr = text.extract_from(response.text)

            if embed:
                # /embed/ page: thumbnail basename is "<id2>-<ts>";
                # entries without "-" carry no timestamp
                id2, sep, ts = extr(
                    "/thumbs/", '"').rpartition(".")[0].rpartition("-")
                if sep:
                    date = self.parse_timestamp(ts)
                else:
                    date = None
                    id2 = ts

                file = {
                    "id"   : album_id,
                    "id2"  : id2,
                    "date" : date,
                    "file" : text.unescape(extr('<source src="', '"')),
                    "id_dl": extr("/d/", "'"),
                }

            else:  # /d/
                # direct-download page: only the target URL is available,
                # so the media ID doubles as name/filename
                file = {
                    "file"     : text.unescape(extr('<a href="', '"')),
                    "id"       : album_id,
                    "id_dl"    : album_id,
                    "name"     : album_id,
                    "filename" : album_id,
                    "extension": "mp4",
                }

            # downloads need the media page as Referer
            file["_http_headers"] = {"Referer": response.url}
        except Exception as exc:
            # best-effort: report the error and return empty results
            self.log.error("%s: %s", exc.__class__.__name__, exc)
            return (), {}

        return (file,), {
            "album_id"   : "",
            "album_name" : "",
            "album_size" : -1,
            "description": "",
            "count"      : 1,
        }
|
||||
94
gallery_dl/extractor/turbo.py
Normal file
94
gallery_dl/extractor/turbo.py
Normal file
@@ -0,0 +1,94 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024-2026 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://turbo.cr/"""
|
||||
|
||||
from .lolisafe import LolisafeAlbumExtractor
|
||||
from .. import text
|
||||
|
||||
BASE_PATTERN = (r"(?:https?://)?(?:"
|
||||
r"(?:www\.)?turbo(?:vid)?\.cr|"
|
||||
r"saint\d*\.(?:su|pk|cr|to))")
|
||||
|
||||
|
||||
class TurboAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for turbo.cr albums

    Parses the album page's file table and resolves each entry's
    download URL through the site's signing API.
    """
    category = "turbo"
    root = "https://turbo.cr"
    pattern = BASE_PATTERN + r"/a/([^/?#]+)"
    example = "https://turbo.cr/a/ID"

    def fetch_album(self, album_id):
        """Return (files, album_metadata) for the given album ID

        NOTE: text.extract_from() consumes the page sequentially;
        the extr() call order matches the markup order.
        """
        url = f"{self.root}/a/{album_id}"
        extr = text.extract_from(self.request(url).text)
        title = extr("<h1 ", "<")
        descr = extr("<p ", "<")
        # the file table body; one <tr> per file
        tbody = extr('id="fileTbody"', '</tbody>')
        headers = {"Referer": url}

        return self._extract_files(tbody, headers), {
            "album_id"    : album_id,
            # title/descr still include the tag's attributes; drop
            # everything up to and including the closing ">"
            "album_name"  : text.unescape(title[title.find(">")+1:]),
            "description" : text.unescape(descr[descr.find(">")+1:]),
            "album_size"  : sum(map(text.parse_int, text.extract_iter(
                tbody, 'data-size="', '"'))),
            "count"       : tbody.count("data-id="),
            "_http_headers": headers,
        }

    def _extract_files(self, body, headers):
        """Yield one file dict per <tr> row of the album's file table"""
        for file in text.extract_iter(body, "<tr", "</tr>"):
            data_id = text.extr(file, 'data-id="', '"')
            # resolve the actual download URL via the signing endpoint
            url = f"{self.root}/api/sign?v={data_id}"
            data = self.request_json(url, headers=headers)
            name = data.get("original_filename") or data.get("filename")
            yield text.nameext_from_name(name, {
                "id"  : data_id,
                "file": data.get("url"),
                "size": text.parse_int(text.extr(file, 'data-size="', '"')),
                "_http_headers": headers,
            })
|
||||
|
||||
|
||||
class TurboMediaExtractor(TurboAlbumExtractor):
    """Extractor for turbo.cr media links

    Handles single-file pages (/embed/<id>, /d/<id>, /v/<id>) and
    returns them wrapped in the same (files, metadata) shape the
    album extractor produces.
    """
    subcategory = "media"
    directory_fmt = ("{category}",)
    pattern = BASE_PATTERN + r"/(?:embe)?[dv]/([^/?#]+)"
    example = "https://turbo.cr/embed/ID"

    def fetch_album(self, album_id):
        """Return a 1-element file tuple plus placeholder album metadata

        Any exception is logged and swallowed so one broken media link
        does not abort a larger job (best-effort, same contract as the
        album extractor).
        """
        try:
            return (self._extract_file(album_id),), {
                "album_id"   : "",
                "album_name" : "",
                "album_size" : -1,
                "description": "",
                "count"      : 1,
            }
        except Exception as exc:
            self.log.error("%s: %s", exc.__class__.__name__, exc)
            return (), {}

    def _extract_file(self, data_id):
        """Build the file dict for a single media ID"""
        url = f"{self.root}/d/{data_id}"
        headers = {"Referer": url}
        page = self.request(url).text
        # human-readable byte count from the page, e.g. "1,234,567"
        size = text.extr(page, 'id="fileSizeBytes">', '<')
        # upload date is the <span> immediately before "File ID:"
        date = text.extract(page, "<span>", "<", page.find("File ID:"))[0]

        # resolve the actual download URL via the signing endpoint
        url = f"{self.root}/api/sign?v={data_id}"
        data = self.request_json(url, headers=headers)
        name = data.get("original_filename") or data.get("filename")
        return text.nameext_from_name(name, {
            "id"  : data_id,
            "file": data.get("url"),
            # strip thousands separators before parsing: the previous
            # replace("+", "+") was a no-op, leaving parse_float to stop
            # at the first "," and truncate the size
            "size": int(text.parse_float(size.replace(",", ""))),
            "date": self.parse_datetime_iso(date),
            "_http_headers": headers,
        })
|
||||
Reference in New Issue
Block a user