[chevereto] add 'video' extractor (#8149)

This commit is contained in:
Mike Fährmann
2025-08-31 12:47:19 +02:00
parent 75e7dc818d
commit 8047aaccd5
3 changed files with 81 additions and 15 deletions

View File

@@ -1176,19 +1176,19 @@ Consider all listed sites to potentially be NSFW.
<tr id="jpgfish" title="jpgfish">
<td>JPG Fish</td>
<td>https://jpg5.su/</td>
<td>Albums, individual Images, User Profiles</td>
<td>Albums, individual Images, User Profiles, Videos</td>
<td></td>
</tr>
<tr id="imgkiwi" title="imgkiwi">
<td>IMG.Kiwi</td>
<td>https://img.kiwi/</td>
<td>Albums, individual Images, User Profiles</td>
<td>Albums, individual Images, User Profiles, Videos</td>
<td></td>
</tr>
<tr id="imagepond" title="imagepond">
<td>ImagePond</td>
<td>https://imagepond.net/</td>
<td>Albums, individual Images, User Profiles</td>
<td>Albums, individual Images, User Profiles, Videos</td>
<td></td>
</tr>

View File

@@ -54,7 +54,7 @@ BASE_PATTERN = CheveretoExtractor.update({
class CheveretoImageExtractor(CheveretoExtractor):
"""Extractor for chevereto Images"""
"""Extractor for chevereto images"""
subcategory = "image"
pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)"
example = "https://jpg2.su/img/TITLE.ID"
@@ -74,22 +74,66 @@ class CheveretoImageExtractor(CheveretoExtractor):
url, b"seltilovessimpcity@simpcityhatesscrapers",
fromhex=True)
image = {
file = {
"id" : self.path.rpartition(".")[2],
"url" : url,
"album": text.extr(extr("Added to <a", "/a>"), ">", "<"),
"album": text.remove_html(extr(
"Added to <a", "</a>").rpartition(">")[2]),
"date" : text.parse_datetime(extr(
'<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
"user" : extr('username: "', '"'),
}
text.nameext_from_url(image["url"], image)
yield Message.Directory, image
yield Message.Url, image["url"], image
text.nameext_from_url(file["url"], file)
yield Message.Directory, file
yield Message.Url, file["url"], file
class CheveretoVideoExtractor(CheveretoExtractor):
    """Extractor for chevereto videos"""
    subcategory = "video"
    pattern = BASE_PATTERN + r"(/video/[^/?#]+)"
    example = "https://imagepond.net/video/TITLE.ID"

    def items(self):
        """Yield the Directory and Url messages for a single video page

        Requests 'self.root + self.path', reads the video's metadata out
        of the returned page text, and yields one Message.Directory
        followed by one Message.Url that both carry the metadata dict.
        """
        url = self.root + self.path
        page = self.request(url).text
        # 'extr' is called once per field in the order written below;
        # keep that order in sync with the order of the markers in the page
        extr = text.extract_from(page)

        file = {
            # the ID is whatever follows the last '.' in the URL path
            "id"       : self.path.rpartition(".")[2],
            "title"    : text.unescape(extr(
                'property="og:title" content="', '"')),
            "thumbnail": extr(
                'property="og:image" content="', '"'),
            "url"      : extr(
                'property="og:video" content="', '"'),
            "width"    : text.parse_int(extr(
                'property="video:width" content="', '"')),
            "height"   : text.parse_int(extr(
                'property="video:height" content="', '"')),
            # NOTE(review): the end marker here is an empty string;
            # confirm this delimiter is intended and was not lost
            "duration" : extr(
                'class="far fa-clock"></i>', ""),
            # keep only the text after the last '>' of the album link
            "album"    : text.remove_html(extr(
                "Added to <a", "</a>").rpartition(">")[2]),
            "date"     : text.parse_datetime(extr(
                '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
            "user"     : extr('username: "', '"'),
        }

        # Convert an "M:SS" duration string to a number of seconds.
        # Best effort: a value that does not parse is left unchanged.
        try:
            minutes, _, seconds = file["duration"].partition(":")
            file["duration"] = int(minutes) * 60 + int(seconds)
        except ValueError:
            pass

        text.nameext_from_url(file["url"], file)
        yield Message.Directory, file
        yield Message.Url, file["url"], file
class CheveretoAlbumExtractor(CheveretoExtractor):
"""Extractor for chevereto Albums"""
"""Extractor for chevereto albums"""
subcategory = "album"
pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)"
example = "https://jpg2.su/album/TITLE.ID"
@@ -109,7 +153,7 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
class CheveretoUserExtractor(CheveretoExtractor):
"""Extractor for chevereto Users"""
"""Extractor for chevereto users"""
subcategory = "user"
pattern = BASE_PATTERN + r"(/[^/?#]+(?:/albums)?)"
example = "https://jpg2.su/USER"
@@ -119,8 +163,11 @@ class CheveretoUserExtractor(CheveretoExtractor):
if self.path.endswith("/albums"):
data = {"_extractor": CheveretoAlbumExtractor}
for url in self._pagination(url):
yield Message.Queue, url, data
else:
data = {"_extractor": CheveretoImageExtractor}
for url in self._pagination(url):
yield Message.Queue, url, data
data_image = {"_extractor": CheveretoImageExtractor}
data_video = {"_extractor": CheveretoVideoExtractor}
for url in self._pagination(url):
data = data_video if "/video/" in url else data_image
yield Message.Queue, url, data

View File

@@ -22,7 +22,26 @@ __tests__ = (
"id" : "TJNphg",
"url" : "https://media.imagepond.net/media/IMG_20250217_1606226b345a5dbd0e8971.jpg",
"user" : "dariusbbb24",
},
{
"#url" : "https://imagepond.net/video/1000423939.zb8Fxy",
"#category": ("chevereto", "imagepond", "video"),
"#class" : chevereto.CheveretoVideoExtractor,
"#results" : "https://media.imagepond.net/media/100042393993a6bfa75fc505e9.mp4",
"album" : "",
"date" : "dt:2025-08-29 18:01:20",
"duration" : 7,
"extension": "mp4",
"filename" : "100042393993a6bfa75fc505e9",
"height" : 1280,
"id" : "zb8Fxy",
"thumbnail": "https://media.imagepond.net/media/100042393993a6bfa75fc505e9.fr.jpeg",
"title" : "1000423939",
"url" : "https://media.imagepond.net/media/100042393993a6bfa75fc505e9.mp4",
"user" : "christiankita",
"width" : 720,
},
{