[comedywildlifephoto] add 'gallery' extractor (#8690)

This commit is contained in:
Mike Fährmann
2025-12-14 11:00:42 +01:00
parent 85b7f63971
commit 774cb1e1a0
5 changed files with 107 additions and 0 deletions

View File

@@ -205,6 +205,12 @@ Consider all listed sites to potentially be NSFW.
<td>Collections, Generated Files, individual Images, Image Listings, Models, Model Listings, Posts, Post Listings, Image Searches, Model Searches, Tag Searches, User Profiles, User Collections, User Images, Image Reactions, User Models, User Posts, User Videos, Video Reactions, Video Listings</td>
<td></td>
</tr>
<tr id="comedywildlifephoto" title="comedywildlifephoto">
<td>Comedy Wildlife Photography Awards</td>
<td>https://www.comedywildlifephoto.com/</td>
<td>Galleries</td>
<td></td>
</tr>
<tr id="comicvine" title="comicvine">
<td>Comic Vine</td>
<td>https://comicvine.gamespot.com/</td>

View File

@@ -45,6 +45,7 @@ modules = [
"chevereto",
"cien",
"civitai",
"comedywildlifephoto",
"comick",
"comicvine",
"cyberdrop",

View File

@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://www.comedywildlifephoto.com/"""
from .common import GalleryExtractor
from .. import text
class ComedywildlifephotoGalleryExtractor(GalleryExtractor):
    """Extractor for comedywildlifephoto galleries"""
    category = "comedywildlifephoto"
    root = "https://www.comedywildlifephoto.com"
    directory_fmt = ("{category}", "{section}", "{title}")
    filename_fmt = "{num:>03} {filename}.{extension}"
    archive_fmt = "{section}/{title}/{num}"
    pattern = (r"(?:https?://)?(?:www\.)?comedywildlifephoto\.com"
               r"(/gallery/[^/?#]+/[^/?#]+\.php)")
    example = "https://www.comedywildlifephoto.com/gallery/SECTION/TITLE.php"

    def metadata(self, page):
        """Return the gallery-wide metadata found in 'page'

        The result is a dict with the keys 'section', 'title' and
        'description'. All three values are HTML-unescaped, since
        'section' and 'title' are used in directory_fmt and archive_fmt.
        """
        # NOTE(review): the callable returned by text.extract_from()
        # presumably continues after its previous match, so the three
        # lookups below must stay in this order - confirm against
        # the 'text' module.
        extr = text.extract_from(page)

        section = text.unescape(extr("<h1>", "<").strip())
        title = text.unescape(extr(">", "<"))
        description = text.unescape(extr('class="c1 np">', "<div"))

        return {
            "section": section,
            "title"  : title,
            "description": description,
        }

    def images(self, page):
        """Return a list of (url, metadata) tuples for all images in 'page'

        Each <figure> element provides one entry. Its metadata dict
        contains 'width', 'height' and 'caption'.
        """
        results = []

        for fig in text.extract_iter(page, "<figure", "</figure>"):
            path = text.extr(fig, 'href="', '"')
            if not path:
                # without a link target there is no file to download;
                # 'self.root' on its own would point at the site itself
                continue

            # 'data-size' is split at "x" into a width and a height part
            size = text.extr(fig, 'data-size="', '"')
            width, _, height = size.partition("x")
            caption = text.extr(fig, "<figcaption>", "<")

            results.append((self.root + path, {
                "width"  : text.parse_int(width),
                "height" : text.parse_int(height),
                "caption": text.unescape(caption),
            }))

        return results

View File

@@ -43,6 +43,7 @@ CATEGORY_MAP = {
"cfake" : "Celebrity Fakes",
"cien" : "Ci-en",
"cohost" : "cohost!",
"comedywildlifephoto": "Comedy Wildlife Photography Awards",
"comicvine" : "Comic Vine",
"cyberfile" : "CyberFile",
"dankefuerslesen": "Danke fürs Lesen",

View File

@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import comedywildlifephoto
# Expected extraction results for the 'comedywildlifephoto' extractors.
# Keys starting with "#" configure the test run itself (URL, expected
# extractor class, checks on the produced URLs); all remaining keys are
# compared against the metadata of the extracted files.
__tests__ = (

    # gallery with a non-empty 'description'
    {
        "#url"       : "https://www.comedywildlifephoto.com/gallery/finalists/2024_finalists.php",
        "#class"     : comedywildlifephoto.ComedywildlifephotoGalleryExtractor,
        "#pattern"   : r"https://www\.comedywildlifephoto\.com/images/gallery/\d/000017\d\d_p\.webp",
        "#count"     : 44,

        "count"      : 44,
        "num"        : range(1, 44),
        "description": "<p>Here are the finalists from the 2024 Comedy Wildlife Photography Awards competition. Winners will be announced on the 10th of December 2024. Voting for the People's Choice Award runs from 26th September until 31st October.</p>",
        "caption"    : str,
        "filename"   : str,
        "extension"  : "webp",
        "width"      : range(750, 1600),
        "height"     : range(750, 1600),
        "section"    : "Gallery of Winners and Finalists",
        "title"      : "2024 Finalists",
    },

    # gallery with an empty 'description'; only the file selected
    # by "#range" is compared, using exact values
    {
        "#url"       : "https://www.comedywildlifephoto.com/gallery/finalists/2022_finalists.php",
        "#comment"   : "empty 'description'",
        "#class"     : comedywildlifephoto.ComedywildlifephotoGalleryExtractor,
        "#range"     : "4",
        "#results"   : "https://www.comedywildlifephoto.com/images/gallery/9/00001169_p.jpg",

        "count"      : 43,
        "num"        : 4,
        "description": "",
        "caption"    : "",
        "filename"   : "00001169_p",
        "extension"  : "jpg",
        "width"      : 1600,
        "height"     : 900,
        "section"    : "Gallery of Winners and Finalists",
        "title"      : "2022 Finalists",
    },

)