[comedywildlifephoto] add 'gallery' extractor (#8690)

2025-12-14 11:00:42 +01:00
parent 85b7f63971
commit 774cb1e1a0
5 changed files with 107 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -205,6 +205,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Collections, Generated Files, individual Images, Image Listings, Models, Model Listings, Posts, Post Listings, Image Searches, Model Searches, Tag Searches, User Profiles, User Collections, User Images, Image Reactions, User Models, User Posts, User Videos, Video Reactions, Video Listings</td>
    <td></td>
 </tr>
+<tr id="comedywildlifephoto" title="comedywildlifephoto">
+    <td>Comedy Wildlife Photography Awards</td>
+    <td>https://www.comedywildlifephoto.com/</td>
+    <td>Galleries</td>
+    <td></td>
+</tr>
 <tr id="comicvine" title="comicvine">
    <td>Comic Vine</td>
    <td>https://comicvine.gamespot.com/</td>
--- a/gallery_dl/extractor/init.py
+++ b/gallery_dl/extractor/init.py
@@ -45,6 +45,7 @@ modules = [
    "chevereto",
    "cien",
    "civitai",
+    "comedywildlifephoto",
    "comick",
    "comicvine",
    "cyberdrop",
--- a/gallery_dl/extractor/comedywildlifephoto.py
+++ b/gallery_dl/extractor/comedywildlifephoto.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.comedywildlifephoto.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class ComedywildlifephotoGalleryExtractor(GalleryExtractor):
+    """Extractor for comedywildlifephoto galleries"""
+    category = "comedywildlifephoto"
+    root = "https://www.comedywildlifephoto.com"
+    directory_fmt = ("{category}", "{section}", "{title}")
+    filename_fmt = "{num:>03} {filename}.{extension}"
+    archive_fmt = "{section}/{title}/{num}"
+    pattern = (r"(?:https?://)?(?:www\.)?comedywildlifephoto\.com"
+               r"(/gallery/[^/?#]+/[^/?#]+\.php)")
+    example = "https://www.comedywildlifephoto.com/gallery/SECTION/TITLE.php"
+
+    def metadata(self, page):
+        extr = text.extract_from(page)
+
+        return {
+            "section": extr("<h1>", "<").strip(),
+            "title"  : extr(">", "<"),
+            "description": text.unescape(extr(
+                'class="c1 np">', "<div")),
+        }
+
+    def images(self, page):
+        results = []
+
+        for fig in text.extract_iter(page, "<figure", "</figure>"):
+            width, _, height = text.extr(
+                fig, 'data-size="', '"').partition("x")
+            results.append((
+                self.root + text.extr(fig, 'href="', '"'), {
+                    "width"  : text.parse_int(width),
+                    "height" : text.parse_int(height),
+                    "caption": text.unescape(text.extr(
+                        fig, "<figcaption>", "<")),
+                }
+            ))
+
+        return results
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -43,6 +43,7 @@ CATEGORY_MAP = {
    "cfake"          : "Celebrity Fakes",
    "cien"           : "Ci-en",
    "cohost"         : "cohost!",
+    "comedywildlifephoto": "Comedy Wildlife Photography Awards",
    "comicvine"      : "Comic Vine",
    "cyberfile"      : "CyberFile",
    "dankefuerslesen": "Danke fürs Lesen",
--- a/test/results/comedywildlifephoto.py
+++ b/test/results/comedywildlifephoto.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import comedywildlifephoto
+
+
+__tests__ = (
+{
+    "#url"     : "https://www.comedywildlifephoto.com/gallery/finalists/2024_finalists.php",
+    "#class"   : comedywildlifephoto.ComedywildlifephotoGalleryExtractor,
+    "#pattern" : r"https://www\.comedywildlifephoto\.com/images/gallery/\d/000017\d\d_p\.webp",
+    "#count"   : 44,
+
+    "count"      : 44,
+    "num"        : range(1, 44),  # NOTE(review): 44 files are expected; confirm the test runner treats the range stop as inclusive
+    "description": "<p>Here are the finalists from the 2024 Comedy Wildlife Photography Awards competition. Winners will be announced on the 10th of December 2024. Voting for the People's Choice Award runs from 26th September until 31st October.</p>",
+    "caption"    : str,
+    "filename"   : str,
+    "extension"  : "webp",
+    "width"      : range(750, 1600),
+    "height"     : range(750, 1600),
+    "section"    : "Gallery of Winners and Finalists",
+    "title"      : "2024 Finalists",
+},
+
+{
+    "#url"     : "https://www.comedywildlifephoto.com/gallery/finalists/2022_finalists.php",
+    "#comment" : "empty 'description'",
+    "#class"   : comedywildlifephoto.ComedywildlifephotoGalleryExtractor,
+    "#range"   : "4",  # presumably restricts the run to the 4th file, matching "num": 4 below - confirm
+    "#results" : "https://www.comedywildlifephoto.com/images/gallery/9/00001169_p.jpg",
+
+    "count"      : 43,
+    "num"        : 4,
+    "description": "",  # this gallery is expected to yield no description text (see "#comment")
+    "caption"    : "",
+    "filename"   : "00001169_p",
+    "extension"  : "jpg",
+    "width"      : 1600,
+    "height"     : 900,
+    "section"    : "Gallery of Winners and Finalists",
+    "title"      : "2022 Finalists",
+},
+
+)