[comedywildlifephoto] add 'gallery' extractor (#8690)
This commit is contained in:
@@ -205,6 +205,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Collections, Generated Files, individual Images, Image Listings, Models, Model Listings, Posts, Post Listings, Image Searches, Model Searches, Tag Searches, User Profiles, User Collections, User Images, Image Reactions, User Models, User Posts, User Videos, Video Reactions, Video Listings</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="comedywildlifephoto" title="comedywildlifephoto">
|
||||
<td>Comedy Wildlife Photography Awards</td>
|
||||
<td>https://www.comedywildlifephoto.com/</td>
|
||||
<td>Galleries</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="comicvine" title="comicvine">
|
||||
<td>Comic Vine</td>
|
||||
<td>https://comicvine.gamespot.com/</td>
|
||||
|
||||
@@ -45,6 +45,7 @@ modules = [
|
||||
"chevereto",
|
||||
"cien",
|
||||
"civitai",
|
||||
"comedywildlifephoto",
|
||||
"comick",
|
||||
"comicvine",
|
||||
"cyberdrop",
|
||||
|
||||
51
gallery_dl/extractor/comedywildlifephoto.py
Normal file
51
gallery_dl/extractor/comedywildlifephoto.py
Normal file
@@ -0,0 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://www.comedywildlifephoto.com/"""
|
||||
|
||||
from .common import GalleryExtractor
|
||||
from .. import text
|
||||
|
||||
|
||||
class ComedywildlifephotoGalleryExtractor(GalleryExtractor):
    """Gallery extractor for comedywildlifephoto.com

    Matches gallery pages of the form '/gallery/SECTION/TITLE.php' and
    yields one file per <figure> element found on such a page.
    """
    category = "comedywildlifephoto"
    root = "https://www.comedywildlifephoto.com"
    directory_fmt = ("{category}", "{section}", "{title}")
    filename_fmt = "{num:>03} {filename}.{extension}"
    archive_fmt = "{section}/{title}/{num}"
    pattern = (r"(?:https?://)?(?:www\.)?comedywildlifephoto\.com"
               r"(/gallery/[^/?#]+/[^/?#]+\.php)")
    example = "https://www.comedywildlifephoto.com/gallery/SECTION/TITLE.php"

    def metadata(self, page):
        """Collect 'section', 'title', and 'description' from 'page'"""
        extr = text.extract_from(page)

        # NOTE(review): the three lookups are presumably positional, each
        # one continuing after the previous match -- keep them in this order.
        section = extr("<h1>", "<").strip()
        title = extr(">", "<")
        description = text.unescape(extr('class="c1 np">', "<div"))

        return {
            "section": section,
            "title"  : title,
            "description": description,
        }

    def images(self, page):
        """Return a list of (URL, metadata) tuples, one per <figure>"""
        files = []

        for figure in text.extract_iter(page, "<figure", "</figure>"):
            # 'data-size' is split at "x" into its width and height parts
            size = text.extr(figure, 'data-size="', '"')
            width, _, height = size.partition("x")

            # the extracted 'href' value is appended to the site root
            url = self.root + text.extr(figure, 'href="', '"')
            info = {
                "width"  : text.parse_int(width),
                "height" : text.parse_int(height),
                "caption": text.unescape(
                    text.extr(figure, "<figcaption>", "<")),
            }
            files.append((url, info))

        return files
|
||||
@@ -43,6 +43,7 @@ CATEGORY_MAP = {
|
||||
"cfake" : "Celebrity Fakes",
|
||||
"cien" : "Ci-en",
|
||||
"cohost" : "cohost!",
|
||||
"comedywildlifephoto": "Comedy Wildlife Photography Awards",
|
||||
"comicvine" : "Comic Vine",
|
||||
"cyberfile" : "CyberFile",
|
||||
"dankefuerslesen": "Danke fürs Lesen",
|
||||
|
||||
48
test/results/comedywildlifephoto.py
Normal file
48
test/results/comedywildlifephoto.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import comedywildlifephoto
|
||||
|
||||
|
||||
# Expected results for ComedywildlifephotoGalleryExtractor.
# NOTE(review): keys prefixed with "#" look like directives for the test
# runner (URL, extractor class, URL pattern/count checks), while the plain
# keys look like expected metadata values -- confirm against the runner.
__tests__ = (
{
    # 2024 finalists gallery: non-empty description, WebP images
    "#url"     : "https://www.comedywildlifephoto.com/gallery/finalists/2024_finalists.php",
    "#class"   : comedywildlifephoto.ComedywildlifephotoGalleryExtractor,
    "#pattern" : r"https://www\.comedywildlifephoto\.com/images/gallery/\d/000017\d\d_p\.webp",
    "#count"   : 44,

    "count"      : 44,
    # NOTE(review): how the runner treats the upper bound of a 'range'
    # is not visible here -- verify before changing these bounds.
    "num"        : range(1, 44),
    "description": "<p>Here are the finalists from the 2024 Comedy Wildlife Photography Awards competition. Winners will be announced on the 10th of December 2024. Voting for the People's Choice Award runs from 26th September until 31st October.</p>",
    "caption"    : str,
    "filename"   : str,
    "extension"  : "webp",
    "width"      : range(750, 1600),
    "height"     : range(750, 1600),
    "section"    : "Gallery of Winners and Finalists",
    "title"      : "2024 Finalists",
},

{
    # 2022 finalists gallery: empty description and caption, JPEG image;
    # only the entry selected by "#range" is compared
    "#url"     : "https://www.comedywildlifephoto.com/gallery/finalists/2022_finalists.php",
    "#comment" : "empty 'description'",
    "#class"   : comedywildlifephoto.ComedywildlifephotoGalleryExtractor,
    "#range"   : "4",
    "#results" : "https://www.comedywildlifephoto.com/images/gallery/9/00001169_p.jpg",

    "count"      : 43,
    "num"        : 4,
    "description": "",
    "caption"    : "",
    "filename"   : "00001169_p",
    "extension"  : "jpg",
    "width"      : 1600,
    "height"     : 900,
    "section"    : "Gallery of Winners and Finalists",
    "title"      : "2022 Finalists",
},

)
|
||||
Reference in New Issue
Block a user