[fitnakedgirls] add support (#8671)
* [fitnakedgirls] add extractor

  Add support for fitnakedgirls.com:
  - Photo galleries (/photos/gallery/)
  - Category pages (/photos/gallery/category/)
  - Tag pages (/photos/tag/)
  - Video posts (/videos/)
  - Blog posts (/fitblog/)

  Handles both newer (wp-block-image) and older (size-large) templates.

* simplify & fix
  - use '_extract_title' method
  - move '_pagination' into base class
  - update 'FitnakedgirlsTagExtractor' pattern

* update docs/supportedsites

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -349,6 +349,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Posts, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="fitnakedgirls" title="fitnakedgirls">
|
||||
<td>FitNakedGirls</td>
|
||||
<td>https://fitnakedgirls.com/</td>
|
||||
<td>Blogs, Categories, Galleries, Tag Searches, Videos</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="flickr" title="flickr">
|
||||
<td>Flickr</td>
|
||||
<td>https://www.flickr.com/</td>
|
||||
|
||||
@@ -68,6 +68,7 @@ modules = [
|
||||
"fapello",
|
||||
"fapachi",
|
||||
"fikfap",
|
||||
"fitnakedgirls",
|
||||
"flickr",
|
||||
"furaffinity",
|
||||
"furry34",
|
||||
|
||||
208
gallery_dl/extractor/fitnakedgirls.py
Normal file
208
gallery_dl/extractor/fitnakedgirls.py
Normal file
@@ -0,0 +1,208 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://fitnakedgirls.com/"""
|
||||
|
||||
from .common import GalleryExtractor, Extractor, Message
|
||||
from .. import text
|
||||
|
||||
# Regex prefix shared by all URL patterns in this module:
# the "http(s)://" scheme and the "www." subdomain are both optional
BASE_PATTERN = r"(?:https?://)?(?:www\.)?fitnakedgirls\.com"
|
||||
|
||||
|
||||
class FitnakedgirlsExtractor(Extractor):
    """Base class for fitnakedgirls extractors"""
    category = "fitnakedgirls"
    root = "https://fitnakedgirls.com"

    def items(self):
        """Queue every URL returned by galleries() as a gallery"""
        queue_data = {"_extractor": FitnakedgirlsGalleryExtractor}
        for gallery_url in self.galleries():
            yield Message.Queue, gallery_url, queue_data

    def _pagination(self, base):
        """Yield post links from 'base' and its 'page/N/' successors"""
        page_url = base
        next_num = 2

        while True:
            html = self.request(page_url).text

            for entry in text.extract_iter(
                    html, 'class="entry-body', "</a>"):
                yield text.extr(entry, 'href="', '"')

            # go on only while the current page links to the next one
            page_url = f"{base}page/{next_num}/"
            if f'href="{page_url}"' not in html:
                break
            next_num += 1

    def _extract_title(self, extr, sep=" - "):
        """Return the <title> text without its last 'sep'-separated part"""
        raw = text.unescape(extr("<title>", "<"))
        head, found, _ = raw.rpartition(sep)
        # 'found' is empty when 'sep' does not occur in the title
        return (head if found else raw).strip()
|
||||
|
||||
|
||||
class FitnakedgirlsGalleryExtractor(GalleryExtractor, FitnakedgirlsExtractor):
    """Extractor for fitnakedgirls galleries"""
    directory_fmt = ("{category}", "{title}")
    filename_fmt = "{filename}.{extension}"
    archive_fmt = "{gallery_id}_{filename}"
    pattern = rf"{BASE_PATTERN}/photos/gallery/([\w-]+)/?$"
    example = "https://fitnakedgirls.com/photos/gallery/MODEL-nude/"

    def __init__(self, match):
        gallery_url = f"{self.root}/photos/gallery/{match[1]}/"
        GalleryExtractor.__init__(self, match, gallery_url)

    def metadata(self, page):
        """Collect gallery metadata from the gallery page's HTML"""
        extr = text.extract_from(page)
        name = self._extract_title(extr)

        # Cut the title at the first matching marker
        # to get a cleaner model name
        for marker in (" Nudes", " Nude", " nudes", " nude"):
            head, found, _ = name.partition(marker)
            if found:
                name = head
                break

        # lookup order on 'extr': title, post ID, publication date
        gallery_id = text.parse_int(extr('data-post-id="', '"'))
        published = self.parse_datetime_iso(extr(
            'article:published_time" content="', '"'))

        return {
            "gallery_id"  : gallery_id,
            "gallery_slug": self.groups[0],
            "model"       : name,
            "title"       : name,
            "date"        : published,
        }

    def images(self, page):
        """Return a list of (url, None) tuples for the gallery's media"""
        body = text.extr(
            page, 'itemprop="articleBody"', '<!-- .entry-content -->') or page

        def uploads(fragments, attr):
            # first 'attr' value of each fragment, kept if it is an upload
            found = []
            for fragment in fragments:
                src = text.extr(fragment, attr, '"')
                if src and "/wp-content/uploads/" in src:
                    found.append((src, None))
            return found

        # Videos from wp-block-video figures
        results = uploads(text.extract_iter(
            body, '<figure class="wp-block-video">', '</figure>'), 'src="')

        # Images from wp-block-image figures (newer template)
        results += uploads(text.extract_iter(
            body, '<figure class="wp-block-image', '</figure>'), 'data-src="')

        # Fallback: images with size-large class (older template)
        if not results:
            results = uploads(
                (img for img in text.extract_iter(body, "<img ", ">")
                 if "size-large" in img),
                'data-src="')

        return results
|
||||
|
||||
|
||||
class FitnakedgirlsCategoryExtractor(FitnakedgirlsExtractor):
    """Extractor for fitnakedgirls category pages"""
    subcategory = "category"
    pattern = rf"{BASE_PATTERN}/photos/gallery/category/([\w-]+)"
    example = "https://fitnakedgirls.com/photos/gallery/category/CATEGORY/"

    def galleries(self):
        """Return the paginated post links of this category"""
        category_slug = self.groups[0]
        return self._pagination(
            f"{self.root}/photos/gallery/category/{category_slug}/")
|
||||
|
||||
|
||||
class FitnakedgirlsTagExtractor(FitnakedgirlsExtractor):
    """Extractor for fitnakedgirls tag pages"""
    subcategory = "tag"
    pattern = rf"{BASE_PATTERN}/photos/gallery/tag/([\w-]+)"
    example = "https://fitnakedgirls.com/photos/gallery/tag/TAG/"

    def galleries(self):
        """Return the paginated post links of this tag"""
        tag_slug = self.groups[0]
        return self._pagination(
            f"{self.root}/photos/gallery/tag/{tag_slug}/")
|
||||
|
||||
|
||||
class FitnakedgirlsVideoExtractor(FitnakedgirlsExtractor):
    """Extractor for fitnakedgirls video posts"""
    subcategory = "video"
    directory_fmt = ("{category}", "{title}")
    filename_fmt = "{filename}.{extension}"
    archive_fmt = "{video_id}_{filename}"
    pattern = rf"{BASE_PATTERN}/videos/(\d+)/(\d+)/([\w-]+)"
    example = "https://fitnakedgirls.com/videos/2025/08/VIDEO-TITLE/"

    def items(self):
        """Yield one directory message, then one URL message per video"""
        year, month, slug = self.groups
        post_url = f"{self.root}/videos/{year}/{month}/{slug}/"
        html = self.request(post_url).text

        # lookup order on 'read': title, post ID, publication date
        read = text.extract_from(html)
        title = self._extract_title(read, " | ")
        video_id = text.parse_int(read('data-post-id="', '"'))
        published = self.parse_datetime_iso(
            read('article:published_time" content="', '"'))

        data = {
            "slug"    : slug,
            "title"   : title,
            "video_id": video_id,
            "date"    : published,
        }

        yield Message.Directory, "", data

        body = text.extr(
            html, 'itemprop="articleBody"', '<!-- .entry-content -->') or html
        for video in text.extract_iter(body, "<video ", "</video>"):
            src = text.extr(video, 'src="', '"')
            if not src or "/wp-content/uploads/" not in src:
                continue
            yield Message.Url, src, text.nameext_from_url(src, data)
|
||||
|
||||
|
||||
class FitnakedgirlsBlogExtractor(FitnakedgirlsExtractor):
    """Extractor for fitnakedgirls blog posts"""
    subcategory = "blog"
    directory_fmt = ("{category}", "{title}")
    filename_fmt = "{filename}.{extension}"
    archive_fmt = "{post_id}_{filename}"
    pattern = rf"{BASE_PATTERN}/fitblog/([\w-]+)"
    example = "https://fitnakedgirls.com/fitblog/MODEL-NAME/"

    def items(self):
        """Yield one directory message, then one URL message per image"""
        slug = self.groups[0]
        url = f"{self.root}/fitblog/{slug}/"
        page = self.request(url).text

        # lookup order on 'extr': title, post ID, publication date
        extr = text.extract_from(page)
        data = {
            "slug"   : slug,
            "title"  : self._extract_title(extr),
            "post_id": text.parse_int(extr('data-post-id="', '"')),
            "date"   : self.parse_datetime_iso(
                extr('article:published_time" content="', '"')),
        }

        yield Message.Directory, "", data

        # Extract images from wp-block-image figures
        content = text.extr(
            page, 'itemprop="articleBody"', '<!-- .entry-content -->') or page
        for figure in text.extract_iter(
                content, '<figure class="wp-block-image', '</figure>'):
            # Try srcset first for highest resolution
            src = ""
            if srcset := text.extr(figure, 'srcset="', '"'):
                src = self._largest_from_srcset(srcset)

            # Fallback to src
            if "/wp-content/uploads/" not in src:
                src = text.extr(figure, 'src="', '"')

            if src and "/wp-content/uploads/" in src:
                yield Message.Url, src, text.nameext_from_url(src, data)

    @staticmethod
    def _largest_from_srcset(srcset):
        """Return the URL of the widest candidate in a 'srcset' value

        Candidates are compared by their 'w' width descriptor, since the
        order of 'srcset' entries does not have to follow image size.
        A candidate without a numeric width counts as width 0 and ties go
        to the later candidate, so a 'srcset' without usable widths still
        yields its last URL.
        """
        best_url = ""
        best_width = -1

        for candidate in srcset.split(", "):
            url, _, descriptor = candidate.strip().partition(" ")
            digits = descriptor.strip().rstrip("w")
            width = int(digits) if digits.isdecimal() else 0
            if width >= best_width:
                best_url = url
                best_width = width

        return best_url
|
||||
@@ -62,6 +62,7 @@ CATEGORY_MAP = {
|
||||
"fappic" : "Fappic.com",
|
||||
"fashionnova" : "Fashion Nova",
|
||||
"fikfap" : "FikFap",
|
||||
"fitnakedgirls" : "FitNakedGirls",
|
||||
"furaffinity" : "Fur Affinity",
|
||||
"furry34" : "Furry 34 com",
|
||||
"girlswithmuscle": "Girls with Muscle",
|
||||
|
||||
81
test/results/fitnakedgirls.py
Normal file
81
test/results/fitnakedgirls.py
Normal file
@@ -0,0 +1,81 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import fitnakedgirls
|
||||
|
||||
|
||||
__tests__ = (
    # gallery: newer page template
    {
        "#url": "https://fitnakedgirls.com/photos/gallery/sparksgowild-nude/",
        "#comment": "newer template with wp-block-image figures",
        "#category": ("", "fitnakedgirls", "gallery"),
        "#class": fitnakedgirls.FitnakedgirlsGalleryExtractor,
        "#pattern": r"https://fitnakedgirls\.com/photos/wp-content/uploads/\d+/\d+/.+\.(jpg|mp4)",
        "#count": range(60, 70),

        "gallery_id": 419511,
        "gallery_slug": "sparksgowild-nude",
        "model": "SparksGoWild",
        "title": "SparksGoWild",
    },

    # gallery: older page template
    {
        "#url": "https://fitnakedgirls.com/photos/gallery/mikayla-demaiter-mikayla_demaiter-nude-8-photos-2/",
        "#comment": "older template with size-large img tags",
        "#category": ("", "fitnakedgirls", "gallery"),
        "#class": fitnakedgirls.FitnakedgirlsGalleryExtractor,
        "#pattern": r"https://fitnakedgirls\.com/photos/wp-content/uploads/\d+/\d+/.+\.jpg",
        "#count": 8,

        "gallery_id": 329550,
        "gallery_slug": "mikayla-demaiter-mikayla_demaiter-nude-8-photos-2",
        "model": "Mikayla Demaiter (mikayla_demaiter)",
        "title": "Mikayla Demaiter (mikayla_demaiter)",
    },

    # category listing: results must be gallery URLs
    {
        "#url": "https://fitnakedgirls.com/photos/gallery/category/fit-naked-girls/",
        "#category": ("", "fitnakedgirls", "category"),
        "#class": fitnakedgirls.FitnakedgirlsCategoryExtractor,
        "#pattern": fitnakedgirls.FitnakedgirlsGalleryExtractor.pattern,
        "#range": "1-10",
        "#count": 10,
    },

    # tag listing: results must be gallery URLs
    {
        "#url": "https://fitnakedgirls.com/photos/gallery/tag/blonde/",
        "#category": ("", "fitnakedgirls", "tag"),
        "#class": fitnakedgirls.FitnakedgirlsTagExtractor,
        "#pattern": fitnakedgirls.FitnakedgirlsGalleryExtractor.pattern,
        "#range": "1-10",
        "#count": 10,
    },

    # single video post
    {
        "#url": "https://fitnakedgirls.com/videos/2025/08/arikytsya-gym-sybian-riding-ppv-video/",
        "#category": ("", "fitnakedgirls", "video"),
        "#class": fitnakedgirls.FitnakedgirlsVideoExtractor,
        "#pattern": r"https://fitnakedgirls\.com/videos/wp-content/uploads/.+\.mp4",
        "#count": 1,

        "video_id": 456559,
        "slug": "arikytsya-gym-sybian-riding-ppv-video",
        "title": "Arikytsya Gym Sybian Riding PPV Video",
    },

    # single blog post
    {
        "#url": "https://fitnakedgirls.com/fitblog/haven-schulz-2/",
        "#category": ("", "fitnakedgirls", "blog"),
        "#class": fitnakedgirls.FitnakedgirlsBlogExtractor,
        "#pattern": r"https://fitnakedgirls\.com/fitblog/wp-content/uploads/.+\.(jpg|png)",
        "#count": 10,

        "post_id": 165409,
        "slug": "haven-schulz-2",
        "title": "Haven Schulz",
    },
)
|
||||
Reference in New Issue
Block a user