* Create cfake.py
* Update __init__.py
* Create cfake.py tests
* update
- simplify & combine code
- adjust 'pattern': use '[^/?#]', match lines and groups
- generalize example URLs
- update default filenames
* update docs/supportedsites
* update test results
---------
Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -181,6 +181,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Albums, Files</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="cfake" title="cfake">
|
||||
<td>Celebrity Fakes</td>
|
||||
<td>https://cfake.com/</td>
|
||||
<td>Categories, Celebrities, Countries, Created</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="naver-chzzk" title="naver-chzzk">
|
||||
<td>CHZZK</td>
|
||||
<td>https://chzzk.naver.com/</td>
|
||||
|
||||
@@ -40,6 +40,7 @@ modules = [
|
||||
"booth",
|
||||
"bunkr",
|
||||
"catbox",
|
||||
"cfake",
|
||||
"chevereto",
|
||||
"cien",
|
||||
"civitai",
|
||||
|
||||
149
gallery_dl/extractor/cfake.py
Normal file
149
gallery_dl/extractor/cfake.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://cfake.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
# Common URL prefix shared by every cfake extractor pattern:
# an optional "http://" or "https://" scheme, an optional "www." label,
# then the literal host "cfake.com".
BASE_PATTERN = r"(?:https?://)?(?:www\.)?cfake\.com"
|
||||
|
||||
|
||||
class CfakeExtractor(Extractor):
    """Shared implementation for all cfake.com gallery extractors

    Subclasses only supply 'subcategory', 'pattern' and 'example'.
    Their pattern must produce five groups, in this order:
    type, name, numeric id, sub-id (may be empty), page number (optional).
    """
    category = "cfake"
    root = "https://cfake.com"
    directory_fmt = ("{category}", "{type}", "{type_name} ({type_id})")
    filename_fmt = "{category}_{type_name}_{id}.{extension}"
    archive_fmt = "{id}"

    def items(self):
        """Yield one Directory message, then a Url message per image

        Starts at the page number found in the URL (1 if absent) and
        keeps requesting follow-up pages until a page yields no images
        or _check_pagination() reports no next page.
        """
        kind, name, type_id, sub_id, start = self.groups

        # Plural URL segments ("categories") are stored in singular form
        # NOTE(review): the singular form is also what ends up in the
        # request path built below - confirm the site serves both spellings
        if kind.endswith("ies"):
            kind = kind[:-3] + "y"

        page_num = text.parse_int(start, 1)

        meta = self.kwdict
        meta["type"] = kind
        meta["type_id"] = text.parse_int(type_id)
        meta["type_name"] = text.unquote(name).replace("_", " ")
        meta["sub_id"] = text.parse_int(sub_id)
        meta["page"] = page_num
        yield Message.Directory, {}

        # The raw (still URL-quoted) name and id strings go into the path
        gallery = f"{self.root}/images/{kind}/{name}/{type_id}"
        if sub_id:
            gallery = f"{gallery}/{sub_id}"

        while True:
            # Page 1 is addressed without a "/pN" suffix
            if page_num < 2:
                page_url = gallery
            else:
                page_url = f"{gallery}/p{page_num}"
            html = self.request(page_url).text

            # 'num' is a running index that assumes 50 images per page
            offset = (page_num - 1) * 50
            count = 0
            for count, image in enumerate(self._extract_images(html), 1):
                image["num"] = count + offset
                src = image["url"]
                yield Message.Url, src, text.nameext_from_url(src, image)

            # An empty page ends the run without consulting the pager
            if not count:
                return
            page_num = self._check_pagination(html)
            if not page_num:
                return
            meta["page"] = page_num

    def _extract_images(self, page):
        """Yield a metadata dict for each image entry found in 'page'

        Entries are the fragments between '<a href="javascript:showimage('
        and '</div></div>'. Fragments without a 'show=' value are skipped.
        Each dict has the keys 'url', 'id', 'name', 'date' and 'rating'.
        """
        extr = text.extr
        entries = text.extract_iter(
            page, '<a href="javascript:showimage(', '</div></div>')

        for entry in entries:
            # Per the original author's notes the call looks like
            # 'big.php?show=2025/filename.jpg&id_picture=...
            rel_path = extr(entry, "show=", "&")
            if not rel_path:
                continue

            raw_id = extr(entry, "id_picture=", "&")
            raw_name = extr(entry, "p_name=", "'")
            date = extr(entry, 'id="date_vignette">', '</div>')

            # The rating is whatever sits between 'width:' and 'px'
            # inside the "current-rating" element; it stays a string
            rating_block = extr(entry, 'class="current-rating"', '</li>')
            rating = extr(rating_block, 'width:', 'px')

            if raw_id:
                picture_id = text.parse_int(raw_id)
            else:
                picture_id = 0

            if raw_name:
                name = text.unescape(raw_name)
            else:
                name = ""

            yield {
                # 'show' value (e.g. "2025/filename.jpg") under /medias/photos/
                "url": f"{self.root}/medias/photos/{rel_path}",
                "id": picture_id,
                "name": name,
                "date": date,
                "rating": rating,
            }

    def _check_pagination(self, page):
        """Return the next page number, or None when there is none

        The current page number is read from the link text inside the
        'id="num_page_current"' element. The next page counts as present
        when '/p<N>' followed by a double quote or a space occurs
        anywhere in 'page'.
        """
        # Per the original author's notes the marker looks like
        # id="num_page_current" ><a href=".../ p1">1</a>
        section = text.extr(page, 'id="num_page_current"', '</div>')
        label = text.extr(section, '">', '</a>') if section else ""
        current = text.parse_int(label) if label else 0
        if not current:
            return None

        following = current + 1
        needles = (f'/p{following}"', f'/p{following} ')
        if any(needle in page for needle in needles):
            return following
        return None
|
||||
|
||||
|
||||
class CfakeCelebrityExtractor(CfakeExtractor):
    """Extractor for the image gallery of one celebrity on cfake.com"""
    subcategory = "celebrity"
    example = "https://cfake.com/images/celebrity/NAME/123"
    # Groups: type, name, id, always-empty sub-id placeholder, optional page
    pattern = (BASE_PATTERN +
               r"/images/(celebrity)/([^/?#]+)/(\d+)()(?:/p(\d+))?")
|
||||
|
||||
|
||||
class CfakeCategoryExtractor(CfakeExtractor):
    """Extractor for the image gallery of one category on cfake.com"""
    subcategory = "category"
    example = "https://cfake.com/images/categories/NAME/123"
    # Groups: type, name, id, always-empty sub-id placeholder, optional page
    pattern = (BASE_PATTERN +
               r"/images/(categories)/([^/?#]+)/(\d+)()(?:/p(\d+))?")
|
||||
|
||||
|
||||
class CfakeCreatedExtractor(CfakeExtractor):
    """Extractor for the 'created' image galleries on cfake.com"""
    subcategory = "created"
    example = "https://cfake.com/images/created/NAME/12345/123"
    # Groups: type, name, id, mandatory numeric sub-id, optional page
    pattern = (BASE_PATTERN +
               r"/images/(created)/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
|
||||
|
||||
|
||||
class CfakeCountryExtractor(CfakeExtractor):
    """Extractor for the per-country image galleries on cfake.com"""
    subcategory = "country"
    example = "https://cfake.com/images/country/NAME/12345/123"
    # Groups: type, name, id, mandatory numeric sub-id, optional page
    pattern = (BASE_PATTERN +
               r"/images/(country)/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
|
||||
@@ -40,6 +40,7 @@ CATEGORY_MAP = {
|
||||
"batoto" : "BATO.TO",
|
||||
"bbc" : "BBC",
|
||||
"booth" : "BOOTH",
|
||||
"cfake" : "Celebrity Fakes",
|
||||
"cien" : "Ci-en",
|
||||
"cohost" : "cohost!",
|
||||
"comicvine" : "Comic Vine",
|
||||
@@ -250,6 +251,9 @@ SUBCATEGORY_MAP = {
|
||||
"boosty": {
|
||||
"feed": "Subscriptions Feed",
|
||||
},
|
||||
"cfake": {
|
||||
"created": "Created",
|
||||
},
|
||||
"civitai": {
|
||||
"models": "Model Listings",
|
||||
"images": "Image Listings",
|
||||
|
||||
138
test/results/cfake.py
Normal file
138
test/results/cfake.py
Normal file
@@ -0,0 +1,138 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import cfake
|
||||
|
||||
|
||||
# Expected results for the cfake extractors.
# Every entry names a "#url" and the extractor class ("#class") plus the
# category tuple ("#category") expected for it. Some entries additionally
# list metadata values; a bare type such as 'int' or 'str' stands in for
# a concrete value (presumably a type check by the test runner - confirm).
__tests__ = (
# celebrity gallery, first page
{
    "#url"     : "https://cfake.com/images/celebrity/Kaley_Cuoco/631/",
    "#category": ("", "cfake", "celebrity"),
    "#class"   : cfake.CfakeCelebrityExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range"   : "1-20",
    "#count"   : 20,

    "type"     : "celebrity",
    "type_id"  : 631,
    "type_name": "Kaley Cuoco",
    "page"     : 1,
    "id"       : int,
    "num"      : int,
    "date"     : str,
    "rating"   : str,
},

# same gallery, entered at page 2 through the "/p2" suffix
{
    "#url"     : "https://cfake.com/images/celebrity/Kaley_Cuoco/631/p2",
    "#comment" : "pagination test - page 2",
    "#category": ("", "cfake", "celebrity"),
    "#class"   : cfake.CfakeCelebrityExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range"   : "1-5",

    "type"     : "celebrity",
    "type_id"  : 631,
    "type_name": "Kaley Cuoco",
    "page"     : 2,
},

# "www." host variant
{
    "#url"     : "https://www.cfake.com/images/celebrity/Chloe_Grace_Moretz/6575/",
    "#category": ("", "cfake", "celebrity"),
    "#class"   : cfake.CfakeCelebrityExtractor,
},

# category gallery; "type" is expected in singular form ("category")
# although the URL segment is "categories"
{
    "#url"     : "https://cfake.com/images/categories/Facial/25/",
    "#category": ("", "cfake", "category"),
    "#class"   : cfake.CfakeCategoryExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range"   : "1-10",
    "#count"   : 10,

    "type"     : "category",
    "type_id"  : 25,
    "type_name": "Facial",
    "page"     : 1,
    "id"       : int,
    "num"      : int,
},

{
    "#url"     : "https://cfake.com/images/categories/Big_Tits/35/",
    "#category": ("", "cfake", "category"),
    "#class"   : cfake.CfakeCategoryExtractor,
},

{
    "#url"     : "https://cfake.com/images/categories/Big_Tits/35/p2",
    "#comment" : "category pagination test",
    "#category": ("", "cfake", "category"),
    "#class"   : cfake.CfakeCategoryExtractor,
},

# "created" gallery; the percent-encoded parentheses in the URL are
# expected to appear decoded in "type_name"
{
    "#url"     : "https://cfake.com/images/created/Spice_Girls_%28band%29/72/4",
    "#category": ("", "cfake", "created"),
    "#class"   : cfake.CfakeCreatedExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range"   : "1-10",
    "#count"   : 10,

    "type"     : "created",
    "type_id"  : 72,
    "type_name": "Spice Girls (band)",
    "sub_id"   : 4,
    "page"     : 1,
    "id"       : int,
    "num"      : int,
},

{
    "#url"     : "https://cfake.com/images/created/Brooklyn_Nine-Nine/4142/4",
    "#category": ("", "cfake", "created"),
    "#class"   : cfake.CfakeCreatedExtractor,
},

{
    "#url"     : "https://cfake.com/images/created/Brooklyn_Nine-Nine/4142/4/p2",
    "#comment" : "created pagination test",
    "#category": ("", "cfake", "created"),
    "#class"   : cfake.CfakeCreatedExtractor,
},

# country gallery with a sub-id path segment
{
    "#url"     : "https://cfake.com/images/country/Australia/12/5",
    "#category": ("", "cfake", "country"),
    "#class"   : cfake.CfakeCountryExtractor,
    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
    "#range"   : "1-10",
    "#count"   : 10,

    "type"     : "country",
    "type_id"  : 12,
    "type_name": "Australia",
    "sub_id"   : 5,
    "page"     : 1,
    "id"       : int,
    "num"      : int,
},

{
    "#url"     : "https://cfake.com/images/country/Mexico/139/5",
    "#category": ("", "cfake", "country"),
    "#class"   : cfake.CfakeCountryExtractor,
},

{
    "#url"     : "https://cfake.com/images/country/Mexico/139/5/p3",
    "#comment" : "country pagination test",
    "#category": ("", "cfake", "country"),
    "#class"   : cfake.CfakeCountryExtractor,
},

)
|
||||
Reference in New Issue
Block a user