[cfake] add support (#707 #6021 #8549 #8430)

* Create cfake.py
* Update __init__.py
* Create cfake.py tests
* update
  - simplify & combine code
  - adjust 'pattern': use '[^/?#]', match lines and groups
  - generalize example URLs
  - update default filenames
* update docs/supportedsites
* update test results

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
2025-11-15 14:55:00 -05:00
parent 2578f7b5c1
commit f6e67116f2
5 changed files with 298 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -181,6 +181,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Albums, Files</td>
    <td></td>
 </tr>
+<tr id="cfake" title="cfake">
+    <td>Celebrity Fakes</td>
+    <td>https://cfake.com/</td>
+    <td>Categories, Celebrities, Countries, Created</td>
+    <td></td>
+</tr>
 <tr id="naver-chzzk" title="naver-chzzk">
    <td>CHZZK</td>
    <td>https://chzzk.naver.com/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -40,6 +40,7 @@ modules = [
    "booth",
    "bunkr",
    "catbox",
+    "cfake",
    "chevereto",
    "cien",
    "civitai",
--- a/gallery_dl/extractor/cfake.py
+++ b/gallery_dl/extractor/cfake.py
@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://cfake.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?cfake\.com"
+
+
+class CfakeExtractor(Extractor):
+    """Base class for cfake extractors"""
+    category = "cfake"
+    root = "https://cfake.com"
+    directory_fmt = ("{category}", "{type}", "{type_name} ({type_id})")
+    filename_fmt = "{category}_{type_name}_{id}.{extension}"
+    archive_fmt = "{id}"
+
+    def items(self):
+        type, type_name, type_id, sub_id, pnum = self.groups
+
+        if type.endswith("ies"):
+            type = type[:-3] + "y"
+
+        kwdict = self.kwdict
+        kwdict["type"] = type
+        kwdict["type_id"] = text.parse_int(type_id)
+        kwdict["type_name"] = text.unquote(type_name).replace("_", " ")
+        kwdict["sub_id"] = text.parse_int(sub_id)
+        kwdict["page"] = pnum = text.parse_int(pnum, 1)
+        yield Message.Directory, {}
+
+        base = f"{self.root}/images/{type}/{type_name}/{type_id}"
+        if sub_id:
+            base = f"{base}/{sub_id}"
+
+        while True:
+            url = base if pnum < 2 else f"{base}/p{pnum}"
+            page = self.request(url).text
+
+            # Extract and yield images
+            num = 0
+            for image in self._extract_images(page):
+                num += 1
+                image["num"] = num + (pnum - 1) * 50
+                url = image["url"]
+                yield Message.Url, url, text.nameext_from_url(url, image)
+
+            # Check for next page
+            if not num or not (pnum := self._check_pagination(page)):
+                return
+            kwdict["page"] = pnum
+
+    def _extract_images(self, page):
+        """Extract image URLs and metadata from a gallery page"""
+        for item in text.extract_iter(
+                page, '<a href="javascript:showimage(', '</div></div>'):
+
+            # Extract image path from showimage call
+            # Format: 'big.php?show=2025/filename.jpg&id_picture=...
+            show_param = text.extr(item, "show=", "&")
+            if not show_param:
+                continue
+
+            # Extract metadata
+            picture_id = text.extr(item, "id_picture=", "&")
+            name_param = text.extr(item, "p_name=", "'")
+
+            # Extract date
+            date = text.extr(item, 'id="date_vignette">', '</div>')
+
+            # Extract rating
+            rating_text = text.extr(item, 'class="current-rating"', '</li>')
+            rating = text.extr(rating_text, 'width:', 'px')
+
+            # Convert thumbnail path to full image path
+            # show_param is like "2025/filename.jpg"
+            image_url = f"{self.root}/medias/photos/{show_param}"
+
+            yield {
+                "url": image_url,
+                "id": text.parse_int(picture_id) if picture_id else 0,
+                "name": text.unescape(name_param) if name_param else "",
+                "date": date,
+                "rating": rating,
+            }
+
+    def _check_pagination(self, page):
+        """Check if there are more pages and return next page number"""
+        # Look for current page indicator
+        # Format: id="num_page_current" ><a href=".../ p1">1</a>
+        current_section = text.extr(
+            page, 'id="num_page_current"', '</div>')
+        if not current_section:
+            return None
+
+        # Extract current page number from the link text
+        current_page_str = text.extr(current_section, '">', '</a>')
+        if not current_page_str:
+            return None
+
+        current_page = text.parse_int(current_page_str)
+        if not current_page:
+            return None
+
+        next_page = current_page + 1
+
+        # Check if next page link exists anywhere in the page
+        # Look for href="/images/.../pN" pattern
+        if f'/p{next_page}"' in page or f'/p{next_page} ' in page:
+            return next_page
+
+        return None
+
+
+class CfakeCelebrityExtractor(CfakeExtractor):
+    """Extractor for celebrity image galleries from cfake.com"""
+    subcategory = "celebrity"
+    pattern = (BASE_PATTERN + r"/images/(celebrity)"
+               r"/([^/?#]+)/(\d+)()(?:/p(\d+))?")
+    example = "https://cfake.com/images/celebrity/NAME/123"
+
+
+class CfakeCategoryExtractor(CfakeExtractor):
+    """Extractor for category image galleries from cfake.com"""
+    subcategory = "category"
+    pattern = (BASE_PATTERN + r"/images/(categories)"
+               r"/([^/?#]+)/(\d+)()(?:/p(\d+))?")
+    example = "https://cfake.com/images/categories/NAME/123"
+
+
+class CfakeCreatedExtractor(CfakeExtractor):
+    """Extractor for 'created' image galleries from cfake.com"""
+    subcategory = "created"
+    pattern = (BASE_PATTERN + r"/images/(created)"
+               r"/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
+    example = "https://cfake.com/images/created/NAME/12345/123"
+
+
+class CfakeCountryExtractor(CfakeExtractor):
+    """Extractor for country image galleries from cfake.com"""
+    subcategory = "country"
+    pattern = (BASE_PATTERN + r"/images/(country)"
+               r"/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
+    example = "https://cfake.com/images/country/NAME/12345/123"
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -40,6 +40,7 @@ CATEGORY_MAP = {
    "batoto"         : "BATO.TO",
    "bbc"            : "BBC",
    "booth"          : "BOOTH",
+    "cfake"          : "Celebrity Fakes",
    "cien"           : "Ci-en",
    "cohost"         : "cohost!",
    "comicvine"      : "Comic Vine",
@@ -250,6 +251,9 @@ SUBCATEGORY_MAP = {
    "boosty": {
        "feed": "Subscriptions Feed",
    },
+    "cfake": {
+        "created": "Created",
+    },
    "civitai": {
        "models": "Model Listings",
        "images": "Image Listings",
--- a/test/results/cfake.py
+++ b/test/results/cfake.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import cfake
+
+
+# Result definitions for the cfake extractors, one dict per input URL.
+# Keys starting with '#' describe the test itself; the remaining keys name
+# metadata fields together with their expected value or type.
+# NOTE(review): entries that only list '#url', '#category' and '#class'
+# presumably check URL-to-extractor matching alone - confirm against the
+# test runner.
+__tests__ = (
+# --- celebrity galleries ---
+{
+    "#url"     : "https://cfake.com/images/celebrity/Kaley_Cuoco/631/",
+    "#category": ("", "cfake", "celebrity"),
+    "#class"   : cfake.CfakeCelebrityExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range"   : "1-20",
+    "#count"   : 20,
+
+    "type"         : "celebrity",
+    "type_id"      : 631,
+    "type_name"    : "Kaley Cuoco",
+    "page"         : 1,
+    "id"           : int,
+    "num"          : int,
+    "date"         : str,
+    "rating"       : str,
+},
+
+{
+    "#url"     : "https://cfake.com/images/celebrity/Kaley_Cuoco/631/p2",
+    "#comment" : "pagination test - page 2",
+    "#category": ("", "cfake", "celebrity"),
+    "#class"   : cfake.CfakeCelebrityExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range"   : "1-5",
+
+    "type"         : "celebrity",
+    "type_id"      : 631,
+    "type_name"    : "Kaley Cuoco",
+    "page"         : 2,
+},
+
+# URL with the optional 'www.' prefix
+{
+    "#url"     : "https://www.cfake.com/images/celebrity/Chloe_Grace_Moretz/6575/",
+    "#category": ("", "cfake", "celebrity"),
+    "#class"   : cfake.CfakeCelebrityExtractor,
+},
+
+# --- category galleries ('categories' in the URL, 'category' as type) ---
+{
+    "#url"     : "https://cfake.com/images/categories/Facial/25/",
+    "#category": ("", "cfake", "category"),
+    "#class"   : cfake.CfakeCategoryExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range"   : "1-10",
+    "#count"   : 10,
+
+    "type"        : "category",
+    "type_id"     : 25,
+    "type_name"   : "Facial",
+    "page"        : 1,
+    "id"          : int,
+    "num"         : int,
+},
+
+{
+    "#url"     : "https://cfake.com/images/categories/Big_Tits/35/",
+    "#category": ("", "cfake", "category"),
+    "#class"   : cfake.CfakeCategoryExtractor,
+},
+
+{
+    "#url"     : "https://cfake.com/images/categories/Big_Tits/35/p2",
+    "#comment" : "category pagination test",
+    "#category": ("", "cfake", "category"),
+    "#class"   : cfake.CfakeCategoryExtractor,
+},
+
+# --- 'created' galleries (URL carries an additional sub ID) ---
+# The percent-encoded name is expected to be unquoted in 'type_name'
+{
+    "#url"     : "https://cfake.com/images/created/Spice_Girls_%28band%29/72/4",
+    "#category": ("", "cfake", "created"),
+    "#class"   : cfake.CfakeCreatedExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range"   : "1-10",
+    "#count"   : 10,
+
+    "type"       : "created",
+    "type_id"    : 72,
+    "type_name"  : "Spice Girls (band)",
+    "sub_id"     : 4,
+    "page"       : 1,
+    "id"         : int,
+    "num"        : int,
+},
+
+{
+    "#url"     : "https://cfake.com/images/created/Brooklyn_Nine-Nine/4142/4",
+    "#category": ("", "cfake", "created"),
+    "#class"   : cfake.CfakeCreatedExtractor,
+},
+
+{
+    "#url"     : "https://cfake.com/images/created/Brooklyn_Nine-Nine/4142/4/p2",
+    "#comment" : "created pagination test",
+    "#category": ("", "cfake", "created"),
+    "#class"   : cfake.CfakeCreatedExtractor,
+},
+
+# --- country galleries (URL carries an additional sub ID) ---
+{
+    "#url"     : "https://cfake.com/images/country/Australia/12/5",
+    "#category": ("", "cfake", "country"),
+    "#class"   : cfake.CfakeCountryExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range"   : "1-10",
+    "#count"   : 10,
+
+    "type"       : "country",
+    "type_id"    : 12,
+    "type_name"  : "Australia",
+    "sub_id"     : 5,
+    "page"       : 1,
+    "id"         : int,
+    "num"        : int,
+},
+
+{
+    "#url"     : "https://cfake.com/images/country/Mexico/139/5",
+    "#category": ("", "cfake", "country"),
+    "#class"   : cfake.CfakeCountryExtractor,
+},
+
+{
+    "#url"     : "https://cfake.com/images/country/Mexico/139/5/p3",
+    "#comment" : "country pagination test",
+    "#category": ("", "cfake", "country"),
+    "#class"   : cfake.CfakeCountryExtractor,
+},
+
+)