NOTE(review): this patch was recovered from a copy in which line breaks,
runs of whitespace and HTML-like substrings had been stripped.
The table rows in docs/supportedsites.md and the HTML delimiter strings in
gallery_dl/extractor/cfake.py are reconstructions; confirm before merging.
Column alignment inside lines could not be restored, so apply with
"git apply --ignore-whitespace".
cfake.py was edited during review; its "index" blob id is therefore stale.

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index f450b493..42e4ffa1 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -181,6 +181,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Albums, Files</td>
     <td></td>
 </tr>
+<tr id="cfake" title="cfake">
+    <td>Celebrity Fakes</td>
+    <td>https://cfake.com/</td>
+    <td>Categories, Celebrities, Countries, Created</td>
+    <td></td>
+</tr>
 <tr id="chzzk" title="chzzk">
     <td>CHZZK</td>
     <td>https://chzzk.naver.com/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index c0d3a790..190251f7 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -40,6 +40,7 @@ modules = [
     "booth",
     "bunkr",
     "catbox",
+    "cfake",
     "chevereto",
     "cien",
     "civitai",
diff --git a/gallery_dl/extractor/cfake.py b/gallery_dl/extractor/cfake.py
new file mode 100644
index 00000000..bd8a23f0
--- /dev/null
+++ b/gallery_dl/extractor/cfake.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://cfake.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?cfake\.com"
+
+
+class CfakeExtractor(Extractor):
+    """Base class for cfake extractors
+
+    Subclass patterns must capture five groups, in this order:
+    type, name, ID, sub-ID (may be empty), page number (optional).
+    """
+    category = "cfake"
+    root = "https://cfake.com"
+    directory_fmt = ("{category}", "{type}", "{type_name} ({type_id})")
+    filename_fmt = "{category}_{type_name}_{id}.{extension}"
+    archive_fmt = "{id}"
+
+    def items(self):
+        """Yield a Directory message followed by one Url message per image
+
+        Starts at the page named in the URL (default 1) and keeps
+        requesting pages until one has no images or no next-page link.
+        """
+        kind, type_name, type_id, sub_id, pnum = self.groups
+
+        # Singular form, e.g. "categories" -> "category"
+        if kind.endswith("ies"):
+            kind = kind[:-3] + "y"
+
+        kwdict = self.kwdict
+        kwdict["type"] = kind
+        kwdict["type_id"] = text.parse_int(type_id)
+        kwdict["type_name"] = text.unquote(type_name).replace("_", " ")
+        kwdict["sub_id"] = text.parse_int(sub_id)
+        kwdict["page"] = pnum = text.parse_int(pnum, 1)
+        yield Message.Directory, {}
+
+        # NOTE(review): the request path uses the singular form as well,
+        # so a ".../images/categories/..." URL is fetched as
+        # ".../images/category/..." - confirm the site serves that path
+        base = f"{self.root}/images/{kind}/{type_name}/{type_id}"
+        if sub_id:
+            base = f"{base}/{sub_id}"
+
+        while True:
+            page_url = base if pnum < 2 else f"{base}/p{pnum}"
+            page = self.request(page_url).text
+
+            # Running image number across pages
+            # NOTE(review): assumes 50 images per full page - confirm
+            offset = (pnum - 1) * 50
+            num = 0
+            for num, image in enumerate(self._extract_images(page), 1):
+                image["num"] = num + offset
+                url = image["url"]
+                yield Message.Url, url, text.nameext_from_url(url, image)
+
+            # Stop on an empty page or when no next-page link exists
+            if not num or not (pnum := self._check_pagination(page)):
+                return
+            kwdict["page"] = pnum
+
+    def _extract_images(self, page):
+        """Extract image URLs and metadata from a gallery page
+
+        Yields one dict per image with the keys
+        "url", "id", "name", "date" and "rating".
+        """
+        # NOTE(review): the HTML delimiter strings and the lines that set
+        # show_param, picture_id, name_param and date were lost when this
+        # patch was extracted as text. They are reconstructed here and
+        # must be confirmed against the site's markup before merging.
+        for item in text.extract_iter(
+                page, '<a href="javascript:showimage(', '</div></div>'):
+
+            # show_param is like "2025/filename.jpg"
+            show_param = text.extr(item, "show=", "&")
+            if not show_param:
+                continue
+
+            picture_id = text.extr(item, "id_picture=", "&")
+            name_param = text.extr(item, "p_name=", "'")
+            date = text.extr(item, 'id="date_vignette">', '</div>')
+
+            # Rating is the pixel width found after class="current-rating"
+            rating_text = text.extr(item, 'class="current-rating"', '</li>')
+            rating = text.extr(rating_text, 'width:', 'px')
+
+            yield {
+                "url": f"{self.root}/medias/photos/{show_param}",
+                "id": text.parse_int(picture_id) if picture_id else 0,
+                "name": text.unescape(name_param) if name_param else "",
+                "date": date,
+                "rating": rating,
+            }
+
+    def _check_pagination(self, page):
+        """Check if there are more pages and return next page number
+
+        Returns None if the current page number cannot be determined
+        or the page contains no link to the following page.
+        """
+        # NOTE(review): the end delimiters '</div>' and '</a>' below were
+        # stripped from the extracted patch text and are reconstructed
+        current_section = text.extr(
+            page, 'id="num_page_current"', '</div>')
+        if not current_section:
+            return None
+
+        # The current page number is the link text
+        current_page = text.parse_int(
+            text.extr(current_section, '">', '</a>'))
+        if not current_page:
+            return None
+
+        # A next page exists if any link in the page ends in "/pN"
+        next_page = current_page + 1
+        if f'/p{next_page}"' in page or f'/p{next_page} ' in page:
+            return next_page
+        return None
+
+
+class CfakeCelebrityExtractor(CfakeExtractor):
+    """Extractor for celebrity image galleries from cfake.com"""
+    subcategory = "celebrity"
+    pattern = (BASE_PATTERN + r"/images/(celebrity)"
+               r"/([^/?#]+)/(\d+)()(?:/p(\d+))?")
+    example = "https://cfake.com/images/celebrity/NAME/123"
+
+
+class CfakeCategoryExtractor(CfakeExtractor):
+    """Extractor for category image galleries from cfake.com"""
+    subcategory = "category"
+    pattern = (BASE_PATTERN + r"/images/(categories)"
+               r"/([^/?#]+)/(\d+)()(?:/p(\d+))?")
+    example = "https://cfake.com/images/categories/NAME/123"
+
+
+class CfakeCreatedExtractor(CfakeExtractor):
+    """Extractor for 'created' image galleries from cfake.com"""
+    subcategory = "created"
+    pattern = (BASE_PATTERN + r"/images/(created)"
+               r"/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
+    example = "https://cfake.com/images/created/NAME/12345/123"
+
+
+class CfakeCountryExtractor(CfakeExtractor):
+    """Extractor for country image galleries from cfake.com"""
+    subcategory = "country"
+    pattern = (BASE_PATTERN + r"/images/(country)"
+               r"/([^/?#]+)/(\d+)/(\d+)(?:/p(\d+))?")
+    example = "https://cfake.com/images/country/NAME/12345/123"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index cf7ecf53..87c6ae57 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -40,6 +40,7 @@ CATEGORY_MAP = {
     "batoto" : "BATO.TO",
     "bbc" : "BBC",
     "booth" : "BOOTH",
+    "cfake" : "Celebrity Fakes",
     "cien" : "Ci-en",
     "cohost" : "cohost!",
     "comicvine" : "Comic Vine",
@@ -250,6 +251,9 @@ SUBCATEGORY_MAP = {
     "boosty": {
         "feed": "Subscriptions Feed",
     },
+    "cfake": {
+        "created": "Created",
+    },
     "civitai": {
         "models": "Model Listings",
         "images": "Image Listings",
diff --git a/test/results/cfake.py b/test/results/cfake.py
new file mode 100644
index 00000000..57fdaa9b
--- /dev/null
+++ b/test/results/cfake.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import cfake
+
+
+__tests__ = (
+{
+    "#url" : "https://cfake.com/images/celebrity/Kaley_Cuoco/631/",
+    "#category": ("", "cfake", "celebrity"),
+    "#class" : cfake.CfakeCelebrityExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range" : "1-20",
+    "#count" : 20,
+
+    "type" : "celebrity",
+    "type_id" : 631,
+    "type_name" : "Kaley Cuoco",
+    "page" : 1,
+    "id" : int,
+    "num" : int,
+    "date" : str,
+    "rating" : str,
+},
+
+{
+    "#url" : "https://cfake.com/images/celebrity/Kaley_Cuoco/631/p2",
+    "#comment" : "pagination test - page 2",
+    "#category": ("", "cfake", "celebrity"),
+    "#class" : cfake.CfakeCelebrityExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range" : "1-5",
+
+    "type" : "celebrity",
+    "type_id" : 631,
+    "type_name" : "Kaley Cuoco",
+    "page" : 2,
+},
+
+{
+    "#url" : "https://www.cfake.com/images/celebrity/Chloe_Grace_Moretz/6575/",
+    "#category": ("", "cfake", "celebrity"),
+    "#class" : cfake.CfakeCelebrityExtractor,
+},
+
+{
+    "#url" : "https://cfake.com/images/categories/Facial/25/",
+    "#category": ("", "cfake", "category"),
+    "#class" : cfake.CfakeCategoryExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range" : "1-10",
+    "#count" : 10,
+
+    "type" : "category",
+    "type_id" : 25,
+    "type_name" : "Facial",
+    "page" : 1,
+    "id" : int,
+    "num" : int,
+},
+
+{
+    "#url" : "https://cfake.com/images/categories/Big_Tits/35/",
+    "#category": ("", "cfake", "category"),
+    "#class" : cfake.CfakeCategoryExtractor,
+},
+
+{
+    "#url" : "https://cfake.com/images/categories/Big_Tits/35/p2",
+    "#comment" : "category pagination test",
+    "#category": ("", "cfake", "category"),
+    "#class" : cfake.CfakeCategoryExtractor,
+},
+
+{
+    "#url" : "https://cfake.com/images/created/Spice_Girls_%28band%29/72/4",
+    "#category": ("", "cfake", "created"),
+    "#class" : cfake.CfakeCreatedExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range" : "1-10",
+    "#count" : 10,
+
+    "type" : "created",
+    "type_id" : 72,
+    "type_name" : "Spice Girls (band)",
+    "sub_id" : 4,
+    "page" : 1,
+    "id" : int,
+    "num" : int,
+},
+
+{
+    "#url" : "https://cfake.com/images/created/Brooklyn_Nine-Nine/4142/4",
+    "#category": ("", "cfake", "created"),
+    "#class" : cfake.CfakeCreatedExtractor,
+},
+
+{
+    "#url" : "https://cfake.com/images/created/Brooklyn_Nine-Nine/4142/4/p2",
+    "#comment" : "created pagination test",
+    "#category": ("", "cfake", "created"),
+    "#class" : cfake.CfakeCreatedExtractor,
+},
+
+{
+    "#url" : "https://cfake.com/images/country/Australia/12/5",
+    "#category": ("", "cfake", "country"),
+    "#class" : cfake.CfakeCountryExtractor,
+    "#pattern" : r"https://cfake\.com/medias/photos/\d{4}/[0-9a-f]+_cfake\.jpg",
+    "#range" : "1-10",
+    "#count" : 10,
+
+    "type" : "country",
+    "type_id" : 12,
+    "type_name" : "Australia",
+    "sub_id" : 5,
+    "page" : 1,
+    "id" : int,
+    "num" : int,
+},
+
+{
+    "#url" : "https://cfake.com/images/country/Mexico/139/5",
+    "#category": ("", "cfake", "country"),
+    "#class" : cfake.CfakeCountryExtractor,
+},
+
+{
+    "#url" : "https://cfake.com/images/country/Mexico/139/5/p3",
+    "#comment" : "country pagination test",
+    "#category": ("", "cfake", "country"),
+    "#class" : cfake.CfakeCountryExtractor,
+},
+
+)