From cf2e5a1619f77874d9846c7f5793ec78e1be4561 Mon Sep 17 00:00:00 2001 From: Farahat Date: Wed, 23 Jul 2025 03:50:25 +0700 Subject: [PATCH] [leakgallery] add support (#7872) * add new extractor for leakgallery.com Added support for downloading photo and video posts from leakgallery.com. Supports: * Individual post URLs * User profile URLs with pagination via AJAX * Optional type/sort filters (e.g. /Photos/MostRecent) * Proper file extension handling * Creator-based folder structure * Compatibility with --download-archive Tested locally and functional, but may still need review or improvement. * [leakgallery] add support Added leakgallery to extractor module imports so it's recognized and used. * [leakgallery] update extractor structure - Refactored using LeakGalleryExtractorBase to remove duplication - Moved init logic into items() using self.groups - Replaced re with text.re as per upstream guidance - Added creator fallback and media deduplication - Aligned structure with gallery-dl maintainer review tips * [leakgallery] add support - Added leakgallery entry to supportedsites.md - Includes post, user, trending, and most-liked subcategories * add exported extractor results * [leakgallery] fix flake8 style issues Cleaned up code to comply with flake8 rules, especially: - removed unused imports - split long lines >79 chars - ensured newline at EOF No functional changes made; purely formatting to satisfy CI checks. * [tests] update extractor results * [leakgallery] fix flake8 style issues (part 2) Fix remaining flake8 issues in leakgallery.py: - Reformat line breaks to avoid W503 (line break before binary operator) - Wrap long lines to respect E501 (line too long > 79 characters) - Cleaned up exception logging for better clarity - Confirmed all flake8 checks now pass successfully This supersedes the previous commit which partially fixed formatting violations. 
* [leakgallery] fix flake8 style issues (part 3) * [leakgallery] rename extractor classes * [tests] update extractor results * [tests] rename extractor results * [leakgallery] rename extractor classes (part 2) * [leakgallery] rename example * update docs/supportedsites * update test results and convert line endings to '\n' * update - convert line endings to '\n' - use _pagination method - fix logging calls * return more metadata for _pagination() results --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/leakgallery.py | 141 ++++++++++++++++++++++++++++ scripts/supportedsites.py | 5 + test/results/leakgallery.py | 47 ++++++++++ 5 files changed, 200 insertions(+) create mode 100644 gallery_dl/extractor/leakgallery.py create mode 100644 test/results/leakgallery.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2865e1eb..5a2f5425 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -547,6 +547,12 @@ Consider all listed sites to potentially be NSFW. Chapters, Manga + + Leak Gallery + https://leakgallery.com + Most Liked Posts, Posts, Trending Medias, User Profiles + + Lensdump https://lensdump.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6ecba9bc..688f0a05 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -99,6 +99,7 @@ modules = [ "kemono", "khinsider", "komikcast", + "leakgallery", "lensdump", "lexica", "lightroom", diff --git a/gallery_dl/extractor/leakgallery.py b/gallery_dl/extractor/leakgallery.py new file mode 100644 index 00000000..c6098919 --- /dev/null +++ b/gallery_dl/extractor/leakgallery.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+ +"""Extractors for https://leakgallery.com""" + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?leakgallery\.com" + + +class LeakgalleryExtractor(Extractor): + category = "leakgallery" + directory_fmt = ("{category}", "{creator}") + filename_fmt = "{id}_{filename}.{extension}" + archive_fmt = "{creator}_{id}" + + def _yield_media_items(self, medias, creator=None): + seen = set() + for media in medias: + path = media["file_path"] + if path in seen: + continue + seen.add(path) + + if creator is None: + try: + media["creator"] = \ + media["profile"]["username"] or "unknown" + except Exception: + media["creator"] = "unknown" + else: + media["creator"] = creator + + media["url"] = url = f"https://cdn.leakgallery.com/{path}" + text.nameext_from_url(url, media) + yield Message.Directory, media + yield Message.Url, url, media + + def _pagination(self, type, base, params=None, creator=None, pnum=1): + while True: + try: + data = self.request_json(f"{base}{pnum}", params=params) + + if not data: + return + if "medias" in data: + data = data["medias"] + if not data or not isinstance(data, list): + return + + yield from self._yield_media_items(data, creator) + pnum += 1 + except Exception as exc: + self.log.error("Failed to retrieve %s page %s: %s", + type, pnum, exc) + return + + +class LeakgalleryUserExtractor(LeakgalleryExtractor): + """Extractor for profile posts on leakgallery.com""" + subcategory = "user" + pattern = ( + BASE_PATTERN + + r"/(?!trending-medias|most-liked|random/medias)([^/?#]+)" + r"(?:/(Photos|Videos|All))?" 
+ r"(?:/(MostRecent|MostViewed|MostLiked))?/?$" + ) + example = "https://leakgallery.com/creator" + + def items(self): + creator, mtype, msort = self.groups + base = f"https://api.leakgallery.com/profile/{creator}/" + params = {"type": mtype or "All", "sort": msort or "MostRecent"} + return self._pagination(creator, base, params, creator) + + +class LeakgalleryTrendingExtractor(LeakgalleryExtractor): + """Extractor for trending posts on leakgallery.com""" + subcategory = "trending" + pattern = BASE_PATTERN + r"/trending-medias(?:/([\w-]+))?" + example = "https://leakgallery.com/trending-medias/Week" + + def items(self): + period = self.groups[0] or "Last-Hour" + base = f"https://api.leakgallery.com/popular/media/{period}/" + return self._pagination("trending", base) + + +class LeakgalleryMostlikedExtractor(LeakgalleryExtractor): + """Extractor for most liked posts on leakgallery.com""" + subcategory = "mostliked" + pattern = BASE_PATTERN + r"/most-liked" + example = "https://leakgallery.com/most-liked" + + def items(self): + base = "https://api.leakgallery.com/most-liked/" + return self._pagination("most-liked", base) + + +class LeakgalleryPostExtractor(LeakgalleryExtractor): + """Extractor for individual posts on leakgallery.com""" + subcategory = "post" + pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)" + example = "https://leakgallery.com/CREATOR/12345" + + def items(self): + creator, post_id = self.groups + url = f"https://leakgallery.com/{creator}/{post_id}" + + try: + page = self.request(url).text + video_urls = text.re( + r"https://cdn\.leakgallery\.com/content[^/?#]*/" + r"(?:compressed_)?watermark_[^\"]+\." 
+ r"(?:mp4|mov|m4a|webm)" + ).findall(page) + image_urls = text.re( + r"https://cdn\.leakgallery\.com/content[^/?#]*/" + r"watermark_[^\"]+\.(?:jpe?g|png)" + ).findall(page) + + seen = set() + for url in video_urls + image_urls: + if url in seen: + continue + seen.add(url) + data = { + "id": post_id, + "creator": creator, + "url": url, + } + text.nameext_from_url(url, data) + yield Message.Directory, data + yield Message.Url, url, data + except Exception as exc: + self.log.error("Failed to extract post page %s/%s: %s", + creator, post_id, exc) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index eb51f80a..46399bfb 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -91,6 +91,7 @@ CATEGORY_MAP = { "jpgfish" : "JPG Fish", "kabeuchi" : "かべうち", "schalenetwork" : "Schale Network", + "leakgallery" : "Leak Gallery", "livedoor" : "livedoor Blog", "lofter" : "LOFTER", "ohpolly" : "Oh Polly", @@ -298,6 +299,10 @@ SUBCATEGORY_MAP = { "discord-server": "", "posts" : "", }, + "leakgallery": { + "trending" : "Trending Medias", + "mostliked": "Most Liked Posts", + }, "lensdump": { "albums": "", }, diff --git a/test/results/leakgallery.py b/test/results/leakgallery.py new file mode 100644 index 00000000..a4610d59 --- /dev/null +++ b/test/results/leakgallery.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+ +from gallery_dl.extractor import leakgallery +FILE_PATTERN = r"https://cdn.leakgallery.com/content(-videos|\d+)?/[\w.-]+\.\w+" + + +__tests__ = ( +{ + "#url" : "https://leakgallery.com/sophieraiin/12240", + "#class" : leakgallery.LeakgalleryPostExtractor, + "#results": "https://cdn.leakgallery.com/content-videos/watermark_745_sophieraiin_241.mp4", + + "id" : "12240", + "creator": "sophieraiin", +}, + +{ + "#url" : "https://leakgallery.com/sophieraiin", + "#class" : leakgallery.LeakgalleryUserExtractor, + "#pattern": r"https://cdn.leakgallery.com/content3/(compressed_)?watermark_[0-9a-f]+_sophieraiin_\w+\.(jpg|png|mp4|mov)", + "#range" : "1-100", + "#count" : 100, + + "creator": "sophieraiin", +}, + +{ + "#url" : "https://leakgallery.com/trending-medias/Week", + "#class" : leakgallery.LeakgalleryTrendingExtractor, + "#pattern": FILE_PATTERN, + "#range" : "1-100", + "#count" : 100, +}, + +{ + "#url" : "https://leakgallery.com/most-liked", + "#class" : leakgallery.LeakgalleryMostlikedExtractor, + "#pattern": FILE_PATTERN, + "#range" : "1-100", + "#count" : 100, +}, + +)