[leakgallery] add support (#7872)
* add new extractor for leakgallery.com
Added support for downloading photo and video posts from leakgallery.com.
Supports:
* Individual post URLs
* User profile URLs with pagination via AJAX
* Optional type/sort filters (e.g. /Photos/MostRecent)
* Proper file extension handling
* Creator-based folder structure
* Compatibility with --download-archive
Tested locally and functional, but may still need review or improvement.
* [leakgallery] add support
Added leakgallery to extractor module imports so it's recognized and used.
* [leakgallery] update extractor structure
- Refactored using LeakGalleryExtractorBase to remove duplication
- Moved init logic into items() using self.groups
- Replaced re with text.re as per upstream guidance
- Added creator fallback and media deduplication
- Aligned structure with gallery-dl maintainer review tips
* [leakgallery] add support
- Added leakgallery entry to supportedsites.md
- Includes post, user, trending, and most-liked subcategories
* add exported extractor results
* [leakgallery] fix flake8 style issues
Cleaned up code to comply with flake8 rules, especially:
- removed unused imports
- split long lines >79 chars
- ensured newline at EOF
No functional changes made; purely formatting to satisfy CI checks.
* [tests] update extractor results
* [leakgallery] fix flake8 style issues (part 2)
Fix remaining flake8 issues in leakgallery.py:
- Reformat line breaks to avoid W503 (line break before binary operator)
- Wrap long lines to respect E501 (line too long > 79 characters)
- Cleaned up exception logging for better clarity
- Confirmed all flake8 checks now pass successfully
This supersedes the previous commit, which partially fixed the formatting violations.
* [leakgallery] fix flake8 style issues (part 3)
* [leakgallery] rename extractor classes
* [tests] update extractor results
* [tests] rename extractor results
* [leakgallery] rename extractor classes (part 2)
* [leakgallery] rename example
* update docs/supportedsites
* update test results
and convert line endings to '\n'
* update
- convert line endings to '\n'
- use _pagination method
- fix logging calls
* return more metadata for _pagination() results
This commit is contained in:
@@ -547,6 +547,12 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<td>Chapters, Manga</td>
|
<td>Chapters, Manga</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Leak Gallery</td>
|
||||||
|
<td>https://leakgallery.com</td>
|
||||||
|
<td>Most Liked Posts, Posts, Trending Medias, User Profiles</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>Lensdump</td>
|
<td>Lensdump</td>
|
||||||
<td>https://lensdump.com/</td>
|
<td>https://lensdump.com/</td>
|
||||||
|
|||||||
@@ -99,6 +99,7 @@ modules = [
|
|||||||
"kemono",
|
"kemono",
|
||||||
"khinsider",
|
"khinsider",
|
||||||
"komikcast",
|
"komikcast",
|
||||||
|
"leakgallery",
|
||||||
"lensdump",
|
"lensdump",
|
||||||
"lexica",
|
"lexica",
|
||||||
"lightroom",
|
"lightroom",
|
||||||
|
|||||||
141
gallery_dl/extractor/leakgallery.py
Normal file
141
gallery_dl/extractor/leakgallery.py
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extractors for https://leakgallery.com"""
|
||||||
|
|
||||||
|
from .common import Extractor, Message
|
||||||
|
from .. import text
|
||||||
|
|
||||||
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?leakgallery\.com"
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryExtractor(Extractor):
    """Base class for leakgallery extractors"""
    category = "leakgallery"
    directory_fmt = ("{category}", "{creator}")
    filename_fmt = "{id}_(unknown).{extension}"
    archive_fmt = "{creator}_{id}"

    def _yield_media_items(self, medias, creator=None):
        """Yield Directory/Url messages for each media entry

        medias : list of media dicts as returned by the API
        creator: creator name; if None, it is taken from each entry's
                 embedded profile data, falling back to "unknown"
        """
        seen = set()
        for media in medias:
            path = media["file_path"]
            if path in seen:
                # API results occasionally repeat entries; emit each once
                continue
            seen.add(path)

            if creator is None:
                # targeted lookup instead of a broad 'except Exception';
                # 'profile' may be missing or null in API responses
                profile = media.get("profile")
                username = profile.get("username") \
                    if isinstance(profile, dict) else None
                media["creator"] = username or "unknown"
            else:
                media["creator"] = creator

            media["url"] = url = f"https://cdn.leakgallery.com/{path}"
            text.nameext_from_url(url, media)
            yield Message.Directory, media
            yield Message.Url, url, media

    def _pagination(self, type, base, params=None, creator=None, pnum=1):
        """Yield media items from all pages of a paginated API endpoint

        type   : label used in error messages (e.g. "trending")
        base   : API URL prefix; the page number is appended directly
        params : optional query parameters
        creator: forwarded to _yield_media_items()
        pnum   : first page number to request
        """
        while True:
            # keep only the network request inside the 'try', so that
            # exceptions raised by consumers of the yielded messages are
            # not swallowed and mislabeled as a request failure
            try:
                data = self.request_json(f"{base}{pnum}", params=params)
            except Exception as exc:
                self.log.error("Failed to retrieve %s page %s: %s",
                               type, pnum, exc)
                return

            if not data:
                return
            if "medias" in data:
                data = data["medias"]
            # an empty or non-list payload marks the end of pagination
            if not isinstance(data, list) or not data:
                return

            yield from self._yield_media_items(data, creator)
            pnum += 1
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryUserExtractor(LeakgalleryExtractor):
    """Extractor for profile posts on leakgallery.com"""
    subcategory = "user"
    pattern = (
        BASE_PATTERN +
        r"/(?!trending-medias|most-liked|random/medias)([^/?#]+)"
        r"(?:/(Photos|Videos|All))?"
        r"(?:/(MostRecent|MostViewed|MostLiked))?/?$"
    )
    example = "https://leakgallery.com/creator"

    def items(self):
        # URL groups: creator name plus optional type/sort filters
        creator, media_type, sort_order = self.groups
        endpoint = f"https://api.leakgallery.com/profile/{creator}/"
        query = {
            "type": media_type or "All",
            "sort": sort_order or "MostRecent",
        }
        return self._pagination(creator, endpoint, query, creator)
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryTrendingExtractor(LeakgalleryExtractor):
    """Extractor for trending posts on leakgallery.com"""
    subcategory = "trending"
    pattern = BASE_PATTERN + r"/trending-medias(?:/([\w-]+))?"
    example = "https://leakgallery.com/trending-medias/Week"

    def items(self):
        # optional period segment, e.g. 'Week'; default to 'Last-Hour'
        timeframe = self.groups[0]
        if not timeframe:
            timeframe = "Last-Hour"
        endpoint = f"https://api.leakgallery.com/popular/media/{timeframe}/"
        return self._pagination("trending", endpoint)
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryMostlikedExtractor(LeakgalleryExtractor):
    """Extractor for most liked posts on leakgallery.com"""
    subcategory = "mostliked"
    pattern = BASE_PATTERN + r"/most-liked"
    example = "https://leakgallery.com/most-liked"

    def items(self):
        # single fixed endpoint; pagination handles the rest
        endpoint = "https://api.leakgallery.com/most-liked/"
        return self._pagination("most-liked", endpoint)
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryPostExtractor(LeakgalleryExtractor):
    """Extractor for individual posts on leakgallery.com"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
    example = "https://leakgallery.com/CREATOR/12345"

    def items(self):
        creator, post_id = self.groups
        # 'page_url' instead of 'url': the original reused 'url' as the
        # loop variable below, shadowing the page URL
        page_url = f"https://leakgallery.com/{creator}/{post_id}"

        # keep only the network request inside the 'try', so that
        # exceptions raised by consumers of the yielded messages are
        # not swallowed and mislabeled as an extraction failure
        try:
            page = self.request(page_url).text
        except Exception as exc:
            self.log.error("Failed to extract post page %s/%s: %s",
                           creator, post_id, exc)
            return

        # videos first, then images, matching CDN watermark naming
        video_urls = text.re(
            r"https://cdn\.leakgallery\.com/content[^/?#]*/"
            r"(?:compressed_)?watermark_[^\"]+\."
            r"(?:mp4|mov|m4a|webm)"
        ).findall(page)
        image_urls = text.re(
            r"https://cdn\.leakgallery\.com/content[^/?#]*/"
            r"watermark_[^\"]+\.(?:jpe?g|png)"
        ).findall(page)

        seen = set()
        for media_url in video_urls + image_urls:
            if media_url in seen:
                # the page may embed the same file more than once
                continue
            seen.add(media_url)
            data = {
                "id": post_id,
                "creator": creator,
                "url": media_url,
            }
            text.nameext_from_url(media_url, data)
            yield Message.Directory, data
            yield Message.Url, media_url, data
|
||||||
@@ -91,6 +91,7 @@ CATEGORY_MAP = {
|
|||||||
"jpgfish" : "JPG Fish",
|
"jpgfish" : "JPG Fish",
|
||||||
"kabeuchi" : "かべうち",
|
"kabeuchi" : "かべうち",
|
||||||
"schalenetwork" : "Schale Network",
|
"schalenetwork" : "Schale Network",
|
||||||
|
"leakgallery" : "Leak Gallery",
|
||||||
"livedoor" : "livedoor Blog",
|
"livedoor" : "livedoor Blog",
|
||||||
"lofter" : "LOFTER",
|
"lofter" : "LOFTER",
|
||||||
"ohpolly" : "Oh Polly",
|
"ohpolly" : "Oh Polly",
|
||||||
@@ -298,6 +299,10 @@ SUBCATEGORY_MAP = {
|
|||||||
"discord-server": "",
|
"discord-server": "",
|
||||||
"posts" : "",
|
"posts" : "",
|
||||||
},
|
},
|
||||||
|
"leakgallery": {
|
||||||
|
"trending" : "Trending Medias",
|
||||||
|
"mostliked": "Most Liked Posts",
|
||||||
|
},
|
||||||
"lensdump": {
|
"lensdump": {
|
||||||
"albums": "",
|
"albums": "",
|
||||||
},
|
},
|
||||||
|
|||||||
47
test/results/leakgallery.py
Normal file
47
test/results/leakgallery.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import leakgallery
|
||||||
|
FILE_PATTERN = r"https://cdn.leakgallery.com/content(-videos|\d+)?/[\w.-]+\.\w+"
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
    # single post: exactly one known video result
    {
        "#url"    : "https://leakgallery.com/sophieraiin/12240",
        "#class"  : leakgallery.LeakgalleryPostExtractor,
        "#results": "https://cdn.leakgallery.com/content-videos/watermark_745_sophieraiin_241.mp4",

        "id"     : "12240",
        "creator": "sophieraiin",
    },

    # user profile: paginated results, checked against a URL pattern
    {
        "#url"    : "https://leakgallery.com/sophieraiin",
        "#class"  : leakgallery.LeakgalleryUserExtractor,
        "#pattern": r"https://cdn.leakgallery.com/content3/(compressed_)?watermark_[0-9a-f]+_sophieraiin_\w+\.(jpg|png|mp4|mov)",
        "#range"  : "1-100",
        "#count"  : 100,

        "creator": "sophieraiin",
    },

    # trending feed with an explicit period
    {
        "#url"    : "https://leakgallery.com/trending-medias/Week",
        "#class"  : leakgallery.LeakgalleryTrendingExtractor,
        "#pattern": FILE_PATTERN,
        "#range"  : "1-100",
        "#count"  : 100,
    },

    # most-liked feed
    {
        "#url"    : "https://leakgallery.com/most-liked",
        "#class"  : leakgallery.LeakgalleryMostlikedExtractor,
        "#pattern": FILE_PATTERN,
        "#range"  : "1-100",
        "#count"  : 100,
    },

)
|
||||||
Reference in New Issue
Block a user