From cf2e5a1619f77874d9846c7f5793ec78e1be4561 Mon Sep 17 00:00:00 2001
From: Farahat <mfarahat1660@gmail.com>
Date: Wed, 23 Jul 2025 03:50:25 +0700
Subject: [PATCH] [leakgallery] add support (#7872)

* add new extractor for leakgallery.com

    Added support for downloading photo and video posts from leakgallery.com.

    Supports:
    * Individual post URLs
    * User profile URLs with pagination via AJAX
    * Optional type/sort filters (e.g. /Photos/MostRecent)
    * Proper file extension handling
    * Creator-based folder structure
    * Compatibility with --download-archive

    Tested locally and functional, but may still need review or improvement.

* [leakgallery] add support
    Added leakgallery to extractor module imports so it's recognized and used.
* [leakgallery] update extractor structure
    - Refactored using LeakGalleryExtractorBase to remove duplication
    - Moved init logic into items() using self.groups
    - Replaced re with text.re as per upstream guidance
    - Added creator fallback and media deduplication
    - Aligned structure with gallery-dl maintainer review tips
* [leakgallery] add support
    - Added leakgallery entry to supportedsites.md
    - Includes post, user, trending, and most-liked subcategories
* add exported extractor results
* [leakgallery] fix flake8 style issues
    Cleaned up code to comply with flake8 rules, especially:
    - removed unused imports
    - split long lines >79 chars
    - ensured newline at EOF
    No functional changes made; purely formatting to satisfy CI checks.
* [tests] update extractor results
* [leakgallery] fix flake8 style issues (part 2)
    Fix remaining flake8 issues in leakgallery.py:
    - Reformat line breaks to avoid W503 (line break before binary operator)
    - Wrap long lines to respect E501 (line too long > 79 characters)
    - Cleaned up exception logging for better clarity
    - Confirmed all flake8 checks now pass successfully
    This supersedes the previous commit which partially fixed formatting violations.
* [leakgallery] fix flake8 style issues (part 3)
* [leakgallery] rename extractor classes
* [tests] update extractor results
* [tests] rename extractor results
* [leakgallery] rename extractor classes (part 2)
* [leakgallery] rename example
* update docs/supportedsites
* update test results
    and convert line endings to '\n'
* update
    - convert line endings to '\n'
    - use _pagination method
    - fix logging calls
* return more metadata for _pagination() results
---
 docs/supportedsites.md              |   6 ++
 gallery_dl/extractor/__init__.py    |   1 +
 gallery_dl/extractor/leakgallery.py | 141 ++++++++++++++++++++++++++++
 scripts/supportedsites.py           |   5 +
 test/results/leakgallery.py         |  47 ++++++++++
 5 files changed, 200 insertions(+)
 create mode 100644 gallery_dl/extractor/leakgallery.py
 create mode 100644 test/results/leakgallery.py
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 2865e1eb..5a2f5425 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -547,6 +547,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Chapters, Manga</td>
     <td></td>
 </tr>
+<tr>
+    <td>Leak Gallery</td>
+    <td>https://leakgallery.com</td>
+    <td>Most Liked Posts, Posts, Trending Medias, User Profiles</td>
+    <td></td>
+</tr>
 <tr>
     <td>Lensdump</td>
     <td>https://lensdump.com/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6ecba9bc..688f0a05 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -99,6 +99,7 @@ modules = [
     "kemono",
     "khinsider",
     "komikcast",
+    "leakgallery",
     "lensdump",
     "lexica",
     "lightroom",
diff --git a/gallery_dl/extractor/leakgallery.py b/gallery_dl/extractor/leakgallery.py
new file mode 100644
index 00000000..c6098919
--- /dev/null
+++ b/gallery_dl/extractor/leakgallery.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://leakgallery.com"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?leakgallery\.com"
+
+
+class LeakgalleryExtractor(Extractor):
+    category = "leakgallery"
+    directory_fmt = ("{category}", "{creator}")
+    filename_fmt = "{id}_{filename}.{extension}"
+    archive_fmt = "{creator}_{id}"
+
+    def _yield_media_items(self, medias, creator=None):
+        seen = set()
+        for media in medias:
+            path = media["file_path"]
+            if path in seen:
+                continue
+            seen.add(path)
+
+            if creator is None:
+                try:
+                    media["creator"] = \
+                        media["profile"]["username"] or "unknown"
+                except Exception:
+                    media["creator"] = "unknown"
+            else:
+                media["creator"] = creator
+
+            media["url"] = url = f"https://cdn.leakgallery.com/{path}"
+            text.nameext_from_url(url, media)
+            yield Message.Directory, media
+            yield Message.Url, url, media
+
+    def _pagination(self, type, base, params=None, creator=None, pnum=1):
+        while True:
+            try:
+                data = self.request_json(f"{base}{pnum}", params=params)
+
+                if not data:
+                    return
+                if "medias" in data:
+                    data = data["medias"]
+                    if not data or not isinstance(data, list):
+                        return
+
+                yield from self._yield_media_items(data, creator)
+                pnum += 1
+            except Exception as exc:
+                self.log.error("Failed to retrieve %s page %s: %s",
+                               type, pnum, exc)
+                return
+
+
+class LeakgalleryUserExtractor(LeakgalleryExtractor):
+    """Extractor for profile posts on leakgallery.com"""
+    subcategory = "user"
+    pattern = (
+        BASE_PATTERN +
+        r"/(?!trending-medias|most-liked|random/medias)([^/?#]+)"
+        r"(?:/(Photos|Videos|All))?"
+        r"(?:/(MostRecent|MostViewed|MostLiked))?/?$"
+    )
+    example = "https://leakgallery.com/creator"
+
+    def items(self):
+        creator, mtype, msort = self.groups
+        base = f"https://api.leakgallery.com/profile/{creator}/"
+        params = {"type": mtype or "All", "sort": msort or "MostRecent"}
+        return self._pagination(creator, base, params, creator)
+
+
+class LeakgalleryTrendingExtractor(LeakgalleryExtractor):
+    """Extractor for trending posts on leakgallery.com"""
+    subcategory = "trending"
+    pattern = BASE_PATTERN + r"/trending-medias(?:/([\w-]+))?"
+    example = "https://leakgallery.com/trending-medias/Week"
+
+    def items(self):
+        period = self.groups[0] or "Last-Hour"
+        base = f"https://api.leakgallery.com/popular/media/{period}/"
+        return self._pagination("trending", base)
+
+
+class LeakgalleryMostlikedExtractor(LeakgalleryExtractor):
+    """Extractor for most liked posts on leakgallery.com"""
+    subcategory = "mostliked"
+    pattern = BASE_PATTERN + r"/most-liked"
+    example = "https://leakgallery.com/most-liked"
+
+    def items(self):
+        base = "https://api.leakgallery.com/most-liked/"
+        return self._pagination("most-liked", base)
+
+
+class LeakgalleryPostExtractor(LeakgalleryExtractor):
+    """Extractor for individual posts on leakgallery.com"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
+    example = "https://leakgallery.com/CREATOR/12345"
+
+    def items(self):
+        creator, post_id = self.groups
+        url = f"https://leakgallery.com/{creator}/{post_id}"
+
+        try:
+            page = self.request(url).text
+            video_urls = text.re(
+                r"https://cdn\.leakgallery\.com/content[^/?#]*/"
+                r"(?:compressed_)?watermark_[^\"]+\."
+                r"(?:mp4|mov|m4a|webm)"
+            ).findall(page)
+            image_urls = text.re(
+                r"https://cdn\.leakgallery\.com/content[^/?#]*/"
+                r"watermark_[^\"]+\.(?:jpe?g|png)"
+            ).findall(page)
+
+            seen = set()
+            for url in video_urls + image_urls:
+                if url in seen:
+                    continue
+                seen.add(url)
+                data = {
+                    "id": post_id,
+                    "creator": creator,
+                    "url": url,
+                }
+                text.nameext_from_url(url, data)
+                yield Message.Directory, data
+                yield Message.Url, url, data
+        except Exception as exc:
+            self.log.error("Failed to extract post page %s/%s: %s",
+                           creator, post_id, exc)
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index eb51f80a..46399bfb 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -91,6 +91,7 @@ CATEGORY_MAP = {
     "jpgfish"        : "JPG Fish",
     "kabeuchi"       : "かべうち",
     "schalenetwork"  : "Schale Network",
+    "leakgallery"    : "Leak Gallery",
     "livedoor"       : "livedoor Blog",
     "lofter"         : "LOFTER",
     "ohpolly"        : "Oh Polly",
@@ -298,6 +299,10 @@ SUBCATEGORY_MAP = {
         "discord-server": "",
         "posts"         : "",
     },
+    "leakgallery": {
+        "trending" : "Trending Medias",
+        "mostliked": "Most Liked Posts",
+    },
     "lensdump": {
         "albums": "",
     },
diff --git a/test/results/leakgallery.py b/test/results/leakgallery.py
new file mode 100644
index 00000000..a4610d59
--- /dev/null
+++ b/test/results/leakgallery.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import leakgallery
+FILE_PATTERN = r"https://cdn.leakgallery.com/content(-videos|\d+)?/[\w.-]+\.\w+"
+
+
+__tests__ = (
+{
+    "#url"    : "https://leakgallery.com/sophieraiin/12240",
+    "#class"  : leakgallery.LeakgalleryPostExtractor,
+    "#results": "https://cdn.leakgallery.com/content-videos/watermark_745_sophieraiin_241.mp4",
+
+    "id"     : "12240",
+    "creator": "sophieraiin",
+},
+
+{
+    "#url"    : "https://leakgallery.com/sophieraiin",
+    "#class"  : leakgallery.LeakgalleryUserExtractor,
+    "#pattern": r"https://cdn.leakgallery.com/content3/(compressed_)?watermark_[0-9a-f]+_sophieraiin_\w+\.(jpg|png|mp4|mov)",
+    "#range"  : "1-100",
+    "#count"  : 100,
+
+    "creator": "sophieraiin",
+},
+
+{
+    "#url"    : "https://leakgallery.com/trending-medias/Week",
+    "#class"  : leakgallery.LeakgalleryTrendingExtractor,
+    "#pattern": FILE_PATTERN,
+    "#range"  : "1-100",
+    "#count"  : 100,
+},
+
+{
+    "#url"    : "https://leakgallery.com/most-liked",
+    "#class"  : leakgallery.LeakgalleryMostlikedExtractor,
+    "#pattern": FILE_PATTERN,
+    "#range"  : "1-100",
+    "#count"  : 100,
+},
+
+)