[leakgallery] add support (#7872)
* add new extractor for leakgallery.com
Added support for downloading photo and video posts from leakgallery.com.
Supports:
* Individual post URLs
* User profile URLs with pagination via AJAX
* Optional type/sort filters (e.g. /Photos/MostRecent)
* Proper file extension handling
* Creator-based folder structure
* Compatibility with --download-archive
Tested locally and functional, but may still need review or improvement.
* [leakgallery] add support
Added leakgallery to extractor module imports so it's recognized and used.
* [leakgallery] update extractor structure
- Refactored using LeakGalleryExtractorBase to remove duplication
- Moved init logic into items() using self.groups
- Replaced re with text.re as per upstream guidance
- Added creator fallback and media deduplication
- Aligned structure with gallery-dl maintainer review tips
* [leakgallery] add support
- Added leakgallery entry to supportedsites.md
- Includes post, user, trending, and most-liked subcategories
* add exported extractor results
* [leakgallery] fix flake8 style issues
Cleaned up code to comply with flake8 rules, especially:
- removed unused imports
- split long lines >79 chars
- ensured newline at EOF
No functional changes made; purely formatting to satisfy CI checks.
* [tests] update extractor results
* [leakgallery] fix flake8 style issues (part 2)
Fix remaining flake8 issues in leakgallery.py:
- Reformat line breaks to avoid W503 (line break before binary operator)
- Wrap long lines to respect E501 (line too long > 79 characters)
- Cleaned up exception logging for better clarity
- Confirmed all flake8 checks now pass successfully
This supersedes the previous commit, which partially fixed the formatting violations.
* [leakgallery] fix flake8 style issues (part 3)
* [leakgallery] rename extractor classes
* [tests] update extractor results
* [tests] rename extractor results
* [leakgallery] rename extractor classes (part 2)
* [leakgallery] rename example
* update docs/supportedsites
* update test results
and convert line endings to '\n'
* update
- convert line endings to '\n'
- use _pagination method
- fix logging calls
* return more metadata for _pagination() results
This commit is contained in:
@@ -547,6 +547,12 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<td>Chapters, Manga</td>
|
<td>Chapters, Manga</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Leak Gallery</td>
|
||||||
|
<td>https://leakgallery.com</td>
|
||||||
|
<td>Most Liked Posts, Posts, Trending Medias, User Profiles</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>Lensdump</td>
|
<td>Lensdump</td>
|
||||||
<td>https://lensdump.com/</td>
|
<td>https://lensdump.com/</td>
|
||||||
|
|||||||
@@ -99,6 +99,7 @@ modules = [
|
|||||||
"kemono",
|
"kemono",
|
||||||
"khinsider",
|
"khinsider",
|
||||||
"komikcast",
|
"komikcast",
|
||||||
|
"leakgallery",
|
||||||
"lensdump",
|
"lensdump",
|
||||||
"lexica",
|
"lexica",
|
||||||
"lightroom",
|
"lightroom",
|
||||||
|
|||||||
141
gallery_dl/extractor/leakgallery.py
Normal file
141
gallery_dl/extractor/leakgallery.py
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extractors for https://leakgallery.com"""
|
||||||
|
|
||||||
|
from .common import Extractor, Message
|
||||||
|
from .. import text
|
||||||
|
|
||||||
|
BASE_PATTERN = r"(?:https?://)?(?:www\.)?leakgallery\.com"
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryExtractor(Extractor):
    """Base class for leakgallery extractors"""
    category = "leakgallery"
    directory_fmt = ("{category}", "{creator}")
    filename_fmt = "{id}_(unknown).{extension}"
    archive_fmt = "{creator}_{id}"

    def _yield_media_items(self, medias, creator=None):
        """Yield Directory/Url messages for each media entry

        medias : list of media dicts as returned by the API
        creator: creator name; if None, it is taken from each entry's
                 embedded profile data, falling back to "unknown"
        """
        seen = set()
        for media in medias:
            path = media["file_path"]
            if path in seen:
                # API results occasionally repeat entries; emit each once
                continue
            seen.add(path)

            if creator is None:
                # targeted lookup instead of a broad 'except Exception';
                # 'profile' may be missing or null in API responses
                profile = media.get("profile")
                username = profile.get("username") \
                    if isinstance(profile, dict) else None
                media["creator"] = username or "unknown"
            else:
                media["creator"] = creator

            media["url"] = url = f"https://cdn.leakgallery.com/{path}"
            text.nameext_from_url(url, media)
            yield Message.Directory, media
            yield Message.Url, url, media

    def _pagination(self, type, base, params=None, creator=None, pnum=1):
        """Yield media items from all pages of a paginated API endpoint

        type   : label used in error messages (e.g. "trending")
        base   : API URL prefix; the page number is appended directly
        params : optional query parameters
        creator: forwarded to _yield_media_items()
        pnum   : first page number to request
        """
        while True:
            # keep only the network request inside the 'try', so that
            # exceptions raised by consumers of the yielded messages are
            # not swallowed and mislabeled as a request failure
            try:
                data = self.request_json(f"{base}{pnum}", params=params)
            except Exception as exc:
                self.log.error("Failed to retrieve %s page %s: %s",
                               type, pnum, exc)
                return

            if not data:
                return
            if "medias" in data:
                data = data["medias"]
            # an empty or non-list payload marks the end of pagination
            if not isinstance(data, list) or not data:
                return

            yield from self._yield_media_items(data, creator)
            pnum += 1
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryUserExtractor(LeakgalleryExtractor):
    """Extractor for profile posts on leakgallery.com"""
    subcategory = "user"
    pattern = (
        BASE_PATTERN +
        r"/(?!trending-medias|most-liked|random/medias)([^/?#]+)"
        r"(?:/(Photos|Videos|All))?"
        r"(?:/(MostRecent|MostViewed|MostLiked))?/?$"
    )
    example = "https://leakgallery.com/creator"

    def items(self):
        # URL groups: creator name plus optional type/sort filters
        creator, media_type, sort_order = self.groups
        endpoint = f"https://api.leakgallery.com/profile/{creator}/"
        query = {
            "type": media_type or "All",
            "sort": sort_order or "MostRecent",
        }
        return self._pagination(creator, endpoint, query, creator)
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryTrendingExtractor(LeakgalleryExtractor):
    """Extractor for trending posts on leakgallery.com"""
    subcategory = "trending"
    pattern = BASE_PATTERN + r"/trending-medias(?:/([\w-]+))?"
    example = "https://leakgallery.com/trending-medias/Week"

    def items(self):
        # optional period segment, e.g. 'Week'; default to 'Last-Hour'
        timeframe = self.groups[0]
        if not timeframe:
            timeframe = "Last-Hour"
        endpoint = f"https://api.leakgallery.com/popular/media/{timeframe}/"
        return self._pagination("trending", endpoint)
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryMostlikedExtractor(LeakgalleryExtractor):
    """Extractor for most liked posts on leakgallery.com"""
    subcategory = "mostliked"
    pattern = BASE_PATTERN + r"/most-liked"
    example = "https://leakgallery.com/most-liked"

    def items(self):
        # single fixed endpoint; pagination handles the rest
        endpoint = "https://api.leakgallery.com/most-liked/"
        return self._pagination("most-liked", endpoint)
|
||||||
|
|
||||||
|
|
||||||
|
class LeakgalleryPostExtractor(LeakgalleryExtractor):
    """Extractor for individual posts on leakgallery.com"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
    example = "https://leakgallery.com/CREATOR/12345"

    def items(self):
        creator, post_id = self.groups
        # 'page_url' instead of 'url': the original reused 'url' as the
        # loop variable below, shadowing the page URL
        page_url = f"https://leakgallery.com/{creator}/{post_id}"

        # keep only the network request inside the 'try', so that
        # exceptions raised by consumers of the yielded messages are
        # not swallowed and mislabeled as an extraction failure
        try:
            page = self.request(page_url).text
        except Exception as exc:
            self.log.error("Failed to extract post page %s/%s: %s",
                           creator, post_id, exc)
            return

        # videos first, then images, matching CDN watermark naming
        video_urls = text.re(
            r"https://cdn\.leakgallery\.com/content[^/?#]*/"
            r"(?:compressed_)?watermark_[^\"]+\."
            r"(?:mp4|mov|m4a|webm)"
        ).findall(page)
        image_urls = text.re(
            r"https://cdn\.leakgallery\.com/content[^/?#]*/"
            r"watermark_[^\"]+\.(?:jpe?g|png)"
        ).findall(page)

        seen = set()
        for media_url in video_urls + image_urls:
            if media_url in seen:
                # the page may embed the same file more than once
                continue
            seen.add(media_url)
            data = {
                "id": post_id,
                "creator": creator,
                "url": media_url,
            }
            text.nameext_from_url(media_url, data)
            yield Message.Directory, data
            yield Message.Url, media_url, data
|
||||||
@@ -91,6 +91,7 @@ CATEGORY_MAP = {
|
|||||||
"jpgfish" : "JPG Fish",
|
"jpgfish" : "JPG Fish",
|
||||||
"kabeuchi" : "かべうち",
|
"kabeuchi" : "かべうち",
|
||||||
"schalenetwork" : "Schale Network",
|
"schalenetwork" : "Schale Network",
|
||||||
|
"leakgallery" : "Leak Gallery",
|
||||||
"livedoor" : "livedoor Blog",
|
"livedoor" : "livedoor Blog",
|
||||||
"lofter" : "LOFTER",
|
"lofter" : "LOFTER",
|
||||||
"ohpolly" : "Oh Polly",
|
"ohpolly" : "Oh Polly",
|
||||||
@@ -298,6 +299,10 @@ SUBCATEGORY_MAP = {
|
|||||||
"discord-server": "",
|
"discord-server": "",
|
||||||
"posts" : "",
|
"posts" : "",
|
||||||
},
|
},
|
||||||
|
"leakgallery": {
|
||||||
|
"trending" : "Trending Medias",
|
||||||
|
"mostliked": "Most Liked Posts",
|
||||||
|
},
|
||||||
"lensdump": {
|
"lensdump": {
|
||||||
"albums": "",
|
"albums": "",
|
||||||
},
|
},
|
||||||
|
|||||||
47
test/results/leakgallery.py
Normal file
47
test/results/leakgallery.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import leakgallery
|
||||||
|
FILE_PATTERN = r"https://cdn.leakgallery.com/content(-videos|\d+)?/[\w.-]+\.\w+"
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
    # single post: exactly one known video result
    {
        "#url"    : "https://leakgallery.com/sophieraiin/12240",
        "#class"  : leakgallery.LeakgalleryPostExtractor,
        "#results": "https://cdn.leakgallery.com/content-videos/watermark_745_sophieraiin_241.mp4",

        "id"     : "12240",
        "creator": "sophieraiin",
    },

    # user profile: paginated results, checked against a URL pattern
    {
        "#url"    : "https://leakgallery.com/sophieraiin",
        "#class"  : leakgallery.LeakgalleryUserExtractor,
        "#pattern": r"https://cdn.leakgallery.com/content3/(compressed_)?watermark_[0-9a-f]+_sophieraiin_\w+\.(jpg|png|mp4|mov)",
        "#range"  : "1-100",
        "#count"  : 100,

        "creator": "sophieraiin",
    },

    # trending feed with an explicit period
    {
        "#url"    : "https://leakgallery.com/trending-medias/Week",
        "#class"  : leakgallery.LeakgalleryTrendingExtractor,
        "#pattern": FILE_PATTERN,
        "#range"  : "1-100",
        "#count"  : 100,
    },

    # most-liked feed
    {
        "#url"    : "https://leakgallery.com/most-liked",
        "#class"  : leakgallery.LeakgalleryMostlikedExtractor,
        "#pattern": FILE_PATTERN,
        "#range"  : "1-100",
        "#count"  : 100,
    },

)
|
||||||
Reference in New Issue
Block a user