Files
gallery-dl/gallery_dl/extractor/__init__.py
Farahat cf2e5a1619 [leakgallery] add support (#7872)
* add new extractor for leakgallery.com

    Added support for downloading photo and video posts from leakgallery.com.

    Supports:
    * Individual post URLs
    * User profile URLs with pagination via AJAX
    * Optional type/sort filters (e.g. /Photos/MostRecent)
    * Proper file extension handling
    * Creator-based folder structure
    * Compatibility with --download-archive

    Tested locally and functional, but may still need review or improvement.
    
* [leakgallery] add support
    Added leakgallery to extractor module imports so it's recognized and used.
* [leakgallery] update extractor structure
    - Refactored using LeakGalleryExtractorBase to remove duplication
    - Moved init logic into items() using self.groups
    - Replaced re with text.re as per upstream guidance
    - Added creator fallback and media deduplication
    - Aligned structure with gallery-dl maintainer review tips
* [leakgallery] add support
    - Added leakgallery entry to supportedsites.md
    - Includes post, user, trending, and most-liked subcategories
* add exported extractor results
* [leakgallery] fix flake8 style issues
    Cleaned up code to comply with flake8 rules, especially:
    - removed unused imports
    - split long lines >79 chars
    - ensured newline at EOF
    No functional changes made; purely formatting to satisfy CI checks.
* [tests] update extractor results
* [leakgallery] fix flake8 style issues (part 2)
    Fix remaining flake8 issues in leakgallery.py:
    - Reformat line breaks to avoid W503 (line break before binary operator)
    - Wrap long lines to respect E501 (line too long > 79 characters)
    - Cleaned up exception logging for better clarity
    - Confirmed all flake8 checks now pass successfully
    This supersedes the previous commit, which partially fixed formatting violations.
* [leakgallery] fix flake8 style issues (part 3)
* [leakgallery] rename extractor classes
* [tests] update extractor results
* [tests] rename extractor results
* [leakgallery] rename extractor classes (part 2)
* [leakgallery] rename example
* update docs/supportedsites
* update test results
    and convert line endings to '\n'
* update
    - convert line endings to '\n'
    - use _pagination method
    - fix logging calls
* return more metadata for _pagination() results
2025-07-22 22:50:25 +02:00

313 lines
5.5 KiB
Python

# -*- coding: utf-8 -*-
# Copyright 2015-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import sys
from ..text import re_compile
# Names of all extractor modules, imported lazily and in this order by
# _modules_internal()/_list_classes().  Since find() returns the first
# extractor whose pattern matches, list position determines match priority:
# site-specific modules come first, while the shared/meta modules
# ("booru" ... "generic") are listed at the end as fallbacks.
modules = [
    "2ch",
    "2chan",
    "2chen",
    "35photo",
    "3dbooru",
    "4chan",
    "4archive",
    "4chanarchives",
    "500px",
    "8chan",
    "8muses",
    "adultempire",
    "agnph",
    "ao3",
    "arcalive",
    "architizer",
    "artstation",
    "aryion",
    "batoto",
    "bbc",
    "behance",
    "bilibili",
    "blogger",
    "bluesky",
    "boosty",
    "bunkr",
    "catbox",
    "chevereto",
    "cien",
    "civitai",
    "comick",
    "comicvine",
    "cyberdrop",
    "danbooru",
    "dankefuerslesen",
    "desktopography",
    "deviantart",
    "discord",
    "dynastyscans",
    "e621",
    "erome",
    "everia",
    "exhentai",
    "facebook",
    "fanbox",
    "fantia",
    "fapello",
    "fapachi",
    "flickr",
    "furaffinity",
    "furry34",
    "fuskator",
    "gelbooru",
    "gelbooru_v01",
    "gelbooru_v02",
    "girlsreleased",
    "girlswithmuscle",
    "gofile",
    "hatenablog",
    "hentai2read",
    "hentaicosplays",
    "hentaifoundry",
    "hentaihand",
    "hentaihere",
    "hentainexus",
    "hiperdex",
    "hitomi",
    "hotleak",
    "idolcomplex",
    "imagebam",
    "imagechest",
    "imagefap",
    "imgbb",
    "imgbox",
    "imgth",
    "imgur",
    "imhentai",
    "inkbunny",
    "instagram",
    "issuu",
    "itaku",
    "itchio",
    "iwara",
    "jschan",
    "kabeuchi",
    "keenspot",
    "kemono",
    "khinsider",
    "komikcast",
    "leakgallery",
    "lensdump",
    "lexica",
    "lightroom",
    "livedoor",
    "lofter",
    "luscious",
    "lynxchan",
    "madokami",
    "mangadex",
    "mangafox",
    "mangahere",
    "manganelo",
    "mangapark",
    "mangaread",
    "mangoxo",
    "misskey",
    "motherless",
    "myhentaigallery",
    "myportfolio",
    "naverblog",
    "naverchzzk",
    "naverwebtoon",
    "nekohouse",
    "newgrounds",
    "nhentai",
    "nijie",
    "nitter",
    "nozomi",
    "nsfwalbum",
    "nudostar",
    "paheal",
    "patreon",
    "pexels",
    "philomena",
    "photovogue",
    "picarto",
    "pictoa",
    "piczel",
    "pillowfort",
    "pinterest",
    "pixeldrain",
    "pixiv",
    "pixnet",
    "plurk",
    "poipiku",
    "poringa",
    "pornhub",
    "pornpics",
    "postmill",
    "rawkuma",
    "reactor",
    "readcomiconline",
    "realbooru",
    "redbust",
    "reddit",
    "redgifs",
    "rule34us",
    "rule34vault",
    "rule34xyz",
    "saint",
    "sankaku",
    "sankakucomplex",
    "schalenetwork",
    "scrolller",
    "seiga",
    "senmanga",
    "sexcom",
    "shimmie2",
    "simplyhentai",
    "skeb",
    "slickpic",
    "slideshare",
    "smugmug",
    "soundgasm",
    "speakerdeck",
    "steamgriddb",
    "subscribestar",
    "szurubooru",
    "tapas",
    "tcbscans",
    "telegraph",
    "tenor",
    "tiktok",
    "tmohentai",
    "toyhouse",
    "tsumino",
    "tumblr",
    "tumblrgallery",
    "twibooru",
    "twitter",
    "urlgalleries",
    "unsplash",
    "uploadir",
    "urlshortener",
    "vanillarock",
    "vichan",
    "vipergirls",
    "vk",
    "vsco",
    "wallhaven",
    "wallpapercave",
    "warosu",
    "weasyl",
    "webmshare",
    "webtoons",
    "weebcentral",
    "weibo",
    "wikiart",
    "wikifeet",
    "wikimedia",
    "xfolio",
    "xhamster",
    "xvideos",
    "yiffverse",
    "zerochan",
    "zzup",
    # shared/meta extractor modules — kept after the site-specific entries
    "booru",
    "moebooru",
    "foolfuuka",
    "foolslide",
    "mastodon",
    "shopify",
    "lolisafe",
    "imagehosts",
    "directlink",
    "recursive",
    "oauth",
    "noop",
    "ytdl",
    "generic",
]
def find(url):
    """Find a suitable extractor for the given URL"""
    # Try every known extractor class in registration order and
    # instantiate the first one whose URL pattern matches.
    for extr_cls in _list_classes():
        match = extr_cls.pattern.match(url)
        if match is not None:
            return extr_cls(match)
    return None
def add(cls):
    """Add 'cls' to the list of available extractors"""
    pattern = cls.pattern
    if isinstance(pattern, str):
        # patterns start out as plain strings; compile on first registration
        cls.pattern = re_compile(pattern)
    _cache.append(cls)
    return cls
def add_module(module):
    """Add all extractors in 'module' to the list of available extractors"""
    classes = _get_classes(module)
    if classes:
        # A freshly imported module still carries string patterns;
        # checking the first class is enough, since they are compiled
        # together the first time the module is registered.
        if isinstance(classes[0].pattern, str):
            for extr_cls in classes:
                extr_cls.pattern = re_compile(extr_cls.pattern)
        _cache.extend(classes)
    return classes
def extractors():
    """Return all available extractor classes, sorted by class name"""
    classes = list(_list_classes())
    classes.sort(key=lambda cls: cls.__name__)
    return classes
# --------------------------------------------------------------------
# internals
def _list_classes():
    """Yield available extractor classes"""
    # Serve everything registered so far first ...
    yield from _cache
    # ... then import the remaining extractor modules one at a time,
    # registering and yielding their classes on demand.
    for module in _module_iter:
        yield from add_module(module)
    # _module_iter is exhausted here, so _cache is complete: replace this
    # generator function with a cheap cache lookup for all later calls.
    globals()["_list_classes"] = lambda : _cache
def _modules_internal():
    """Lazily yield each extractor module named in 'modules'"""
    namespace = globals()
    for name in modules:
        # level=1 performs a relative import from this package
        yield __import__(name, namespace, None, (), 1)
def _modules_path(path, files):
sys.path.insert(0, path)
try:
return [
__import__(name[:-3])
for name in files
if name.endswith(".py")
]
finally:
del sys.path[0]
def _get_classes(module):
"""Return a list of all extractor classes in a module"""
return [
cls for cls in module.__dict__.values() if (
hasattr(cls, "pattern") and cls.__module__ == module.__name__
)
]
# Extractor classes registered so far (filled by add()/add_module())
_cache = []
# Single lazy iterator over the extractor modules; shared by all
# _list_classes() calls so each module is imported at most once
_module_iter = _modules_internal()