gallery-dl/scripts/supportedsites.py
Farahat cf2e5a1619 [leakgallery] add support (#7872)
* add new extractor for leakgallery.com

    Added support for downloading photo and video posts from leakgallery.com.

    Supports:
    * Individual post URLs
    * User profile URLs with pagination via AJAX
    * Optional type/sort filters (e.g. /Photos/MostRecent)
    * Proper file extension handling
    * Creator-based folder structure
    * Compatibility with --download-archive

    Tested locally and working, but it may still need review or improvement. A rough
    sketch of the intended URL handling and pagination follows.
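    A minimal sketch of how such an extractor could be laid out. The class name,
    URL layout, metadata keys, and the AJAX endpoint below are assumptions for
    illustration only, not the code merged in #7872:

        import itertools

        from .common import Extractor, Message
        from .. import text


        class LeakgalleryUserExtractor(Extractor):
            """Sketch: leakgallery.com profiles with optional type/sort filters"""
            category = "leakgallery"
            subcategory = "user"
            root = "https://leakgallery.com"
            directory_fmt = ("{category}", "{creator}")  # creator-based folders
            archive_fmt = "{id}"                         # enables --download-archive
            pattern = (r"(?:https?://)?leakgallery\.com/([^/?#]+)"
                       r"(?:/(Photos|Videos))?(?:/(MostRecent|MostLiked))?/?$")
            example = "https://leakgallery.com/CREATOR/Photos/MostRecent"

            def items(self):
                creator, mtype, sort = self.groups
                for post in self.posts(creator, mtype, sort):
                    post["creator"] = creator
                    url = post["file_url"]  # assumed key name
                    # nameext_from_url() fills 'filename' and 'extension'
                    yield Message.Directory, post
                    yield Message.Url, url, text.nameext_from_url(url, post)

            def posts(self, creator, mtype, sort):
                # assumed AJAX endpoint; fetch pages until one comes back empty
                for pnum in itertools.count(1):
                    url = (f"{self.root}/api/{creator}/media/"
                           f"{mtype or 'All'}/{sort or 'MostRecent'}/{pnum}")
                    page = self.request(url).json()
                    if not page:
                        return
                    yield from page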
    
* [leakgallery] add support
    Added leakgallery to extractor module imports so it's recognized and used.
* [leakgallery] update extractor structure
    - Refactored using LeakGalleryExtractorBase to remove duplication
    - Moved init logic into items() using self.groups
    - Replaced re with text.re as per upstream guidance
    - Added creator fallback and media deduplication
    - Aligned structure with gallery-dl maintainer review tips (see the base-class sketch below)
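    Rough illustration of the shared-base idea; the metadata keys and the
    fallback value here are assumptions, not the merged implementation:

        from .common import Extractor, Message
        from .. import text


        class LeakgalleryExtractor(Extractor):
            """Base class sketch: subclasses only provide posts()"""
            category = "leakgallery"
            root = "https://leakgallery.com"

            def items(self):
                seen = set()
                for post in self.posts():
                    # drop media that appears on more than one listing page
                    key = post.get("id") or post.get("file_url")
                    if key in seen:
                        continue
                    seen.add(key)
                    # fall back to the profile name captured from the URL
                    if not post.get("creator"):
                        post["creator"] = self.groups[0] or "unknown"
                    url = post["file_url"]
                    yield Message.Directory, post
                    yield Message.Url, url, text.nameext_from_url(url, post)

            def posts(self):
                """Subclasses yield one metadata dict per media item"""
                return ()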
* [leakgallery] add support
    - Added leakgallery entry to supportedsites.md
    - Includes post, user, trending, and most-liked subcategories
* add exported extractor results
* [leakgallery] fix flake8 style issues
    Cleaned up code to comply with flake8 rules, especially:
    - removed unused imports
    - split long lines >79 chars
    - ensured newline at EOF
    No functional changes made; purely formatting to satisfy CI checks.
* [tests] update extractor results
* [leakgallery] fix flake8 style issues (part 2)
    Fix remaining flake8 issues in leakgallery.py:
    - Reformat line breaks to avoid W503 (line break before binary operator)
    - Wrap long lines to respect E501 (line too long > 79 characters)
    - Cleaned up exception logging for better clarity
    - Confirmed all flake8 checks now pass successfully
    This supersedes the previous commit, which partially fixed the formatting violations.
* [leakgallery] fix flake8 style issues (part 3)
* [leakgallery] rename extractor classes
* [tests] update extractor results
* [tests] rename extractor results
* [leakgallery] rename extractor classes (part 2)
* [leakgallery] rename example
* update docs/supportedsites
* update test results
    and convert line endings to '\n'
* update
    - convert line endings to '\n'
    - use _pagination method
    - fix logging calls
* return more metadata for _pagination() results
2025-07-22 22:50:25 +02:00

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Generate a Markdown document listing all supported sites"""
import os
import sys
import collections
import util
from gallery_dl import extractor

try:
    from test import results
except ImportError:
    results = None

CATEGORY_MAP = {
"2chan" : "Futaba Channel",
"35photo" : "35PHOTO",
"adultempire" : "Adult Empire",
"agnph" : "AGNPH",
"allgirlbooru" : "All girl",
"ao3" : "Archive of Our Own",
"archivedmoe" : "Archived.Moe",
"archiveofsins" : "Archive of Sins",
"artstation" : "ArtStation",
"aryion" : "Eka's Portal",
"atfbooru" : "ATFBooru",
"azurlanewiki" : "Azur Lane Wiki",
"b4k" : "arch.b4k.dev",
"baraag" : "baraag",
"batoto" : "BATO.TO",
"bbc" : "BBC",
"cien" : "Ci-en",
"cohost" : "cohost!",
"comicvine" : "Comic Vine",
"dankefuerslesen": "Danke fürs Lesen",
"deviantart" : "DeviantArt",
"drawfriends" : "Draw Friends",
"dynastyscans" : "Dynasty Reader",
"e621" : "e621",
"e926" : "e926",
"e6ai" : "e6AI",
"erome" : "EroMe",
"everia" : "EVERIA.CLUB",
"e-hentai" : "E-Hentai",
"exhentai" : "ExHentai",
"fallenangels" : "Fallen Angels Scans",
"fanbox" : "pixivFANBOX",
"fashionnova" : "Fashion Nova",
"furaffinity" : "Fur Affinity",
"furry34" : "Furry 34 com",
"girlswithmuscle": "Girls with Muscle",
"hatenablog" : "HatenaBlog",
"hbrowse" : "HBrowse",
"hentai2read" : "Hentai2Read",
"hentaicosplay" : "Hentai Cosplay",
"hentaienvy" : "HentaiEnvy",
"hentaiera" : "HentaiEra",
"hentaifoundry" : "Hentai Foundry",
"hentaifox" : "HentaiFox",
"hentaihand" : "HentaiHand",
"hentaihere" : "HentaiHere",
"hentaiimg" : "Hentai Image",
"hentainexus" : "HentaiNexus",
"hentairox" : "HentaiRox",
"hentaizap" : "HentaiZap",
"hiperdex" : "HiperDEX",
"hitomi" : "Hitomi.la",
"horne" : "horne",
"idolcomplex" : "Idol Complex",
"illusioncardsbooru": "Illusion Game Cards",
"imagebam" : "ImageBam",
"imagefap" : "ImageFap",
"imagepond" : "ImagePond",
"imgbb" : "ImgBB",
"imgbox" : "imgbox",
"imagechest" : "ImageChest",
"imgkiwi" : "IMG.Kiwi",
"imgth" : "imgth",
"imgur" : "imgur",
"imhentai" : "IMHentai",
"joyreactor" : "JoyReactor",
"itchio" : "itch.io",
"jpgfish" : "JPG Fish",
"kabeuchi" : "かべうち",
"schalenetwork" : "Schale Network",
"leakgallery" : "Leak Gallery",
"livedoor" : "livedoor Blog",
"lofter" : "LOFTER",
"ohpolly" : "Oh Polly",
"omgmiamiswimwear": "Omg Miami Swimwear",
"mangadex" : "MangaDex",
"mangafox" : "Manga Fox",
"mangahere" : "Manga Here",
"mangakakalot" : "MangaKakalot",
"manganato" : "MangaNato",
"mangapark" : "MangaPark",
"mangaread" : "MangaRead",
"mariowiki" : "Super Mario Wiki",
"mastodon.social": "mastodon.social",
"mediawiki" : "MediaWiki",
"micmicidol" : "MIC MIC IDOL",
"myhentaigallery": "My Hentai Gallery",
"myportfolio" : "Adobe Portfolio",
"natomanga" : "MangaNato",
"naver-blog" : "Naver Blog",
"naver-chzzk" : "CHZZK",
"naver-webtoon" : "Naver Webtoon",
"nelomanga" : "MangaNelo",
"nhentai" : "nhentai",
"nijie" : "nijie",
"nozomi" : "Nozomi.la",
"nsfwalbum" : "NSFWalbum.com",
"nudostar" : "NudoStar.TV",
"paheal" : "Rule 34",
"photovogue" : "PhotoVogue",
"pidgiwiki" : "PidgiWiki",
"pixeldrain" : "pixeldrain",
"pixiv" : "[pixiv]",
"pixiv-novel" : "[pixiv] Novels",
"pornimage" : "Porn Image",
"pornpics" : "PornPics.com",
"pornreactor" : "PornReactor",
"readcomiconline": "Read Comic Online",
"redbust" : "RedBust",
"rbt" : "RebeccaBlackTech",
"redgifs" : "RedGIFs",
"rozenarcana" : "Rozen Arcana",
"rule34" : "Rule 34",
"rule34hentai" : "Rule34Hentai",
"rule34us" : "Rule 34",
"rule34vault" : "R34 Vault",
"rule34xyz" : "Rule 34 XYZ",
"sankaku" : "Sankaku Channel",
"sankakucomplex" : "Sankaku Complex",
"seiga" : "Niconico Seiga",
"senmanga" : "Sen Manga",
"sensescans" : "Sense-Scans",
"sexcom" : "Sex.com",
"simplyhentai" : "Simply Hentai",
"slickpic" : "SlickPic",
"slideshare" : "SlideShare",
"smugmug" : "SmugMug",
"speakerdeck" : "Speaker Deck",
"steamgriddb" : "SteamGridDB",
"subscribestar" : "SubscribeStar",
"tbib" : "The Big ImageBoard",
"tcbscans" : "TCB Scans",
"tco" : "Twitter t.co",
"thatpervert" : "ThatPervert",
"thebarchive" : "The /b/ Archive",
"thecollection" : "The /co/llection",
"tiktok" : "TikTok",
"tmohentai" : "TMOHentai",
"tumblrgallery" : "TumblrGallery",
"vanillarock" : "もえぴりあ",
"vidyart2" : "/v/idyart2",
"vidyapics" : "Vidya Booru",
"visuabusters" : "VISUABUSTERS",
"vk" : "VK",
"vsco" : "VSCO",
"wallpapercave" : "Wallpaper Cave",
"webmshare" : "webmshare",
"webtoons" : "WEBTOON",
"weebcentral" : "Weeb Central",
"wikiart" : "WikiArt.org",
"wikigg" : "wiki.gg",
"wikimediacommons": "Wikimedia Commons",
"xbunkr" : "xBunkr",
"xhamster" : "xHamster",
"xvideos" : "XVideos",
"yandere" : "yande.re",
"yiffverse" : "Yiff verse",
}
SUBCATEGORY_MAP = {
"" : "",
"art" : "Art",
"audio" : "Audio",
"doujin" : "Doujin",
"home" : "Home Feed",
"image" : "individual Images",
"index" : "Site Index",
"info" : "User Profile Information",
"issue" : "Comic Issues",
"manga" : "Manga",
"media" : "Media Files",
"popular": "Popular Images",
"recent" : "Recent Images",
"search" : "Search Results",
"status" : "Images from Statuses",
"tag" : "Tag Searches",
"tweets" : "",
"user" : "User Profiles",
"watch" : "Watches",
"direct-messages": "DMs",
"following" : "Followed Users",
"related-pin" : "related Pins",
"related-board" : "",
"ao3": {
"user-works" : "",
"user-series" : "",
"user-bookmark": "Bookmarks",
},
"arcalive": {
"user": "User Posts",
},
"artstation": {
"artwork": "Artwork Listings",
"collections": "",
},
"bilibili": {
"user-articles": "User Articles",
"user-articles-favorite": "User Article Favorites",
},
"bluesky": {
"posts": "",
},
"boosty": {
"feed": "Subscriptions Feed",
},
"civitai": {
"models": "Model Listings",
"images": "Image Listings",
"posts" : "Post Listings",
"search-models": "Model Searches",
"search-images": "Image Searches",
"user-models": "User Models",
"user-images": ("User Images", "Image Reactions"),
"user-posts" : "User Posts",
"user-videos": ("User Videos", "Video Reactions"),
"generated": "Generated Files",
},
"coomer": {
"discord" : "",
"discord-server": "",
"posts" : "",
},
"Danbooru": {
"artist-search": "Artist Searches",
"favgroup": "Favorite Groups",
},
"desktopography": {
"site": "",
},
"deviantart": {
"gallery-search": "Gallery Searches",
"stash" : "Sta.sh",
"status": "Status Updates",
"watch-posts": "",
},
"discord": {
"direct-message" : "",
},
"fanbox": {
"supporting": "Supported User Feed",
"redirect" : "Pixiv Redirects",
},
"fapello": {
"path": ["Videos", "Trending Posts", "Popular Videos", "Top Models"],
},
"furaffinity": {
"submissions": "New Submissions",
},
"hatenablog": {
"archive": "Archive",
"entry" : "Individual Posts",
},
"hentaifoundry": {
"story": "",
},
"imgur": {
"favorite-folder": "Favorites Folders",
"me": "Personal Posts",
},
"inkbunny": {
"unread": "Unread Submissions",
},
"instagram": {
"posts": "",
"saved": "Saved Posts",
"tagged": "Tagged Posts",
},
"iwara": {
"user-images": "User Images",
"user-videos": "User Videos",
"user-playlists": "User Playlists",
},
"kemono": {
"discord" : "Discord Servers",
"discord-server": "",
"posts" : "",
},
"leakgallery": {
"trending" : "Trending Medias",
"mostliked": "Most Liked Posts",
},
"lensdump": {
"albums": "",
},
"lofter": {
"blog-posts": "Blog Posts",
},
"mangadex": {
"feed": "Updates Feed",
"following" : "Library",
"list": "MDLists",
},
"misskey": {
"note" : "Notes",
"notes": "User Notes",
},
"nijie": {
"followed": "Followed Users",
"nuita" : "Nuita History",
},
"pinterest": {
"board": "",
"pinit": "pin.it Links",
"created": "Created Pins",
"allpins": "All Pins",
},
"pixeldrain": {
"folder": "Filesystems",
},
"pixiv": {
"me" : "pixiv.me Links",
"novel-bookmark": "Novel Bookmarks",
"novel-series": "Novel Series",
"novel-user": "",
"pixivision": "pixivision",
"sketch": "Sketch",
"unlisted": "Unlisted Works",
"work": "individual Images",
},
"poringa": {
"post": "Posts Images",
},
"pornhub": {
"gifs": "",
},
"raddle": {
"usersubmissions": "User Profiles",
"post" : "Individual Posts",
"shorturl" : "",
},
"redbust": {
"gallery": ("Galleries", "Categories"),
},
"redgifs": {
"collections": "",
},
"sankaku": {
"books": "Book Searches",
},
"scrolller": {
"following": "Followed Subreddits",
},
"sexcom": {
"pins": "User Pins",
},
"skeb": {
"following" : "Followed Creators",
"following-users": "Followed Users",
},
"smugmug": {
"path": "Images from Users and Folders",
},
"steamgriddb": {
"asset": "Individual Assets",
},
"tiktok": {
"vmpost": "VM Posts",
},
"tumblr": {
"day": "Days",
},
"twitter": {
"media": "Media Timelines",
"tweets": "",
"replies": "",
"community": "",
"list-members": "List Members",
},
"vk": {
"tagged": "Tagged Photos",
},
"vsco": {
"spaces": "",
},
"wallhaven": {
"collections": "",
"uploads" : "",
},
"wallpapercave": {
"image": ["individual Images", "Search Results"],
},
"weasyl": {
"journals" : "",
"submissions": "",
},
"weibo": {
"home": "",
"newvideo": "",
},
"wikiart": {
"artists": "Artist Listings",
},
"wikimedia": {
"article": ["Articles", "Categories", "Files"],
},
}
BASE_MAP = {
"E621" : "e621 Instances",
"foolfuuka" : "FoolFuuka 4chan Archives",
"foolslide" : "FoOlSlide Instances",
"gelbooru_v01": "Gelbooru Beta 0.1.11",
"gelbooru_v02": "Gelbooru Beta 0.2",
"hentaicosplays": "Hentai Cosplay Instances",
"IMHentai" : "IMHentai and Mirror Sites",
"jschan" : "jschan Imageboards",
"lolisafe" : "lolisafe and chibisafe",
"lynxchan" : "LynxChan Imageboards",
"manganelo" : "MangaNelo and Mirror Sites",
"moebooru" : "Moebooru and MyImouto",
"szurubooru" : "szurubooru Instances",
"urlshortener": "URL Shorteners",
"vichan" : "vichan Imageboards",
}
URL_MAP = {
"blogspot" : "https://www.blogger.com/",
"wikimedia": "https://www.wikimedia.org/",
}
_OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>'
_COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>'
_APIKEY_DB = ('<a href="https://gdl-org.github.io/docs/configuration.html'
'#extractor-derpibooru-api-key">API Key</a>')
_APIKEY_WH = ('<a href="https://gdl-org.github.io/docs/configuration.html'
'#extractor-wallhaven-api-key">API Key</a>')
_APIKEY_WY = ('<a href="https://gdl-org.github.io/docs/configuration.html'
'#extractor-weasyl-api-key">API Key</a>')
_TIKTOK_MORE_INFO = '''
<br /><span title="Pass `-o videos` to download photos only, and
`-o tiktok-range=a-yt-dlp-range` to tell yt-dlp to only extract XYZ links
when scraping a user profile. Also note that profile avatars will only be
downloaded when downloading a User Profile.">Hover for more info</span>'''
AUTH_MAP = {
"aibooru" : "Supported",
"ao3" : "Supported",
"aryion" : "Supported",
"atfbooru" : "Supported",
"baraag" : _OAUTH,
"bluesky" : "Supported",
"booruvar" : "Supported",
"boosty" : _COOKIES,
"coomer" : "Supported",
"danbooru" : "Supported",
"derpibooru" : _APIKEY_DB,
"deviantart" : _OAUTH,
"e621" : "Supported",
"e6ai" : "Supported",
"e926" : "Supported",
"e-hentai" : "Supported",
"exhentai" : "Supported",
"facebook" : _COOKIES,
"fanbox" : _COOKIES,
"fantia" : _COOKIES,
"flickr" : _OAUTH,
"furaffinity" : _COOKIES,
"furbooru" : "API Key",
"girlswithmuscle": "Supported",
"horne" : "Required",
"idolcomplex" : "Supported",
"imgbb" : "Supported",
"inkbunny" : "Supported",
"instagram" : _COOKIES,
"iwara" : "Supported",
"kemono" : "Supported",
"madokami" : "Required",
"mangadex" : "Supported",
"mangoxo" : "Supported",
"mastodon.social": _OAUTH,
"newgrounds" : "Supported",
"nijie" : "Required",
"patreon" : _COOKIES,
"pawoo" : _OAUTH,
"pillowfort" : "Supported",
"pinterest" : _COOKIES,
"pixiv" : _OAUTH,
"ponybooru" : "API Key",
"reddit" : _OAUTH,
"rule34xyz" : "Supported",
"sankaku" : "Supported",
"scrolller" : "Supported",
"seiga" : "Supported",
"smugmug" : _OAUTH,
"subscribestar" : "Supported",
"tapas" : "Supported",
"tiktok" : _COOKIES + _TIKTOK_MORE_INFO,
"tsumino" : "Supported",
"tumblr" : _OAUTH,
"twitter" : "Supported",
"vipergirls" : "Supported",
"wallhaven" : _APIKEY_WH,
"weasyl" : _APIKEY_WY,
"zerochan" : "Supported",
}
IGNORE_LIST = (
"directlink",
"oauth",
"recursive",
"test",
"ytdl",
"generic",
"noop",
)


def domain(cls):
    """Return the domain name associated with an extractor class"""
    try:
        # extractor module docstrings conventionally end with the site URL
        url = sys.modules[cls.__module__].__doc__.split()[-1]
        if url.startswith("http"):
            return url
    except Exception:
        pass

    if hasattr(cls, "root") and cls.root:
        return cls.root + "/"

    url = cls.example
    return url[:url.find("/", 8)+1]


def category_text(c):
    """Return a human-readable representation of a category"""
    return CATEGORY_MAP.get(c) or c.capitalize()


def subcategory_text(bc, c, sc):
    """Return a human-readable representation of a subcategory"""
    if c in SUBCATEGORY_MAP:
        scm = SUBCATEGORY_MAP[c]
        if sc in scm:
            txt = scm[sc]
            if not isinstance(txt, str):
                txt = ", ".join(txt)
            return txt

    if bc and bc in SUBCATEGORY_MAP:
        scm = SUBCATEGORY_MAP[bc]
        if sc in scm:
            txt = scm[sc]
            if not isinstance(txt, str):
                txt = ", ".join(txt)
            return txt

    if sc in SUBCATEGORY_MAP:
        return SUBCATEGORY_MAP[sc]

    # fallback: capitalize and pluralize the subcategory name
    sc = sc.capitalize()
    if sc.endswith("y"):
        sc = sc[:-1] + "ies"
    elif not sc.endswith("s"):
        sc += "s"
    return sc


def category_key(c):
    """Generate sorting keys by category"""
    # ignore a leading '[' (e.g. "[pixiv]") when sorting
    return category_text(c[0]).lower().lstrip("[")


def subcategory_key(sc):
    """Generate sorting keys by subcategory"""
    # "A" sorts before lowercase names, so "issue" always comes first
    return "A" if sc == "issue" else sc


def build_extractor_list():
    """Generate a sorted list of lists of extractor classes"""
    categories = collections.defaultdict(lambda: collections.defaultdict(list))
    default = categories[""]
    domains = {"": ""}

    for extr in extractor._list_classes():
        category = extr.category
        if category in IGNORE_LIST:
            continue
        if category:
            default[category].append(extr.subcategory)
            if category not in domains:
                domains[category] = domain(extr)
        else:
            base = categories[extr.basecategory]
            if not extr.instances:
                base[""].append(extr.subcategory)
                continue
            for category, root, info in extr.instances:
                base[category].append(extr.subcategory)
                if category not in domains:
                    if not root:
                        if category in URL_MAP:
                            root = URL_MAP[category].rstrip("/")
                        elif results:
                            # use domain from first matching test
                            test = results.category(category)[0]
                            root = test["#class"].from_url(test["#url"]).root
                    domains[category] = root + "/"

    # sort subcategory lists
    for base in categories.values():
        for subcategories in base.values():
            subcategories.sort(key=subcategory_key)

    domains["pixiv-novel"] += "novel"

    # add e-hentai.org
    default["e-hentai"] = default["exhentai"]
    domains["e-hentai"] = domains["exhentai"].replace("x", "-")

    # add coomer.su
    default["coomer"] = default["kemono"]
    domains["coomer"] = domains["kemono"].replace("kemono", "coomer")

    # add wikifeetx.com
    default["wikifeetx"] = default["wikifeet"]
    domains["wikifeetx"] = "https://www.wikifeetx.com/"

    # add extra e621 extractors
    categories["E621"]["e621"].extend(default.pop("e621", ()))

    return categories, domains


# define table columns
COLUMNS = (
    ("Site", 20,
     lambda bc, c, scs, d: category_text(c)),
    ("URL", 35,
     lambda bc, c, scs, d: d),
    ("Capabilities", 50,
     lambda bc, c, scs, d: ", ".join(subcategory_text(bc, c, sc) for sc in scs
                                     if subcategory_text(bc, c, sc))),
    ("Authentication", 16,
     lambda bc, c, scs, d: AUTH_MAP.get(c, "")),
)


def generate_output(columns, categories, domains):

    thead = []
    append = thead.append
    append("<tr>")
    for column in columns:
        append(" <th>" + column[0] + "</th>")
    append("</tr>")

    tbody = []
    append = tbody.append
    for bcat, base in categories.items():

        if bcat and base:
            name = BASE_MAP.get(bcat) or (bcat.capitalize() + " Instances")
            append('\n<tr>\n <td colspan="4"><strong>' +
                   name + '</strong></td>\n</tr>')
            clist = base.items()
        else:
            clist = sorted(base.items(), key=category_key)

        for category, subcategories in clist:
            append("<tr>")
            for column in columns:
                domain = domains[category]
                content = column[2](bcat, category, subcategories, domain)
                append(" <td>" + content + "</td>")
            append("</tr>")

    TEMPLATE = """# Supported Sites
<!-- auto-generated by {} -->
Consider all listed sites to potentially be NSFW.
<table>
<thead valign="bottom">
{}
</thead>
<tbody valign="top">
{}
</tbody>
</table>
"""
    return TEMPLATE.format(
        "/".join(os.path.normpath(__file__).split(os.sep)[-2:]),
        "\n".join(thead),
        "\n".join(tbody),
    )


categories, domains = build_extractor_list()
PATH = (sys.argv[1] if len(sys.argv) > 1 else
        util.path("docs", "supportedsites.md"))

with util.lazy(PATH) as fp:
    fp.write(generate_output(COLUMNS, categories, domains))