From ea6fbfdd393cada0cc46da410a15497fd7935791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 20 Feb 2019 19:25:41 +0100 Subject: [PATCH] simplify build_supportedsites.py --- docs/supportedsites.rst | 40 ++--- scripts/build_supportedsites.py | 253 ++++++++++++++------------------ 2 files changed, 131 insertions(+), 162 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 73ae6706..c1ff8165 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -11,25 +11,25 @@ Adobe Portfolio https://www.myportfolio.com/ Images from Users, Gall arch.b4k.co https://arch.b4k.co/ Threads Archive of Sins https://archiveofsins.com/ Threads Archived.Moe https://archived.moe/ Threads -ArtStation https://www.artstation.com/ |Capabilities-0| +ArtStation https://www.artstation.com/ |artstation-C| Behance https://www.behance.net/ Images from Users, Collections, Galleries BobX http://www.bobx.com/dark/ Galleries, Idols Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches Optional Desuarchive https://desuarchive.org/ Threads -DeviantArt https://www.deviantart.com/ |Capabilities-1| Optional (OAuth) +DeviantArt https://www.deviantart.com/ |deviantart-C| Optional (OAuth) Doki Reader https://kobato.hologfx.com/reader/ Chapters, Manga Dynasty Reader https://dynasty-scans.com/ Chapters, individual Images, Search Results e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches ExHentai https://exhentai.org/ Favorites, Galleries, Search Results Optional Fallen Angels Scans https://www.fascans.com/ Chapters, Manga Fireden https://boards.fireden.net/ Threads -Flickr https://www.flickr.com/ |Capabilities-2| Optional (OAuth) +Flickr https://www.flickr.com/ |flickr-C| Optional (OAuth) Futaba Channel https://www.2chan.net/ Threads Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches Gfycat https://gfycat.com/ individual Images HBrowse https://www.hbrowse.com/ Chapters, Manga Hentai Cafe https://hentai.cafe/ Chapters, Manga -Hentai Foundry https://www.hentai-foundry.com/ |Capabilities-3| +Hentai Foundry https://www.hentai-foundry.com/ |hentaifoundry-C| Hentai2Read https://hentai2read.com/ Chapters, Manga HentaiFox https://hentaifox.com/ Galleries, Search Results HentaiHere https://hentaihere.com/ Chapters, Manga @@ -42,7 +42,7 @@ imgth https://imgth.com/ Galleries imgur https://imgur.com/ Albums, individual Images Instagram https://www.instagram.com/ Images from Users, individual Images Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga -Joyreactor http://joyreactor.cc/ |Capabilities-4| +Joyreactor http://joyreactor.cc/ |joyreactor-C| Khinsider https://downloads.khinsider.com/ Soundtracks Kirei Cake https://reader.kireicake.com/ Chapters, Manga KissManga https://kissmanga.com/ Chapters, Manga @@ -60,14 +60,14 @@ Newgrounds https://www.newgrounds.com/ Images from Users, indi Ngomik http://ngomik.in/ Chapters nhentai https://nhentai.net/ Galleries, Search Results Niconico Seiga http://seiga.nicovideo.jp/ Images from Users, individual Images Required -nijie https://nijie.info/ |Capabilities-5| Required +nijie https://nijie.info/ |nijie-C| Required Nyafuu Archive https://archive.nyafuu.org/ Threads Pawoo https://pawoo.net/ Images from Users, Images from Statuses Photobucket http://photobucket.com/ Albums, individual Images Piczel https://piczel.tv/ Images from Users, Folders, individual Images Pinterest https://www.pinterest.com/ Boards, Pins, pin.it Links, related Pins -Pixiv https://www.pixiv.net/ |Capabilities-6| Required -Pornreactor http://pornreactor.cc/ |Capabilities-7| +Pixiv https://www.pixiv.net/ |pixiv-C| Required +Pornreactor http://pornreactor.cc/ |pornreactor-C| PowerManga https://read.powermanga.org/ Chapters, Manga Read Comic Online https://readcomiconline.to/ Comic-Issues, Comics RebeccaBlackTech https://rbt.asia/ Threads @@ -81,7 +81,7 @@ Sen Manga https://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/reader/ Chapters, Manga Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos SlideShare https://www.slideshare.net/ Presentations -SmugMug https://www.smugmug.com/ |Capabilities-8| Optional (OAuth) +SmugMug https://www.smugmug.com/ |smugmug-C| Optional (OAuth) The /b/ Archive https://thebarchive.com/ Threads Tsumino https://www.tsumino.com/ Galleries, Search Results Optional Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) @@ -92,7 +92,7 @@ Weibo https://www.weibo.com/ Images from Users, Imag World Three http://www.slide.world-three.org/ Chapters, Manga XVideos https://www.xvideos.com/ Images from Users, Galleries Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches -|Site-0| https://yuki.la/ Threads +|yuki-S| https://yuki.la/ Threads Acidimg https://acidimg.cc/ individual Images Imagetwist https://imagetwist.com/ individual Images Imagevenue http://imagevenue.com/ individual Images @@ -103,13 +103,13 @@ Postimg https://postimages.org/ individual Images Turboimagehost https://www.turboimagehost.com/ individual Images ==================== =================================== ================================================== ================ -.. |Site-0| replace:: yuki.la 4chan archive -.. |Capabilities-0| replace:: Images from Users, Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results -.. |Capabilities-1| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Sta.sh -.. |Capabilities-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results -.. |Capabilities-3| replace:: Images from Users, Favorites, individual Images, Popular Images, Recent Images, Scraps -.. |Capabilities-4| replace:: Images from Users, Posts, Search Results, Tag-Searches -.. |Capabilities-5| replace:: Images from Users, Doujin, Favorites, individual Images -.. |Capabilities-6| replace:: Images from Users, Favorites, Follows, pixiv.me Links, Rankings, Search Results, Individual Images -.. |Capabilities-7| replace:: Images from Users, Posts, Search Results, Tag-Searches -.. |Capabilities-8| replace:: Albums, individual Images, Images from Users and Folders +.. |artstation-C| replace:: Images from Users, Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results +.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Sta.sh +.. |flickr-C| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results +.. |hentaifoundry-C| replace:: Images from Users, Favorites, individual Images, Popular Images, Recent Images, Scraps +.. |joyreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches +.. |nijie-C| replace:: Images from Users, Doujin, Favorites, individual Images +.. |pixiv-C| replace:: Images from Users, Favorites, Follows, pixiv.me Links, Rankings, Search Results, Individual Images +.. |pornreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches +.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders +.. |yuki-S| replace:: yuki.la 4chan archive diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 995e1cd5..c0795768 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -2,10 +2,11 @@ import sys import os.path +import collections ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.realpath(ROOTDIR)) -import gallery_dl.extractor # noqa +from gallery_dl import extractor # noqa CATEGORY_MAP = { @@ -66,6 +67,7 @@ CATEGORY_MAP = { } SUBCATEGORY_MAP = { + "artwork": "Artwork Listings", "doujin" : "Doujin", "gallery": "Galleries", "image" : "individual Images", @@ -106,176 +108,143 @@ AUTH_MAP = { } IGNORE_LIST = ( + "directlink", "oauth", + "recursive", + "test", ) -class RstColumn(): - _substitutions = [] +def domain(cls): + """Return the web-domain related to an extractor class""" + url = sys.modules[cls.__module__].__doc__.split()[-1] + if url.startswith("http"): + return url - def __init__(self, title, data, size=None): - self.data = self._transform(data) - self._subs = [] - self._substitutions.append(self._subs) + if hasattr(cls, "root") and cls.root: + return cls.root + "/" - if not size: - self.size = max(len(value) for value in data + [title]) - else: - self.size = size + if hasattr(cls, "https"): + scheme = "https" if cls.https else "http" + netloc = cls.__doc__.split()[-1] + return "{}://{}/".format(scheme, netloc) - self.title = self._pad(title) - for i, value in enumerate(self.data): - self.data[i] = self._pad(value) + test = next(cls._get_tests(), None) + if test: + url = test[0] + return url[:url.find("/", 8)+1] - def __str__(self): - return self.title - - def __len__(self): - return len(self.data) - - def __getitem__(self, key): - return self.data[key] if key < len(self.data) else [""] - - def _transform(self, data): - return [ - value if isinstance(value, str) else ", ".join(value) - for value in data - ] - - def _pad(self, s): - if len(s) <= self.size: - return s + " " * (self.size - len(s)) - else: - return self._substitute(s) - - def _substitute(self, value): - sub = "|{}-{}|".format(self.title.strip(), len(self._subs)) - self._subs.append((sub, value)) - return sub + " " * (self.size - len(sub)) - - -class RstTable(): - - def __init__(self, columns): - self.columns = columns - self.rowcount = max(len(col) for col in columns) - self.sep = " ".join("=" * col.size for col in columns) - - def __iter__(self): - yield self.sep - yield " ".join(col.title for col in self.columns) - yield self.sep - for i in range(self.rowcount): - yield self._format_row(i) - yield self.sep - - def _format_row(self, row): - return " ".join(col[row] for col in self.columns) - - -def build_list(): - extractors = [] - classes = [] - last = None - - for extr in gallery_dl.extractor.extractors(): - if not extr.category or extr.category in IGNORE_LIST: - continue - if extr.category == last or not last: - classes.append(extr) - elif last: - if classes[0].subcategory: - extractors.append(classes) - classes = [extr] - last = extr.category - extractors.append(classes) - - for extrlist in extractors: - extrlist.sort(key=subcategory_key) - for extr in extrlist: - extr.cat = map_category(extr.category) - extr.subcat = map_subcategory(extr.subcategory) - extractors.sort(key=category_key) - - return extractors - - -def get_domain(classes): - try: - cls = classes[0] - - url = sys.modules[cls.__module__].__doc__.split()[-1] - if url.startswith("http"): - return url - - if hasattr(cls, "root") and cls.root: - return cls.root + "/" - - if hasattr(cls, "https"): - scheme = "https" if cls.https else "http" - domain = cls.__doc__.split()[-1] - return "{}://{}/".format(scheme, domain) - - test = next(cls._get_tests(), None) - if test: - url = test[0] - return url[:url.find("/", 8)+1] - except (IndexError, AttributeError): - pass return "" -def map_category(c): - return CATEGORY_MAP.get(c, c.capitalize()) +def category_text(cls): + """Return a human-readable representation of a category""" + c = cls.category + return CATEGORY_MAP.get(c) or c.capitalize() -def map_subcategory(sc): +def subcategory_text(cls): + """Return a human-readable representation of a subcategory""" + sc = cls.subcategory if sc in SUBCATEGORY_MAP: return SUBCATEGORY_MAP[sc] sc = sc.capitalize() return sc if sc.endswith("s") else sc + "s" -def category_key(extrlist): - key = extrlist[0].cat.lower() - if len(extrlist) == 1 and extrlist[0].__module__.endswith(".imagehosts"): +def category_key(cls): + """Generate sorting keys by category""" + key = category_text(cls).lower() + if cls.__module__.endswith(".imagehosts"): key = "zz" + key return key def subcategory_key(cls): + """Generate sorting keys by subcategory""" if cls.subcategory in ("user", "issue"): return "A" return cls.subcategory -extractors = build_list() -columns = [ - RstColumn("Site", [ - extrlist[0].cat - for extrlist in extractors - ], 20), - RstColumn("URL", [ - get_domain(extrlist) - for extrlist in extractors - ], 35), - RstColumn("Capabilities", [ - ", ".join(extr.subcat for extr in extrlist if extr.subcat) - for extrlist in extractors - ], 50), - RstColumn("Authentication", [ - AUTH_MAP.get(extrlist[0].category, "") - for extrlist in extractors - ]), -] +def build_extractor_list(): + """Generate a sorted list of lists of extractor classes""" + extractors = collections.defaultdict(list) + + # get lists of extractor classes grouped by category + for extr in extractor.extractors(): + if not extr.category or extr.category in IGNORE_LIST: + continue + extractors[extr.category].append(extr) + + # sort extractor lists with the same category + for extrlist in extractors.values(): + extrlist.sort(key=subcategory_key) + + # sort lists by category + return sorted( + extractors.values(), + key=lambda lst: category_key(lst[0]), + ) + + +# define table columns +COLUMNS = ( + ("Site", 20, + lambda x: category_text(x[0])), + ("URL" , 35, + lambda x: domain(x[0])), + ("Capabilities", 50, + lambda x: ", ".join(subcategory_text(extr) for extr in x + if subcategory_text(extr))), + ("Authentication", 16, + lambda x: AUTH_MAP.get(x[0].category, "")), +) + + +def write_output(fobj, columns, extractors): + + def pad(output, col, category=None): + size = col[1] + output = output if isinstance(output, str) else col[2](output) + + if len(output) > size: + sub = "|{}-{}|".format(category, col[0][0]) + subs.append((sub, output)) + output = sub + + return output + " " * (size - len(output)) + + w = fobj.write + subs = [] + + # caption + w("Supported Sites\n") + w("===============\n") + + # table head + sep = " ".join("=" * c[1] for c in columns) + "\n" + w(sep) + w(" ".join(pad(c[0], c) for c in columns).strip() + "\n") + w(sep) + + # table body + for lst in extractors: + w(" ".join( + pad(col[2](lst), col, lst[0].category) + for col in columns + ).strip()) + w("\n") + + # table bottom + w(sep) + w("\n") + + # substitutions + for sub, value in subs: + w(".. {} replace:: {}\n".format(sub, value)) + outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst" with open(os.path.join(ROOTDIR, "docs", outfile), "w") as file: - file.write("Supported Sites\n" - "===============\n") - for line in RstTable(columns): - file.write(line.rstrip() + "\n") - file.write("\n") - for subs in RstColumn._substitutions: - for sub, val in subs: - file.write(".. {} replace:: {}\n".format(sub, val)) + write_output(file, COLUMNS, build_extractor_list())