From 58a9eede3857605599d674f9c66242c410f113cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 3 Feb 2019 00:40:12 +0100 Subject: [PATCH] [foolfuuka] dynamically generate extractor classes --- gallery_dl/extractor/4plebs.py | 21 --- gallery_dl/extractor/__init__.py | 10 +- gallery_dl/extractor/archivedmoe.py | 27 ---- gallery_dl/extractor/archiveofsins.py | 23 --- gallery_dl/extractor/b4k.py | 24 --- gallery_dl/extractor/chan.py | 61 +------- gallery_dl/extractor/desuarchive.py | 21 --- gallery_dl/extractor/fireden.py | 21 --- gallery_dl/extractor/foolfuuka.py | 185 +++++++++++++++++++++++ gallery_dl/extractor/nyafuu.py | 21 --- gallery_dl/extractor/rebeccablacktech.py | 27 ---- gallery_dl/extractor/thebarchive.py | 21 --- 12 files changed, 188 insertions(+), 274 deletions(-) delete mode 100644 gallery_dl/extractor/4plebs.py delete mode 100644 gallery_dl/extractor/archivedmoe.py delete mode 100644 gallery_dl/extractor/archiveofsins.py delete mode 100644 gallery_dl/extractor/b4k.py delete mode 100644 gallery_dl/extractor/desuarchive.py delete mode 100644 gallery_dl/extractor/fireden.py create mode 100644 gallery_dl/extractor/foolfuuka.py delete mode 100644 gallery_dl/extractor/nyafuu.py delete mode 100644 gallery_dl/extractor/rebeccablacktech.py delete mode 100644 gallery_dl/extractor/thebarchive.py diff --git a/gallery_dl/extractor/4plebs.py b/gallery_dl/extractor/4plebs.py deleted file mode 100644 index 17d50b26..00000000 --- a/gallery_dl/extractor/4plebs.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2017 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract images from https://archive.4plebs.org/""" - -from . import chan - - -class FourplebsThreadExtractor(chan.FoolfuukaThreadExtractor): - """Extractor for images from threads on 4plebs.org""" - category = "4plebs" - root = "https://archive.4plebs.org" - pattern = [r"(?:https?://)?(?:archive\.)?4plebs\.org/([^/]+)/thread/(\d+)"] - test = [("https://archive.4plebs.org/tg/thread/54059290", { - "url": "07452944164b602502b02b24521f8cee5c484d2a", - })] diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 164f5462..e751aaa8 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -13,23 +13,17 @@ modules = [ "2chan", "3dbooru", "4chan", - "4plebs", "8chan", - "archivedmoe", - "archiveofsins", "artstation", - "b4k", "behance", "bobx", "danbooru", - "desuarchive", "deviantart", "dokireader", "dynastyscans", "e621", "exhentai", "fallenangels", - "fireden", "flickr", "gelbooru", "gfycat", @@ -66,7 +60,6 @@ modules = [ "ngomik", "nhentai", "nijie", - "nyafuu", "paheal", "photobucket", "piczel", @@ -75,7 +68,6 @@ modules = [ "powermanga", "reactor", "readcomiconline", - "rebeccablacktech", "reddit", "rule34", "safebooru", @@ -87,7 +79,6 @@ modules = [ "simplyhentai", "slideshare", "smugmug", - "thebarchive", "tsumino", "tumblr", "twitter", @@ -97,6 +88,7 @@ modules = [ "yandere", "xvideos", "yuki", + "foolfuuka", "mastodon", "imagehosts", "directlink", diff --git a/gallery_dl/extractor/archivedmoe.py b/gallery_dl/extractor/archivedmoe.py deleted file mode 100644 index a72eeef4..00000000 --- a/gallery_dl/extractor/archivedmoe.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2017 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract images from https://archived.moe/""" - -from . import chan - - -class ArchivedmoeThreadExtractor(chan.FoolfuukaThreadExtractor): - """Extractor for images from threads on archived.moe""" - category = "archivedmoe" - root = "https://archived.moe" - pattern = [r"(?:https?://)?archived\.moe/([^/]+)/thread/(\d+)"] - test = [ - ("https://archived.moe/gd/thread/309639/", { - "url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8", - "content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573", - }), - ("https://archived.moe/a/thread/159767162/", { - "url": "ffec05a1a1b906b5ca85992513671c9155ee9e87", - }), - ] diff --git a/gallery_dl/extractor/archiveofsins.py b/gallery_dl/extractor/archiveofsins.py deleted file mode 100644 index cdd58e39..00000000 --- a/gallery_dl/extractor/archiveofsins.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2017 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract images from https://archiveofsins.com/""" - -from . import chan - - -class ArchiveofsinsThreadExtractor(chan.FoolfuukaThreadExtractor): - """Extractor for images from threads on archiveofsins.com""" - category = "archiveofsins" - root = "https://archiveofsins.com" - pattern = [r"(?:https?://)?(?:www\.)?archiveofsins\.com" - r"/([^/]+)/thread/(\d+)"] - test = [("https://www.archiveofsins.com/h/thread/4668813/", { - "url": "f612d287087e10a228ef69517cf811539db9a102", - "content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4", - })] diff --git a/gallery_dl/extractor/b4k.py b/gallery_dl/extractor/b4k.py deleted file mode 100644 index a669f221..00000000 --- a/gallery_dl/extractor/b4k.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2017 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract images from https://arch.b4k.co/""" - -from . import chan - - -class BfourkThreadExtractor(chan.FoolfuukaThreadExtractor): - """Extractor for images from threads on arch.b4k.co""" - category = "b4k" - root = "https://arch.b4k.co" - pattern = [r"(?:https?://)?arch\.b4k\.co/([^/]+)/thread/(\d+)"] - test = [("http://arch.b4k.co/meta/thread/196/", { - "url": "cdd4931ac1cd00264b0b54e2e3b0d8f6ae48957e", - })] - - def remote(self, media): - return media["remote_media_link"] diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index 8dcfad45..bf3b5602 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2018 Mike Fährmann +# Copyright 2015-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,10 +8,8 @@ """Base classes for extractors for different Futaba Channel-like boards""" -from .common import Extractor, SharedConfigExtractor, Message +from .common import Extractor, Message from .. import text -import itertools -import operator class ChanThreadExtractor(Extractor): @@ -61,58 +59,3 @@ class ChanThreadExtractor(Extractor): """Return thread title from first post""" title = post["sub"] if "sub" in post else text.remove_html(post["com"]) return text.unescape(title)[:50] - - -class FoolfuukaThreadExtractor(SharedConfigExtractor): - """Base extractor for FoolFuuka based boards/archives""" - basecategory = "foolfuuka" - subcategory = "thread" - directory_fmt = ["{category}", "{board[shortname]}", - "{thread_num}{title:? - //}"] - filename_fmt = "{media[media]}" - archive_fmt = "{board[shortname]}_{num}_{timestamp}" - root = "" - referer = True - - def __init__(self, match): - SharedConfigExtractor.__init__(self) - self.board, self.thread = match.groups() - if self.referer: - self.session.headers["Referer"] = self.root - - def items(self): - op = True - yield Message.Version, 1 - for post in self.posts(): - if op: - yield Message.Directory, post - op = False - if not post["media"]: - continue - - media = post["media"] - url = media["media_link"] - - if not url and "remote_media_link" in media: - url = self.remote(media) - if url.startswith("/"): - url = self.root + url - - post["extension"] = url.rpartition(".")[2] - yield Message.Url, url, post - - def posts(self): - url = self.root + "/_/api/chan/thread/" - params = {"board": self.board, "num": self.thread} - data = self.request(url, params=params).json()[self.thread] - - # sort post-objects by their key - posts = sorted(data.get("posts", {}).items()) - posts = map(operator.itemgetter(1), posts) - - return itertools.chain((data["op"],), posts) - - def remote(self, media): - needle = '