diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index f8e8f4f5..34f30d04 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -10,6 +10,7 @@ Site                 URL                                 Capabilities
 4chan                https://www.4chan.org/              Boards, Threads
 4plebs               https://archive.4plebs.org/         Threads
 500px                https://500px.com/                  Galleries, individual Images, User Profiles
+8kun                 https://8kun.top/                   Boards, Threads
 8muses               https://www.8muses.com/             Albums
 Adobe Portfolio      https://www.myportfolio.com/        Galleries
 Adult Empire         https://www.adultempire.com/        Galleries
diff --git a/gallery_dl/extractor/8kun.py b/gallery_dl/extractor/8kun.py
new file mode 100644
index 00000000..7f4baf06
--- /dev/null
+++ b/gallery_dl/extractor/8kun.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://8kun.top/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class _8kunThreadExtractor(Extractor):
+    """Extractor for 8kun threads"""
+    category = "8kun"
+    subcategory = "thread"
+    directory_fmt = ("{category}", "{board}", "{thread} {title}")
+    filename_fmt = "{time}{num:?-//} {filename}.{extension}"
+    archive_fmt = "{board}_{thread}_{tim}"
+    pattern = r"(?:https?://)?8kun\.top/([^/]+)/res/(\d+)"
+    test = ("https://8kun.top/test/res/65248.html", {
+        "pattern": r"https://media\.8kun\.top/file_store/\w{64}\.\w+",
+        "count": ">= 8",
+    })
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.board, self.thread = match.groups()
+
+    def items(self):
+        """Yield a Url message for every file attached to a post"""
+        url = "https://8kun.top/{}/res/{}.json".format(self.board, self.thread)
+        posts = self.request(url).json()["posts"]
+        # use an empty title instead of raising KeyError when the
+        # opening post has neither a subject ("sub") nor a comment ("com")
+        op = posts[0]
+        title = op.get("sub") or text.remove_html(op.get("com") or "")
+        process = self._process
+
+        data = {
+            "board" : self.board,
+            "thread": self.thread,
+            "title" : text.unescape(title)[:50],
+            "num"   : 0,
+        }
+
+        yield Message.Version, 1
+        yield Message.Directory, data
+        for post in posts:
+            if "filename" in post:
+                yield process(post, data)
+                if "extra_files" in post:
+                    # additional files reuse the post's dict;
+                    # 'num' is their 1-based position
+                    for post["num"], filedata in enumerate(
+                            post["extra_files"], 1):
+                        yield process(post, filedata)
+
+    @staticmethod
+    def _process(post, data):
+        """Merge 'data' into 'post' and return a Url message for its file"""
+        post.update(data)
+        post["extension"] = post["ext"][1:]
+        url = "https://media.8kun.top/file_store/" + post["tim"] + post["ext"]
+        return Message.Url, url, post
+
+
+class _8kunBoardExtractor(Extractor):
+    """Extractor for 8kun boards"""
+    category = "8kun"
+    subcategory = "board"
+    pattern = r"(?:https?://)?8kun\.top/([^/?&#]+)/(?:index|\d+)\.html$"
+    test = (
+        ("https://8kun.top/v/index.html", {
+            "pattern": _8kunThreadExtractor.pattern,
+            "count": ">= 100",
+        }),
+        ("https://8kun.top/v/2.html"),
+    )
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.board = match.group(1)
+
+    def items(self):
+        """Yield a Queue message for every thread in threads.json"""
+        url = "https://8kun.top/{}/threads.json".format(self.board)
+        threads = self.request(url).json()
+
+        for page in threads:
+            for thread in page["threads"]:
+                url = "https://8kun.top/{}/res/{}.html".format(
+                    self.board, thread["no"])
+                thread["page"] = page["page"]
+                thread["_extractor"] = _8kunThreadExtractor
+                yield Message.Queue, url, thread
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index cc997107..5b8d79cf 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -15,6 +15,7 @@ modules = [
     "3dbooru",
     "4chan",
     "500px",
+    "8kun",
     "8muses",
     "adultempire",
     "artstation",