From 474e9c1aeca531b7933335eb01a125fdacb1b5f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Mon, 3 Jul 2017 16:43:04 +0200
Subject: [PATCH] [4plebs] add thread extractor (#18)

---
Review notes (not part of the commit message):
- FoolfuukaThreadExtractor.items() skips posts whose media entry has no
  link instead of failing on it; posts() tolerates a thread response
  without a "posts" entry.
- The chan.py hunk was amended during review, so the "index" line of
  that diff still carries the blob id of the original version.

 gallery_dl/extractor/4plebs.py   | 22 ++++++++++++++
 gallery_dl/extractor/__init__.py |  1 +
 gallery_dl/extractor/chan.py     | 50 +++++++++++++++++++++++++++++++-
 3 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 gallery_dl/extractor/4plebs.py

diff --git a/gallery_dl/extractor/4plebs.py b/gallery_dl/extractor/4plebs.py
new file mode 100644
index 00000000..fa72199c
--- /dev/null
+++ b/gallery_dl/extractor/4plebs.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://archive.4plebs.org/"""
+
+from . import chan
+
+
+class FourplebsThreadExtractor(chan.FoolfuukaThreadExtractor):
+    """Extractor for images from threads on 4plebs.org"""
+    category = "4plebs"
+    pattern = [r"(?:https?://)?(?:archive\.)?4plebs\.org/([^/]+)/thread/(\d+)"]
+    test = [("https://archive.4plebs.org/tg/thread/54111182/", {
+        "url": "85f54faf037dee29ad1c413142bcc45cd905be5a",
+        "keyword": "59c414bddc58b77b3e481fbe1c4e4ea3d582b2d3",
+    })]
+    root = "https://archive.4plebs.org"
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b3324b39..4687f586 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -13,6 +13,7 @@ modules = [
     "pixiv",
     "3dbooru",
     "4chan",
+    "4plebs",
     "8chan",
     "batoto",
     "danbooru",
diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py
index 9553ebe5..067ba0a5 100644
--- a/gallery_dl/extractor/chan.py
+++ b/gallery_dl/extractor/chan.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015, 2016 Mike Fährmann
+# Copyright 2015-2017 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -58,3 +58,51 @@ class ChanThreadExtractor(Extractor):
         """Return thread title from first post"""
         title = post["sub"] if "sub" in post else text.remove_html(post["com"])
         return text.unescape(title)[:50]
+
+
+class FoolfuukaThreadExtractor(Extractor):
+    """Base extractor for FoolFuuka based boards/archives
+
+    Subclasses set 'category' and 'root' and provide a 'pattern' whose
+    two groups capture the board name and the thread number.
+    """
+    category = "foolfuuka"
+    subcategory = "thread"
+    directory_fmt = ["{category}", "{board[shortname]}",
+                     "{thread_num} - {title}"]
+    filename_fmt = "{media[media]}"
+    root = ""
+
+    def __init__(self, match):
+        Extractor.__init__(self)
+        self.board, self.thread = match.groups()
+
+    def items(self):
+        """Yield a directory message for the OP and one URL per image"""
+        first = True
+        yield Message.Version, 1
+        for post in self.posts():
+            if first:
+                # the opening post supplies the directory metadata
+                yield Message.Directory, post
+                first = False
+            media = post["media"]
+            if not media:
+                continue
+            url = media.get("media_link")
+            if not url:
+                # without a link there is nothing to download, and
+                # url.rpartition() would fail on None
+                continue
+            post["extension"] = url.rpartition(".")[2]
+            yield Message.Url, url, post
+
+    def posts(self):
+        """Yield the opening post followed by all replies"""
+        url = self.root + "/_/api/chan/thread/"
+        params = {"board": self.board, "num": self.thread}
+        data = self.request(url, params=params).json()[self.thread]
+
+        yield data["op"]
+        # the response may lack a usable "posts" entry; treat as no replies
+        yield from (data.get("posts") or {}).values()