[4plebs] add thread extractor (#18)

This commit is contained in:
Mike Fährmann
2017-07-03 16:43:04 +02:00
parent dcc1d3b2ea
commit 474e9c1aec
3 changed files with 59 additions and 1 deletions

View File

@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from https://archive.4plebs.org/"""
from . import chan
class FourplebsThreadExtractor(chan.FoolfuukaThreadExtractor):
    """Thread extractor for the FoolFuuka archive at 4plebs.org"""
    # site-specific values; the extraction logic lives in the base class
    category = "4plebs"
    root = "https://archive.4plebs.org"

    # the two capture groups are the board name and the thread number
    pattern = [
        r"(?:https?://)?(?:archive\.)?4plebs\.org/([^/]+)/thread/(\d+)",
    ]
    test = [
        ("https://archive.4plebs.org/tg/thread/54111182/", {
            "url": "85f54faf037dee29ad1c413142bcc45cd905be5a",
            "keyword": "59c414bddc58b77b3e481fbe1c4e4ea3d582b2d3",
        }),
    ]

View File

@@ -13,6 +13,7 @@ modules = [
"pixiv",
"3dbooru",
"4chan",
"4plebs",
"8chan",
"batoto",
"danbooru",

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 Mike Fährmann
# Copyright 2015-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -58,3 +58,38 @@ class ChanThreadExtractor(Extractor):
"""Return thread title from first post"""
title = post["sub"] if "sub" in post else text.remove_html(post["com"])
return text.unescape(title)[:50]
class FoolfuukaThreadExtractor(Extractor):
    """Base extractor for FoolFuuka based boards/archives

    Subclasses provide 'category', 'pattern' and 'root'; thread data is
    requested from the '/_/api/chan/thread/' endpoint below 'root'.
    """
    category = "foolfuuka"
    subcategory = "thread"
    directory_fmt = ["{category}", "{board[shortname]}",
                     "{thread_num} - {title}"]
    filename_fmt = "{media[media]}"
    root = ""

    def __init__(self, match):
        """Store the two groups of 'match' as board and thread"""
        Extractor.__init__(self)
        self.board, self.thread = match.groups()

    def items(self):
        """Yield a version, a directory and one URL message per file

        The first post returned by posts() is used as directory metadata.
        Posts without an attached file, or without a direct link to it,
        are skipped instead of raising an exception.
        """
        yield Message.Version, 1
        is_op = True
        for post in self.posts():
            if is_op:
                # the opening post supplies the directory metadata
                yield Message.Directory, post
                is_op = False
            media = post.get("media")
            url = media.get("media_link") if media else None
            if not url:
                # text-only post, or no direct file link is available
                continue
            post["extension"] = url.rpartition(".")[2]
            yield Message.Url, url, post

    def posts(self):
        """Yield the thread's opening post followed by its replies"""
        url = self.root + "/_/api/chan/thread/"
        params = {"board": self.board, "num": self.thread}
        data = self.request(url, params=params).json()[self.thread]
        yield data["op"]
        # a thread without any replies may carry no "posts" entry at all
        replies = data.get("posts")
        if replies:
            yield from replies.values()