diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
new file mode 100644
index 00000000..c1a1be8d
--- /dev/null
+++ b/gallery_dl/extractor/2chan.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://www.2chan.net/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class FutabaThreadExtractor(Extractor):
+ """Extractor for images from threads on www.2chan.net"""
+ # Fetches one thread page, builds thread-level metadata from it and
+ # yields a URL message for every parsed post that has a "filename".
+ category = "2chan"
+ subcategory = "thread"
+ # placeholders match keys of the dict returned by get_metadata()
+ directory_fmt = ["{category}", "{board-name}", "{thread}"]
+ # groups: (1) host and path up to the thread number, without scheme
+ # and file extension, (2) server subdomain, (3) board, (4) thread number
+ pattern = [r"(?:https?://)?(([^.]+)\.2chan\.net/([^/]+)/res/(\d+))"]
+ # image URL template; filled from a post dict in items()
+ urlfmt = "https://{server}.2chan.net/{board}/src/{filename}"
+ test = [("http://dec.2chan.net/70/res/947.htm", {
+ "url": "c5c12b80b290e224b6758507b3bb952044f4595b",
+ "keyword": "e1295c0a96f733898e92742bcc1a4c4b320e3748",
+ })]
+
+ def __init__(self, match):
+ """Store server, board and thread number and build the thread URL"""
+ Extractor.__init__(self)
+ # assumes 'match' was produced by 'pattern' (four groups) - confirm
+ url, self.server, self.board, self.thread = match.groups()
+ # group 1 excludes the scheme and ends at the thread number, so the
+ # scheme and the ".htm" suffix are added here
+ self.url = "https://" + url + ".htm"
+
+ def items(self):
+ """Yield version, directory and one URL message per post with a file"""
+ page = self.request(self.url).text
+ data = self.get_metadata(page)
+ yield Message.Version, 1
+ yield Message.Directory, data
+ for post in self.posts(page):
+ # 'urlfmt' needs "filename"; posts without it are skipped
+ if "filename" not in post:
+ continue
+ # thread-level metadata overwrites same-named keys of the post
+ post.update(data)
+ url = self.urlfmt.format_map(post)
+ yield Message.Url, url, post
+
+ def get_metadata(self, page):
+ """Collect metadata for extractor-job"""
+ # NOTE(review): the delimiter strings of this extract() call look
+ # damaged in this view - confirm against the original file
+ title = text.extract(page, "
", "")[0]
+ # split at the last " - ": thread title on the left, board name on
+ # the right
+ title, _, boardname = title.rpartition(" - ")
+ return {
+ "server": self.server,
+ "title": title,
+ "board": self.board,
+ # drops the last four characters of the board name
+ # NOTE(review): the reason is not visible here - confirm against
+ # the title format of the site
+ "board-name": boardname[:-4],
+ "thread": self.thread,
+ }
+
+ def posts(self, page):
+ """Build a list of all post-objects"""
+ # cuts one section out of the page, splits it into chunks and runs
+ # parse() on every chunk
+ # NOTE(review): the delimiter strings below look damaged in this
+ # view - confirm against the original file
+ page = text.extract(
+ page, '', '
')[0]
+ return [
+ self.parse(post)
+ for post in page.split('
')
+ ]
+
+ def parse(self, post):
+ """Build post-object by extracting data from an HTML post"""
+ # NOTE(review): everything after the first statement looks damaged in
+ # this view; the rest of this method and the header of what appears
+ # to be _extract_post() are missing - restore from the original file
+ data = self._extract_post(post)
+ if '', ''),
+ ("name" , '', ' '),
+ ("now" , ' ', ' '),
+ (None , '', '
'),
+ ))[0]
+
+ @staticmethod
+ def _extract_image(post, data):
+ """Run text.extract_all() on 'post' for "filename" and "fsize" """
+ # the return value is discarded; presumably extract_all() stores its
+ # results in 'data' - confirm against text.extract_all()
+ text.extract_all(post, (
+ ("filename", '_blank">', '<'),
+ ("fsize" , '(', ' '),
+ ), 0, data)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index f9817a72..e17c3501 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -11,6 +11,7 @@ import importlib
modules = [
"pixiv",
+ "2chan",
"3dbooru",
"4chan",
"4plebs",
diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py
index 067ba0a5..f2307404 100644
--- a/gallery_dl/extractor/chan.py
+++ b/gallery_dl/extractor/chan.py
@@ -6,14 +6,14 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Base classes for extractors for different Futaba Channel boards"""
+"""Base classes for extractors for different Futaba Channel-like boards"""
from .common import Extractor, Message
from .. import text
class ChanThreadExtractor(Extractor):
- """Base class for extractors for Futaba Channel boards"""
+ """Base class for extractors for Futaba Channel-like boards"""
category = "chan"
subcategory = "thread"
directory_fmt = ["{category}", "{board}-{thread}"]