diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py new file mode 100644 index 00000000..27c50482 --- /dev/null +++ b/gallery_dl/extractor/2chen.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- + +# Copyright 2017-2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from .common import Extractor, Message +from .. import text + + +class _2chenThreadExtractor(Extractor): + """Extractor for 2chen threads""" + category = "2chen" + subcategory = "thread" + directory_fmt = ("{category}", "{board}", "{thread} {title}") + filename_fmt = "{filename}" + pattern = (r"(?:https?://)?2chen\.moe" + r"/([^/]+)/(\d+)") + test = ("https://2chen.moe/jp/303786",) + + def __init__(self, match): + Extractor.__init__(self, match) + self.board, self.thread = match.groups() + + def items(self): + url = "https://2chen.moe/{}/{}".format(self.board, self.thread) + page = self.request(url, encoding="utf-8").text + data = self.metadata(page) + yield Message.Directory, data + for post in self.posts(page): + if post["url"] == None or post["filename"] == None: + continue + url = "https://2chen.moe{}".format(post["url"]) + yield Message.Url, url, post + + def metadata(self, page): + title = text.extract(page, "

", "

")[0] + return { + "board": self.board, + "thread": self.thread, + "title": title + } + + def posts(self, page): + posts = text.extract_iter( + page, '
', '
') + return [self.parse(post) for post in posts] + + def parse(self, post): + data = self._extract_post(post) + data["extension"] = str(data["filename"]).split(".")[-1] + return data + + @staticmethod + def _extract_post(post): + return text.extract_all(post, ( + ('url', '') diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index e273f843..1b6d4ec6 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -10,6 +10,7 @@ import re modules = [ "2chan", + "2chen", "35photo", "3dbooru", "420chan",