diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py new file mode 100644 index 00000000..27c50482 --- /dev/null +++ b/gallery_dl/extractor/2chen.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- + +# Copyright 2017-2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from .common import Extractor, Message +from .. import text + + +class _2chenThreadExtractor(Extractor): + """Extractor for 2chen threads""" + category = "2chen" + subcategory = "thread" + directory_fmt = ("{category}", "{board}", "{thread} {title}") + filename_fmt = "{filename}" + pattern = (r"(?:https?://)?2chen\.moe" + r"/([^/]+)/(\d+)") + test = ("https://2chen.moe/jp/303786",) + + def __init__(self, match): + Extractor.__init__(self, match) + self.board, self.thread = match.groups() + + def items(self): + url = "https://2chen.moe/{}/{}".format(self.board, self.thread) + page = self.request(url, encoding="utf-8").text + data = self.metadata(page) + yield Message.Directory, data + for post in self.posts(page): + if post["url"] == None or post["filename"] == None: + continue + url = "https://2chen.moe{}".format(post["url"]) + yield Message.Url, url, post + + def metadata(self, page): + title = text.extract(page, "