diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 1065143f..396de05e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -46,6 +46,7 @@ modules = [ "imgth", "imgur", "jaiminisbox", + "joyreactor", "khinsider", "kireicake", "kissmanga", diff --git a/gallery_dl/extractor/joyreactor.py b/gallery_dl/extractor/joyreactor.py new file mode 100644 index 00000000..fa28430a --- /dev/null +++ b/gallery_dl/extractor/joyreactor.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for http://joyreactor.com/""" + +from .common import Extractor, Message +from .. import text +import json + + +class JoyreactorExtractor(Extractor): + """Base class for joyreactor extractors""" + category = "joyreactor" + directory_fmt = ["{category}"] + filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}" + archive_fmt = "{post_id}_{num}" + + def __init__(self, match): + Extractor.__init__(self) + self.url = match.group(0) + self.root = "http://joyreactor." + match.group(1) + self.session.headers["Referer"] = self.root + + def items(self): + data = self.metadata() + yield Message.Version, 1 + yield Message.Directory, data + for post in self.posts(): + for image in self._parse_post(post): + url = image["file_url"] + image.update(data) + yield Message.Url, url, text.nameext_from_url(url, image) + + def metadata(self): + """Collect metadata for extractor-job""" + return {} + + def posts(self): + """Return all relevant post-objects""" + return self._pagination(self.url) + + def _pagination(self, url): + while True: + page = self.request(url).text + + yield from text.extract_iter( + page, '