From a9b8a2430d29eccdc92da587e7051567d5178eeb Mon Sep 17 00:00:00 2001 From: enduser420 Date: Thu, 30 Jun 2022 19:57:44 +0530 Subject: [PATCH] [Jpgchurch] Add Jpgchurch extractor --- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/jpgchurch.py | 121 ++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 gallery_dl/extractor/jpgchurch.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 1b6d4ec6..d63c7934 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -66,6 +66,7 @@ modules = [ "instagram", "issuu", "itaku", + "jpgchurch", "kabeuchi", "keenspot", "kemonoparty", diff --git a/gallery_dl/extractor/jpgchurch.py b/gallery_dl/extractor/jpgchurch.py new file mode 100644 index 00000000..e0105f26 --- /dev/null +++ b/gallery_dl/extractor/jpgchurch.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https?://)?jpg\.church" + + +class JpgchurchImageExtractor(Extractor): + """Base Extractor for Jpgchurch Images""" + category = "Jpgchurch" + subcategory = "image" + directory_fmt = ("{category}", "{user}") + filename_fmt = "{filename}" + pattern = BASE_PATTERN + r"/img/([\w\d\-\.]+)" + root = "https://jpg.church" + test = ("https://jpg.church/img/funnymeme.LecXGS",) + + def __init__(self, match): + Extractor.__init__(self, match) + self.image = match.group(1) + + def items(self): + data = self.metadata() + for image in self.images(): + if "album" in image or "user" in image: + data.update(image) + yield Message.Directory, data + yield Message.Url, image["url"], image + + def metadata(self): + """Return general metadata""" + return {} + + def images(self): + """Return an iterable containing the image(s)""" + url = "{}/img/{}".format(self.root, self.image) + return [self._get_images(url)] + + def _get_images(self, url): + page = self.request(url).text + data = self._extract_image(page) + data.update({ + "user": data["user"].split("/")[-1], + "extension": text.ext_from_url(data["url"]) + }) + return data + + @staticmethod + def _extract_image(page): + _page = text.extract( + page, + '
', '')))[0] + + +class JpgchurchAlbumExtractor(JpgchurchImageExtractor, Extractor): + """Extractor for Jpgchurch Albums""" + subcategory = "album" + directory_fmt = ("{category}", "{user}", "{album}",) + pattern = BASE_PATTERN + r"/a(?:lbum)?/([\w\d\-\.]+)" + test = ("https://jpg.church/album/CDilP/?sort=date_desc&page=1",) + + def __init__(self, match): + Extractor.__init__(self, match) + self.album = match.group(1).split('.')[-1] + + def metadata(self): + return {"album": self.album} + + def images(self): + url = "{}/a/{}".format(self.root, self.album) + for _url in self._get_album_images(url): + yield self._get_images(_url) + + def _pagination(self, url): + """Uses recursion to yield the next page""" + yield url + page = self.request(url).text + _next = text.extract( + page, '<')[0] + if _next: + url = _next + yield from self._pagination(_next) + + def _get_album_images(self, url): + for _url in self._pagination(url): + page = self.request(_url).text + _page = text.extract_iter( + page, '