diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index ebc2341a..ef52593d 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -73,6 +73,7 @@ modules = [ "kissgoddess", "kohlchan", "komikcast", + "lensdump", "lightroom", "lineblog", "livedoor", diff --git a/gallery_dl/extractor/jpgchurch.py b/gallery_dl/extractor/jpgchurch.py index e0105f26..dc848d3a 100644 --- a/gallery_dl/extractor/jpgchurch.py +++ b/gallery_dl/extractor/jpgchurch.py @@ -12,75 +12,37 @@ from .. import text BASE_PATTERN = r"(?:https?://)?jpg\.church" -class JpgchurchImageExtractor(Extractor): - """Base Extractor for Jpgchurch Images""" - category = "Jpgchurch" - subcategory = "image" - directory_fmt = ("{category}", "{user}") - filename_fmt = "{filename}" - pattern = BASE_PATTERN + r"/img/([\w\d\-\.]+)" +class JpgchurchExtractor(Extractor): + """Base class for Jpgchurch extractors""" + category = "jpgchurch" root = "https://jpg.church" - test = ("https://jpg.church/img/funnymeme.LecXGS",) + directory_fmt = ("{category}", "{user}",) + archive_fmt = "{filename}" def __init__(self, match): Extractor.__init__(self, match) - self.image = match.group(1) def items(self): - data = self.metadata() for image in self.images(): - if "album" in image or "user" in image: - data.update(image) - yield Message.Directory, data + yield Message.Directory, image yield Message.Url, image["url"], image - def metadata(self): - """Return general metadata""" - return {} - def images(self): """Return an iterable containing the image(s)""" - url = "{}/img/{}".format(self.root, self.image) - return [self._get_images(url)] - - def _get_images(self, url): - page = self.request(url).text - data = self._extract_image(page) - data.update({ - "user": data["user"].split("/")[-1], - "extension": text.ext_from_url(data["url"]) - }) - return data @staticmethod - def _extract_image(page): - _page = text.extract( - page, - '
', '')))[0] + def _extract_user(page): + return text.extract(page, 'username: "', '"')[0] - -class JpgchurchAlbumExtractor(JpgchurchImageExtractor, Extractor): - """Extractor for Jpgchurch Albums""" - subcategory = "album" - directory_fmt = ("{category}", "{user}", "{album}",) - pattern = BASE_PATTERN + r"/a(?:lbum)?/([\w\d\-\.]+)" - test = ("https://jpg.church/album/CDilP/?sort=date_desc&page=1",) - - def __init__(self, match): - Extractor.__init__(self, match) - self.album = match.group(1).split('.')[-1] - - def metadata(self): - return {"album": self.album} - - def images(self): - url = "{}/a/{}".format(self.root, self.album) - for _url in self._get_album_images(url): - yield self._get_images(_url) + def _extract_image(self, url): + page = self.request(url).text + data = { + "url": text.extract( + page, '')[0], + } + text.nameext_from_url(data["url"], data) + data["user"] = self._extract_user(page) + return data def _pagination(self, url): """Uses recursion to yield the next page""" @@ -92,30 +54,114 @@ class JpgchurchAlbumExtractor(JpgchurchImageExtractor, Extractor): url = _next yield from self._pagination(_next) - def _get_album_images(self, url): - for _url in self._pagination(url): - page = self.request(_url).text - _page = text.extract_iter( + def _get_images(self, url): + for url in self._pagination(url): + page = self.request(url).text + album = text.extract(page, '')[0] + album = text.extract(album, '>', '')[0] + page = text.extract_iter( page, '
')[0] + album = text.extract(album, '>', '')[0] + page = text.extract_iter( + page, '