diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dafd0f27..6d4fd9c0 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -427,12 +427,6 @@ Consider all sites to be NSFW unless otherwise known. Games - - JPG Fish - https://jpg1.su/ - Albums, individual Images, User Profiles - - Keenspot http://www.keenspot.com/ @@ -998,6 +992,22 @@ Consider all sites to be NSFW unless otherwise known. + + Chevereto Instances + + + JPG Fish + https://jpg2.su/ + Albums, individual Images, User Profiles + + + + Pixl + https://pixl.li/ + Albums, individual Images, User Profiles + + + Danbooru Instances diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3abe74b6..1c1473a0 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -28,6 +28,7 @@ modules = [ "blogger", "bunkr", "catbox", + "chevereto", "comicvine", "cyberdrop", "danbooru", @@ -73,7 +74,6 @@ modules = [ "issuu", "itaku", "itchio", - "jpgfish", "jschan", "kabeuchi", "keenspot", diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py new file mode 100644 index 00000000..f7824e2f --- /dev/null +++ b/gallery_dl/extractor/chevereto.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for Chevereto galleries""" + +from .common import BaseExtractor, Message +from .. import text + + +class CheveretoExtractor(BaseExtractor): + """Base class for chevereto extractors""" + basecategory = "chevereto" + directory_fmt = ("{category}", "{user}", "{album}",) + archive_fmt = "{id}" + + def __init__(self, match): + BaseExtractor.__init__(self, match) + self.path = match.group(match.lastindex) + + def _pagination(self, url): + while url: + page = self.request(url).text + + for item in text.extract_iter( + page, '
<') + + +BASE_PATTERN = CheveretoExtractor.update({ + "jpgfish": { + "root": "https://jpg2.su", + "pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)", + }, + "pixl": { + "root": "https://pixl.li", + "pattern": r"pixl\.(?:li|is)", + }, +}) + + +class CheveretoImageExtractor(CheveretoExtractor): + """Extractor for chevereto Images""" + subcategory = "image" + pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)" + example = "https://jpg2.su/img/TITLE.ID" + + def items(self): + url = self.root + self.path + extr = text.extract_from(self.request(url).text) + + image = { + "id" : self.path.rpartition(".")[2], + "url" : extr('"), ">", "<"), + "user" : extr('username: "', '"'), + } + + text.nameext_from_url(image["url"], image) + yield Message.Directory, image + yield Message.Url, image["url"], image + + +class CheveretoAlbumExtractor(CheveretoExtractor): + """Extractor for chevereto Albums""" + subcategory = "album" + pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)" + example = "https://jpg2.su/album/TITLE.ID" + + def items(self): + url = self.root + self.path + data = {"_extractor": CheveretoImageExtractor} + + if self.path.endswith("/sub"): + albums = self._pagination(url) + else: + albums = (url,) + + for album in albums: + for image in self._pagination(album): + yield Message.Queue, image, data + + +class CheveretoUserExtractor(CheveretoExtractor): + """Extractor for chevereto Users""" + subcategory = "user" + pattern = BASE_PATTERN + r"(/(?!img|image|a(?:lbum)?)[^/?#]+(?:/albums)?)" + example = "https://jpg2.su/USER" + + def items(self): + url = self.root + self.path + + if self.path.endswith("/albums"): + data = {"_extractor": CheveretoAlbumExtractor} + else: + data = {"_extractor": CheveretoImageExtractor} + + for url in self._pagination(url): + yield Message.Queue, url, data diff --git a/gallery_dl/extractor/jpgfish.py b/gallery_dl/extractor/jpgfish.py deleted file mode 100644 index 8862a7b7..00000000 --- a/gallery_dl/extractor/jpgfish.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://jpg1.su/""" - -from .common import Extractor, Message -from .. import text - -BASE_PATTERN = r"(?:https?://)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)" - - -class JpgfishExtractor(Extractor): - """Base class for jpgfish extractors""" - category = "jpgfish" - root = "https://jpg1.su" - directory_fmt = ("{category}", "{user}", "{album}",) - archive_fmt = "{id}" - - def _pagination(self, url): - while url: - page = self.request(url).text - - for item in text.extract_iter( - page, '
<')[0] - - -class JpgfishImageExtractor(JpgfishExtractor): - """Extractor for jpgfish Images""" - subcategory = "image" - pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))" - example = "https://jpg1.su/img/TITLE.ID" - - def __init__(self, match): - JpgfishExtractor.__init__(self, match) - self.path, self.image_id = match.groups() - - def items(self): - url = "{}/img/{}".format(self.root, self.path) - extr = text.extract_from(self.request(url).text) - - image = { - "id" : self.image_id, - "url" : extr('"), ">", "<")[0] or "", - "user" : extr('username: "', '"'), - } - - text.nameext_from_url(image["url"], image) - yield Message.Directory, image - yield Message.Url, image["url"], image - - -class JpgfishAlbumExtractor(JpgfishExtractor): - """Extractor for jpgfish Albums""" - subcategory = "album" - pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?" - example = "https://jpg1.su/album/TITLE.ID" - - def __init__(self, match): - JpgfishExtractor.__init__(self, match) - self.album, self.sub_albums = match.groups() - - def items(self): - url = "{}/a/{}".format(self.root, self.album) - data = {"_extractor": JpgfishImageExtractor} - - if self.sub_albums: - albums = self._pagination(url + "/sub") - else: - albums = (url,) - - for album in albums: - for image in self._pagination(album): - yield Message.Queue, image, data - - -class JpgfishUserExtractor(JpgfishExtractor): - """Extractor for jpgfish Users""" - subcategory = "user" - pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?" - example = "https://jpg1.su/USER" - - def __init__(self, match): - JpgfishExtractor.__init__(self, match) - self.user, self.albums = match.groups() - - def items(self): - url = "{}/{}".format(self.root, self.user) - - if self.albums: - url += "/albums" - data = {"_extractor": JpgfishAlbumExtractor} - else: - data = {"_extractor": JpgfishImageExtractor} - - for url in self._pagination(url): - yield Message.Queue, url, data diff --git a/test/results/jpgfish.py b/test/results/jpgfish.py index 5aa4a126..bf35bf7a 100644 --- a/test/results/jpgfish.py +++ b/test/results/jpgfish.py @@ -4,15 +4,15 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -from gallery_dl.extractor import jpgfish +from gallery_dl.extractor import chevereto __tests__ = ( { - "#url" : "https://jpg1.su/img/funnymeme.LecXGS", - "#category": ("", "jpgfish", "image"), - "#class" : jpgfish.JpgfishImageExtractor, - "#pattern" : r"https://simp3\.jpg\.church/images/funnymeme\.jpg", + "#url" : "https://jpg2.su/img/funnymeme.LecXGS", + "#category": ("chevereto", "jpgfish", "image"), + "#class" : chevereto.CheveretoImageExtractor, + "#urls" : "https://simp3.jpg.church/images/funnymeme.jpg", "#sha1_content": "098e5e9b17ad634358426e0ffd1c93871474d13c", "album" : "", @@ -25,125 +25,131 @@ __tests__ = ( { "#url" : "https://jpg.church/img/auCruA", - "#category": ("", "jpgfish", "image"), - "#class" : jpgfish.JpgfishImageExtractor, + "#category": ("chevereto", "jpgfish", "image"), + "#class" : chevereto.CheveretoImageExtractor, "#pattern" : r"https://simp2\.jpg\.church/hannahowo_00457\.jpg", "album": "401-500", }, +{ + "#url" : "https://jpg1.su/img/funnymeme.LecXGS", + "#category": ("chevereto", "jpgfish", "image"), + "#class" : chevereto.CheveretoImageExtractor, +}, + { "#url" : "https://jpeg.pet/img/funnymeme.LecXGS", - "#category": ("", "jpgfish", "image"), - "#class" : jpgfish.JpgfishImageExtractor, + "#category": ("chevereto", "jpgfish", "image"), + "#class" : chevereto.CheveretoImageExtractor, }, { "#url" : "https://jpg.pet/img/funnymeme.LecXGS", - "#category": ("", "jpgfish", "image"), - "#class" : jpgfish.JpgfishImageExtractor, + "#category": ("chevereto", "jpgfish", "image"), + "#class" : chevereto.CheveretoImageExtractor, }, { "#url" : "https://jpg.fishing/img/funnymeme.LecXGS", - "#category": ("", "jpgfish", "image"), - "#class" : jpgfish.JpgfishImageExtractor, + "#category": ("chevereto", "jpgfish", "image"), + "#class" : chevereto.CheveretoImageExtractor, }, { "#url" : "https://jpg.fish/img/funnymeme.LecXGS", - "#category": ("", "jpgfish", "image"), - "#class" : jpgfish.JpgfishImageExtractor, + "#category": ("chevereto", "jpgfish", "image"), + "#class" : chevereto.CheveretoImageExtractor, }, { "#url" : "https://jpg.church/img/funnymeme.LecXGS", - "#category": ("", "jpgfish", "image"), - "#class" : jpgfish.JpgfishImageExtractor, + "#category": ("chevereto", "jpgfish", "image"), + "#class" : chevereto.CheveretoImageExtractor, }, { "#url" : "https://jpg1.su/album/CDilP/?sort=date_desc&page=1", - "#category": ("", "jpgfish", "album"), - "#class" : jpgfish.JpgfishAlbumExtractor, + "#category": ("chevereto", "jpgfish", "album"), + "#class" : chevereto.CheveretoAlbumExtractor, "#count" : 2, }, { "#url" : "https://jpg.fishing/a/gunggingnsk.N9OOI", - "#category": ("", "jpgfish", "album"), - "#class" : jpgfish.JpgfishAlbumExtractor, + "#category": ("chevereto", "jpgfish", "album"), + "#class" : chevereto.CheveretoAlbumExtractor, "#count" : 114, }, { "#url" : "https://jpg.fish/a/101-200.aNJ6A/", - "#category": ("", "jpgfish", "album"), - "#class" : jpgfish.JpgfishAlbumExtractor, + "#category": ("chevereto", "jpgfish", "album"), + "#class" : chevereto.CheveretoAlbumExtractor, "#count" : 100, }, { "#url" : "https://jpg.church/a/hannahowo.aNTdH/sub", - "#category": ("", "jpgfish", "album"), - "#class" : jpgfish.JpgfishAlbumExtractor, + "#category": ("chevereto", "jpgfish", "album"), + "#class" : chevereto.CheveretoAlbumExtractor, "#count" : 606, }, { "#url" : "https://jpeg.pet/album/CDilP/?sort=date_desc&page=1", - "#category": ("", "jpgfish", "album"), - "#class" : jpgfish.JpgfishAlbumExtractor, + "#category": ("chevereto", "jpgfish", "album"), + "#class" : chevereto.CheveretoAlbumExtractor, }, { "#url" : "https://jpg.pet/album/CDilP/?sort=date_desc&page=1", - "#category": ("", "jpgfish", "album"), - "#class" : jpgfish.JpgfishAlbumExtractor, + "#category": ("chevereto", "jpgfish", "album"), + "#class" : chevereto.CheveretoAlbumExtractor, }, { "#url" : "https://jpg1.su/exearco", - "#category": ("", "jpgfish", "user"), - "#class" : jpgfish.JpgfishUserExtractor, + "#category": ("chevereto", "jpgfish", "user"), + "#class" : chevereto.CheveretoUserExtractor, "#count" : 3, }, { "#url" : "https://jpg.church/exearco/albums", - "#category": ("", "jpgfish", "user"), - "#class" : jpgfish.JpgfishUserExtractor, + "#category": ("chevereto", "jpgfish", "user"), + "#class" : chevereto.CheveretoUserExtractor, "#count" : 1, }, { "#url" : "https://jpeg.pet/exearco", - "#category": ("", "jpgfish", "user"), - "#class" : jpgfish.JpgfishUserExtractor, + "#category": ("chevereto", "jpgfish", "user"), + "#class" : chevereto.CheveretoUserExtractor, }, { "#url" : "https://jpg.pet/exearco", - "#category": ("", "jpgfish", "user"), - "#class" : jpgfish.JpgfishUserExtractor, + "#category": ("chevereto", "jpgfish", "user"), + "#class" : chevereto.CheveretoUserExtractor, }, { "#url" : "https://jpg.fishing/exearco", - "#category": ("", "jpgfish", "user"), - "#class" : jpgfish.JpgfishUserExtractor, + "#category": ("chevereto", "jpgfish", "user"), + "#class" : chevereto.CheveretoUserExtractor, }, { "#url" : "https://jpg.fish/exearco", - "#category": ("", "jpgfish", "user"), - "#class" : jpgfish.JpgfishUserExtractor, + "#category": ("chevereto", "jpgfish", "user"), + "#class" : chevereto.CheveretoUserExtractor, }, { "#url" : "https://jpg.church/exearco", - "#category": ("", "jpgfish", "user"), - "#class" : jpgfish.JpgfishUserExtractor, + "#category": ("chevereto", "jpgfish", "user"), + "#class" : chevereto.CheveretoUserExtractor, }, ) diff --git a/test/results/pixl.py b/test/results/pixl.py new file mode 100644 index 00000000..e82353ee --- /dev/null +++ b/test/results/pixl.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import chevereto + + +__tests__ = ( +{ + "#url" : "https://pixl.li/image/894x1023-1c8d6dd3b1b0cd4b0d286b229157a7de.z3DwHB", + "#category": ("chevereto", "pixl", "image"), + "#class" : chevereto.CheveretoImageExtractor, + "#urls" : "https://i.pixl.li/894x1023_1c8d6dd3b1b0cd4b0d286b229157a7de.jpg", + "#sha1_content": "3279b86d0ac42348c703770c4781ecdc300fc13c", + + "album": "", + "extension": "jpg", + "filename": "894x1023_1c8d6dd3b1b0cd4b0d286b229157a7de", + "id": "z3DwHB", + "url": "https://i.pixl.li/894x1023_1c8d6dd3b1b0cd4b0d286b229157a7de.jpg", + "user": "matafaka1", +}, + +{ + "#url" : "https://pixl.is/image/894x1023-1c8d6dd3b1b0cd4b0d286b229157a7de.z3DwHB", + "#category": ("chevereto", "pixl", "image"), + "#class" : chevereto.CheveretoImageExtractor, +}, + +{ + "#url" : "https://pixl.li/album/estelasaubi.D0bJf", + "#category": ("chevereto", "pixl", "album"), + "#class" : chevereto.CheveretoAlbumExtractor, + "#pattern" : chevereto.CheveretoImageExtractor.pattern, + "#count" : 173, +}, + +{ + "#url" : "https://pixl.li/mjstik", + "#category": ("chevereto", "pixl", "user"), + "#class" : chevereto.CheveretoUserExtractor, + "#pattern" : chevereto.CheveretoImageExtractor.pattern, + "#range" : "1-20", + "#count" : 20, +}, + +{ + "#url" : "https://pixl.li/mjstik/albums", + "#category": ("chevereto", "pixl", "user"), + "#class" : chevereto.CheveretoUserExtractor, + "#pattern" : chevereto.CheveretoAlbumExtractor.pattern, + "#count" : 285, +}, + +{ + "#url" : "https://pixl.is/renford/albums", + "#category": ("chevereto", "pixl", "user"), + "#class" : chevereto.CheveretoUserExtractor, +}, + +)