diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 27bb0bbe..995f2519 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -463,6 +463,12 @@ Consider all sites to be NSFW unless otherwise known. Chapters, Manga + + Lensdump + https://lensdump.com/ + Albums, individual Images + + Lexica https://lexica.art/ diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py index c35c33ef..89906215 100644 --- a/gallery_dl/extractor/lensdump.py +++ b/gallery_dl/extractor/lensdump.py @@ -1,21 +1,22 @@ # -*- coding: utf-8 -*- +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + """Extractors for https://lensdump.com/""" -import json - from .common import GalleryExtractor, Extractor, Message -from .. import text +from .. import text, util + +BASE_PATTERN = r"(?:https?://)?lensdump\.com" -class LensdumpExtractor(GalleryExtractor): - """Extractor for lensdump.com""" +class LensdumpBase(): + """Base class for lensdump extractors""" category = "lensdump" root = "https://lensdump.com" - def get_meta_prop(self, page, name): - return text.extr(page, 'property="{}" content="'.format(name), '"') - def nodes(self, page=None): if page is None: page = self.request(self.url).text @@ -44,10 +45,9 @@ class LensdumpExtractor(GalleryExtractor): page_url = None -class LensdumpAlbumExtractor(LensdumpExtractor): +class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor): subcategory = "album" - pattern = (r"(?:https?://)?lensdump\.com/" - r"(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))") + pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))" test = ( ("https://lensdump.com/a/1IhJr", { "url": "7428cc906e7b291c778d446a11c602b81ba72840", @@ -76,7 +76,7 @@ class LensdumpAlbumExtractor(LensdumpExtractor): def images(self, page): for node in self.nodes(page): # get urls and filenames of images in current page - json_data = json.loads(text.unquote( + json_data = util.json_loads(text.unquote( text.extr(node, 'data-object="', '"'))) image_id = json_data.get('name') image_url = json_data.get('url') @@ -95,13 +95,11 @@ class LensdumpAlbumExtractor(LensdumpExtractor): }) -class LensdumpAlbumsExtractor(LensdumpExtractor): +class LensdumpAlbumsExtractor(LensdumpBase, Extractor): """Extractor for album list from lensdump.com""" subcategory = "albums" - pattern = r"(?:https?://)?lensdump\.com/\w+/albums" - - def __init__(self, match): - Extractor.__init__(self, match) + pattern = BASE_PATTERN + r"/\w+/albums" + test = ("https://lensdump.com/vstar925/albums",) def items(self): for node in self.nodes(): @@ -111,25 +109,27 @@ class LensdumpAlbumsExtractor(LensdumpExtractor): "_extractor": LensdumpAlbumExtractor} -class LensdumpImageExtractor(LensdumpExtractor): +class LensdumpImageExtractor(LensdumpBase, Extractor): """Extractor for individual images on lensdump.com""" subcategory = "image" filename_fmt = "{category}_{id}{title:?_//}.{extension}" directory_fmt = ("{category}",) archive_fmt = "{id}" - pattern = r"(?:https?://)?lensdump\.com/i/(\w+)" + pattern = BASE_PATTERN + r"/i/(\w+)" test = ( ("https://lensdump.com/i/tyoAyM", { + "pattern": r"https://i\d\.lensdump\.com/i/tyoAyM\.webp", "url": "ae9933f5f3bd9497bfc34e3e70a0fbef6c562d38", "content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46", "keyword": { + "date": "dt:2022-08-01 08:24:28", "extension": "webp", "filename": "tyoAyM", - "height": "400", + "height": 400, "id": "tyoAyM", "title": "MYOBI clovis bookcaseset", "url": "https://i2.lensdump.com/i/tyoAyM.webp", - "width": "620", + "width": 620, }, }), ) @@ -139,15 +139,23 @@ class LensdumpImageExtractor(LensdumpExtractor): self.key = match.group(1) def items(self): - page = self.request(self.url).text - image_url = text.extr(page, 'property="og:image" content="', '"') - data = text.nameext_from_url(image_url) - data.update({ - 'id': self.key, - 'url': image_url, - 'title': self.get_meta_prop(page, "og:title"), - 'height': self.get_meta_prop(page, "image:height"), - 'width': self.get_meta_prop(page, "image:width"), - }) + url = "{}/i/{}".format(self.root, self.key) + extr = text.extract_from(self.request(url).text) + + data = { + "id" : self.key, + "title" : text.unescape(extr( + 'property="og:title" content="', '"')), + "url" : extr( + 'property="og:image" content="', '"'), + "width" : text.parse_int(extr( + 'property="image:width" content="', '"')), + "height": text.parse_int(extr( + 'property="image:height" content="', '"')), + "date" : text.parse_datetime(extr( + '