diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 27bb0bbe..995f2519 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -463,6 +463,12 @@ Consider all sites to be NSFW unless otherwise known.
Chapters, Manga |
|
+
+ | Lensdump |
+ https://lensdump.com/ |
+ Albums, individual Images |
+ |
+
| Lexica |
https://lexica.art/ |
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index c35c33ef..89906215 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -1,21 +1,22 @@
# -*- coding: utf-8 -*-
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
"""Extractors for https://lensdump.com/"""
-import json
-
from .common import GalleryExtractor, Extractor, Message
-from .. import text
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?lensdump\.com"
-class LensdumpExtractor(GalleryExtractor):
- """Extractor for lensdump.com"""
+class LensdumpBase():
+ """Base class for lensdump extractors"""
category = "lensdump"
root = "https://lensdump.com"
- def get_meta_prop(self, page, name):
- return text.extr(page, 'property="{}" content="'.format(name), '"')
-
def nodes(self, page=None):
if page is None:
page = self.request(self.url).text
@@ -44,10 +45,9 @@ class LensdumpExtractor(GalleryExtractor):
page_url = None
-class LensdumpAlbumExtractor(LensdumpExtractor):
+class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
subcategory = "album"
- pattern = (r"(?:https?://)?lensdump\.com/"
- r"(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))")
+ pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
test = (
("https://lensdump.com/a/1IhJr", {
"url": "7428cc906e7b291c778d446a11c602b81ba72840",
@@ -76,7 +76,7 @@ class LensdumpAlbumExtractor(LensdumpExtractor):
def images(self, page):
for node in self.nodes(page):
# get urls and filenames of images in current page
- json_data = json.loads(text.unquote(
+ json_data = util.json_loads(text.unquote(
text.extr(node, 'data-object="', '"')))
image_id = json_data.get('name')
image_url = json_data.get('url')
@@ -95,13 +95,11 @@ class LensdumpAlbumExtractor(LensdumpExtractor):
})
-class LensdumpAlbumsExtractor(LensdumpExtractor):
+class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
"""Extractor for album list from lensdump.com"""
subcategory = "albums"
- pattern = r"(?:https?://)?lensdump\.com/\w+/albums"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
+ pattern = BASE_PATTERN + r"/\w+/albums"
+ test = ("https://lensdump.com/vstar925/albums",)
def items(self):
for node in self.nodes():
@@ -111,25 +109,27 @@ class LensdumpAlbumsExtractor(LensdumpExtractor):
"_extractor": LensdumpAlbumExtractor}
-class LensdumpImageExtractor(LensdumpExtractor):
+class LensdumpImageExtractor(LensdumpBase, Extractor):
"""Extractor for individual images on lensdump.com"""
subcategory = "image"
filename_fmt = "{category}_{id}{title:?_//}.{extension}"
directory_fmt = ("{category}",)
archive_fmt = "{id}"
- pattern = r"(?:https?://)?lensdump\.com/i/(\w+)"
+ pattern = BASE_PATTERN + r"/i/(\w+)"
test = (
("https://lensdump.com/i/tyoAyM", {
+ "pattern": r"https://i\d\.lensdump\.com/i/tyoAyM\.webp",
"url": "ae9933f5f3bd9497bfc34e3e70a0fbef6c562d38",
"content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46",
"keyword": {
+ "date": "dt:2022-08-01 08:24:28",
"extension": "webp",
"filename": "tyoAyM",
- "height": "400",
+ "height": 400,
"id": "tyoAyM",
"title": "MYOBI clovis bookcaseset",
"url": "https://i2.lensdump.com/i/tyoAyM.webp",
- "width": "620",
+ "width": 620,
},
}),
)
@@ -139,15 +139,23 @@ class LensdumpImageExtractor(LensdumpExtractor):
self.key = match.group(1)
def items(self):
- page = self.request(self.url).text
- image_url = text.extr(page, 'property="og:image" content="', '"')
- data = text.nameext_from_url(image_url)
- data.update({
- 'id': self.key,
- 'url': image_url,
- 'title': self.get_meta_prop(page, "og:title"),
- 'height': self.get_meta_prop(page, "image:height"),
- 'width': self.get_meta_prop(page, "image:width"),
- })
+ url = "{}/i/{}".format(self.root, self.key)
+ extr = text.extract_from(self.request(url).text)
+
+ data = {
+ "id" : self.key,
+ "title" : text.unescape(extr(
+ 'property="og:title" content="', '"')),
+ "url" : extr(
+ 'property="og:image" content="', '"'),
+ "width" : text.parse_int(extr(
+ 'property="image:width" content="', '"')),
+ "height": text.parse_int(extr(
+ 'property="image:height" content="', '"')),
+ "date" : text.parse_datetime(extr(
+ '