[lensdump] update

- update docs/supportedsites.md
- add GPL2 header
- use BASE_PATTERN
- improve LensdumpImageExtractor
2023-05-26 23:39:17 +02:00
parent d5300cf381
commit 58f7480d46
3 changed files with 49 additions and 32 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -463,6 +463,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Chapters, Manga</td>
    <td></td>
 </tr>
+<tr>
+    <td>Lensdump</td>
+    <td>https://lensdump.com/</td>
+    <td>Albums, individual Images</td>
+    <td></td>
+</tr>
 <tr>
    <td>Lexica</td>
    <td>https://lexica.art/</td>
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -1,21 +1,22 @@
 # -*- coding: utf-8 -*-

+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
 """Extractors for https://lensdump.com/"""

-import json
-
 from .common import GalleryExtractor, Extractor, Message
-from .. import text
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?lensdump\.com"


-class LensdumpExtractor(GalleryExtractor):
-    """Extractor for lensdump.com"""
+class LensdumpBase():
+    """Base class for lensdump extractors"""
    category = "lensdump"
    root = "https://lensdump.com"

-    def get_meta_prop(self, page, name):
-        return text.extr(page, 'property="{}" content="'.format(name), '"')
-
    def nodes(self, page=None):
        if page is None:
            page = self.request(self.url).text
@@ -44,10 +45,9 @@ class LensdumpExtractor(GalleryExtractor):
                page_url = None


-class LensdumpAlbumExtractor(LensdumpExtractor):
+class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
    subcategory = "album"
-    pattern = (r"(?:https?://)?lensdump\.com/"
-               r"(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))")
+    pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
    test = (
        ("https://lensdump.com/a/1IhJr", {
            "url": "7428cc906e7b291c778d446a11c602b81ba72840",
@@ -76,7 +76,7 @@ class LensdumpAlbumExtractor(LensdumpExtractor):
    def images(self, page):
        for node in self.nodes(page):
            # get urls and filenames of images in current page
-            json_data = json.loads(text.unquote(
+            json_data = util.json_loads(text.unquote(
                text.extr(node, 'data-object="', '"')))
            image_id = json_data.get('name')
            image_url = json_data.get('url')
@@ -95,13 +95,11 @@ class LensdumpAlbumExtractor(LensdumpExtractor):
            })


-class LensdumpAlbumsExtractor(LensdumpExtractor):
+class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
    """Extractor for album list from lensdump.com"""
    subcategory = "albums"
-    pattern = r"(?:https?://)?lensdump\.com/\w+/albums"
-
-    def __init__(self, match):
-        Extractor.__init__(self, match)
+    pattern = BASE_PATTERN + r"/\w+/albums"
+    test = ("https://lensdump.com/vstar925/albums",)

    def items(self):
        for node in self.nodes():
@@ -111,25 +109,27 @@ class LensdumpAlbumsExtractor(LensdumpExtractor):
                "_extractor": LensdumpAlbumExtractor}


-class LensdumpImageExtractor(LensdumpExtractor):
+class LensdumpImageExtractor(LensdumpBase, Extractor):
    """Extractor for individual images on lensdump.com"""
    subcategory = "image"
    filename_fmt = "{category}_{id}{title:?_//}.{extension}"
    directory_fmt = ("{category}",)
    archive_fmt = "{id}"
-    pattern = r"(?:https?://)?lensdump\.com/i/(\w+)"
+    pattern = BASE_PATTERN + r"/i/(\w+)"
    test = (
        ("https://lensdump.com/i/tyoAyM", {
+            "pattern": r"https://i\d\.lensdump\.com/i/tyoAyM\.webp",
            "url": "ae9933f5f3bd9497bfc34e3e70a0fbef6c562d38",
            "content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46",
            "keyword": {
+                "date": "dt:2022-08-01 08:24:28",
                "extension": "webp",
                "filename": "tyoAyM",
-                "height": "400",
+                "height": 400,
                "id": "tyoAyM",
                "title": "MYOBI clovis bookcaseset",
                "url": "https://i2.lensdump.com/i/tyoAyM.webp",
-                "width": "620",
+                "width": 620,
            },
        }),
    )
@@ -139,15 +139,23 @@ class LensdumpImageExtractor(LensdumpExtractor):
        self.key = match.group(1)

    def items(self):
-        page = self.request(self.url).text
-        image_url = text.extr(page, 'property="og:image" content="', '"')
-        data = text.nameext_from_url(image_url)
-        data.update({
-            'id': self.key,
-            'url': image_url,
-            'title': self.get_meta_prop(page, "og:title"),
-            'height': self.get_meta_prop(page, "image:height"),
-            'width': self.get_meta_prop(page, "image:width"),
-        })
+        url = "{}/i/{}".format(self.root, self.key)
+        extr = text.extract_from(self.request(url).text)
+
+        data = {
+            "id"    : self.key,
+            "title" : text.unescape(extr(
+                'property="og:title" content="', '"')),
+            "url"   : extr(
+                'property="og:image" content="', '"'),
+            "width" : text.parse_int(extr(
+                'property="image:width" content="', '"')),
+            "height": text.parse_int(extr(
+                'property="image:height" content="', '"')),
+            "date"  : text.parse_datetime(extr(
+                '<span title="', '"'), "%Y-%m-%d %H:%M:%S"),
+        }
+
+        text.nameext_from_url(data["url"], data)
        yield Message.Directory, data
-        yield Message.Url, image_url, data
+        yield Message.Url, data["url"], data
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -197,6 +197,9 @@ SUBCATEGORY_MAP = {
        "discord": "Discord Servers",
        "discord-server": "",
    },
+    "lensdump": {
+        "albums": "",
+    },
    "mangadex": {
        "feed" : "Followed Feed",
    },