[pixeldrain] add 'file' and 'album' extractors (#4839)

2023-11-22 19:01:19 +01:00
parent 725c8dd55a
commit e1404827a6
5 changed files with 183 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -637,6 +637,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>All Pins, Created Pins, Pins, pin.it Links, related Pins, Search Results, Sections, User Profiles</td>
    <td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
 </tr>
+<tr>
+    <td>pixeldrain</td>
+    <td>https://pixeldrain.com/</td>
+    <td>Albums, Files</td>
+    <td></td>
+</tr>
 <tr>
    <td>Pixhost</td>
    <td>https://pixhost.to/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -117,6 +117,7 @@ modules = [
    "piczel",
    "pillowfort",
    "pinterest",
+    "pixeldrain",
    "pixiv",
    "pixnet",
    "plurk",
--- a/gallery_dl/extractor/pixeldrain.py
+++ b/gallery_dl/extractor/pixeldrain.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://pixeldrain.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?pixeldrain\.com"  # URL scheme is optional
+
+
+class PixeldrainExtractor(Extractor):
+    """Common settings and helpers shared by the pixeldrain extractors"""
+    category = "pixeldrain"
+    root = "https://pixeldrain.com"
+    archive_fmt = "{id}"
+
+    def parse_datetime(self, date_string):
+        fmt = "%Y-%m-%dT%H:%M:%S.%fZ"  # e.g. "2023-11-22T16:33:27.744Z"
+        return text.parse_datetime(date_string, fmt)
+
+
+class PixeldrainFileExtractor(PixeldrainExtractor):
+    """Extractor for a single pixeldrain file"""
+    subcategory = "file"
+    filename_fmt = "{filename[:230]} ({id}).{extension}"
+    pattern = BASE_PATTERN + r"/(?:u|api/file)/(\w+)"
+    example = "https://pixeldrain.com/u/abcdefgh"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.file_id = match.group(1)  # first capture group of 'match'
+
+    def items(self):
+        """Yield one Directory message and one Url message for the file"""
+        api_url = self.root + "/api/file/" + self.file_id
+        # metadata comes from the '/info' endpoint, decoded as JSON
+        info = self.request(api_url + "/info").json()
+        info["url"] = api_url + "?download"
+        info["date"] = self.parse_datetime(info["date_upload"])
+        text.nameext_from_url(info["name"], info)
+        yield Message.Directory, info
+        yield Message.Url, info["url"], info
+
+
+class PixeldrainAlbumExtractor(PixeldrainExtractor):
+    """Extractor for all files of a pixeldrain album"""
+    subcategory = "album"
+    directory_fmt = ("{category}",
+                     "{album[date]:%Y-%m-%d} {album[title]} ({album[id]})")
+    filename_fmt = "{num:>03} {filename[:230]} ({id}).{extension}"
+    pattern = BASE_PATTERN + r"/(?:l|api/list)/(\w+)"
+    example = "https://pixeldrain.com/l/abcdefgh"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.album_id = match.group(1)  # first capture group of 'match'
+
+    def items(self):
+        """Yield a Directory message, then one Url message per album file"""
+        album_url = self.root + "/api/list/" + self.album_id
+        album = self.request(album_url).json()
+
+        # take the file list out of the album metadata and
+        # store the value of 'file_count' under 'count' instead
+        entries = album.pop("files")
+        album["count"] = album.pop("file_count")
+        album["date"] = self.parse_datetime(album["date_created"])
+
+        yield Message.Directory, {"album": album}
+        for index, entry in enumerate(entries, 1):
+            entry["album"] = album
+            entry["num"] = index
+            link = "{}/api/file/{}?download".format(self.root, entry["id"])
+            entry["url"] = link
+            entry["date"] = self.parse_datetime(entry["date_upload"])
+            text.nameext_from_url(entry["name"], entry)
+            yield Message.Url, link, entry
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -97,6 +97,7 @@ CATEGORY_MAP = {
    "nsfwalbum"      : "NSFWalbum.com",
    "paheal"         : "rule #34",
    "photovogue"     : "PhotoVogue",
+    "pixeldrain"     : "pixeldrain",
    "pornimagesxxx"  : "Porn Image",
    "pornpics"       : "PornPics.com",
    "pornreactor"    : "PornReactor",
--- a/test/results/pixeldrain.py
+++ b/test/results/pixeldrain.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import pixeldrain
+import datetime
+
+__tests__ = (  # expected results for the pixeldrain extractors
+{  # single PNG file, addressed through its /u/ URL
+    "#url"     : "https://pixeldrain.com/u/jW9E6s4h",
+    "#category": ("", "pixeldrain", "file"),
+    "#class"   : pixeldrain.PixeldrainFileExtractor,
+    "#urls"        : "https://pixeldrain.com/api/file/jW9E6s4h?download",
+    "#sha1_content": "0c8768055e4e20e7c7259608b67799171b691140",
+
+    "abuse_reporter_name" : "",
+    "abuse_type"          : "",
+    "allow_video_player"  : True,
+    "availability"        : "",
+    "availability_message": "",
+    "bandwidth_used"      : int,
+    "bandwidth_used_paid" : 0,
+    "can_download"        : True,
+    "can_edit"            : False,
+    "date"                : "dt:2023-11-22 16:33:27",
+    "date_last_view"      : r"re:\d+-\d+-\d+T\d+:\d+:\d+\.\d+Z",
+    "date_upload"         : "2023-11-22T16:33:27.744Z",
+    "delete_after_date"   : "0001-01-01T00:00:00Z",
+    "delete_after_downloads": 0,
+    "download_speed_limit": 0,
+    "downloads"           : int,
+    "extension"           : "png",
+    "filename"            : "test-テスト-\"&>",
+    "hash_sha256"         : "eb359cd8f02a7d6762f9863798297ff6a22569c5c87a9d38c55bdb3a3e26003f",
+    "id"                  : "jW9E6s4h",
+    "mime_type"           : "image/png",
+    "name"                : "test-テスト-\"&>.png",
+    "show_ads"            : True,
+    "size"                : 182,
+    "success"             : True,
+    "thumbnail_href"      : "/file/jW9E6s4h/thumbnail",
+    "url"                 : "https://pixeldrain.com/api/file/jW9E6s4h?download",
+    "views"               : int,
+},
+
+{  # single plain-text file
+    "#url"     : "https://pixeldrain.com/u/yEK1n2Qc",
+    "#category": ("", "pixeldrain", "file"),
+    "#class"   : pixeldrain.PixeldrainFileExtractor,
+    "#urls"        : "https://pixeldrain.com/api/file/yEK1n2Qc?download",
+    "#sha1_content": "08463261191d403de2133d829060050d8b04609f",
+
+    "date"       : "dt:2023-11-22 16:38:04",
+    "date_upload": "2023-11-22T16:38:04.928Z",
+    "extension"  : "txt",
+    "filename"   : '"&>',
+    "hash_sha256": "4c1e2bbcbe1dea8b6f895f5cdd8461c37c561bce4f1b3556ba58392d95964294",
+    "id"         : "yEK1n2Qc",
+    "mime_type"  : "text/plain; charset=utf-8",
+    "name"       : '"&>.txt',
+    "size"       : 14,
+},
+
+{  # album whose two files are the ones tested above
+    "#url"     : "https://pixeldrain.com/l/zQ7XpWfM",
+    "#category": ("", "pixeldrain", "album"),
+    "#class"   : pixeldrain.PixeldrainAlbumExtractor,
+    "#urls"    : (
+        "https://pixeldrain.com/api/file/yEK1n2Qc?download",
+        "https://pixeldrain.com/api/file/jW9E6s4h?download",
+    ),
+
+    "album"      : {
+        "can_edit"    : False,
+        "count"       : 2,
+        "date"        : "dt:2023-11-22 16:40:39",
+        "date_created": "2023-11-22T16:40:39.218Z",
+        "id"          : "zQ7XpWfM",
+        "success"     : True,
+        "title"       : "アルバム",
+    },
+    "date"       : datetime.datetime,
+    "description": "",
+    "detail_href": r"re:/file/(yEK1n2Qc|jW9E6s4h)/info",
+    "hash_sha256": r"re:\w{64}",
+    "id"         : r"re:yEK1n2Qc|jW9E6s4h",
+    "mime_type"  : str,
+},
+
+)