diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f2f00b7b..5e24224b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -121,6 +121,12 @@ Consider all sites to be NSFW unless otherwise known. Pools, Popular Images, Posts, Tag Searches Supported + + Desktopography + https://desktopography.net/ + Entries, Exhibitions + + DeviantArt https://www.deviantart.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index c737f7ff..c5125487 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -28,6 +28,7 @@ modules = [ "comicvine", "cyberdrop", "danbooru", + "desktopography", "deviantart", "dynastyscans", "e621", @@ -148,7 +149,6 @@ modules = [ "oauth", "test", "ytdl", - "desktopography", ] diff --git a/gallery_dl/extractor/desktopography.py b/gallery_dl/extractor/desktopography.py index d6627c6d..363341a5 100644 --- a/gallery_dl/extractor/desktopography.py +++ b/gallery_dl/extractor/desktopography.py @@ -1,12 +1,10 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann -# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://desktopography.net""" +"""Extractors for https://desktopography.net/""" from .common import Extractor, Message from .. 
import text @@ -16,56 +14,35 @@ BASE_PATTERN = r"(?:https?://)?desktopography\.net" class DesktopographyExtractor(Extractor): """Base class for desktopography extractors""" - category = "desktopography" - - filename_fmt = "{filename}.{extension}" - # filename_fmt = "{filename}_{filename}.{extension}" archive_fmt = "{filename}" root = "https://desktopography.net" - test = ( - ("https://desktopography.net") - ) - def __init__(self, match): - Extractor.__init__(self, match) + +class DesktopographySiteExtractor(DesktopographyExtractor): + """Extractor for all desktopography exhibitions """ + subcategory = "site" + pattern = BASE_PATTERN + r"/$" + test = ("https://desktopography.net/",) def items(self): - url = self.root + page = self.request(self.root).text + data = {"_extractor": DesktopographyExhibitionExtractor} - page = self.request(url).text - last_pos = 0 - - while True: - exhibition_year, pos = text.extract( + for exhibition_year in text.extract_iter( page, '', - last_pos, - ) + '/">'): - exhibition_url = self.root + "/exhibition-" + exhibition_year + "/" - - if exhibition_year is not None: - data = {} - # data = {"filename": last_pos, "extension": "jpg"} - data["_extractor"] = DesktopographyExhibitionExtractor - - last_pos = pos - # yield Message.Url, final_image_url, data - yield Message.Queue, exhibition_url, data - - else: - break + url = self.root + "/exhibition-" + exhibition_year + "/" + yield Message.Queue, url, data class DesktopographyExhibitionExtractor(DesktopographyExtractor): - """Extractor for an yearly desktopography exhibition""" - pattern = BASE_PATTERN + r"/exhibition-(.*)/" + """Extractor for a yearly desktopography exhibition""" subcategory = "exhibition" - test = ( - ("https://desktopography.net/exhibition-2020/") - ) + pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/" + test = ("https://desktopography.net/exhibition-2020/",) def __init__(self, match): DesktopographyExtractor.__init__(self, match) @@ -75,39 +52,26 @@ class 
DesktopographyExhibitionExtractor(DesktopographyExtractor): url = "{}/exhibition-{}/".format(self.root, self.year) base_entry_url = "https://desktopography.net/portfolios/" page = self.request(url).text - last_pos = 0 - while True: - entry_url, pos = text.extract( + data = { + "_extractor": DesktopographyEntryExtractor, + "year": self.year, + } + + for entry_url in text.extract_iter( page, '', - last_pos, - ) + '">'): - if entry_url is not None: - final_entry_url = base_entry_url + entry_url - data = {} - # data = {"filename": last_pos, "extension": "jpg"} - data["_extractor"] = DesktopographyEntryExtractor - data["exhibition_year"] = self.year - - last_pos = pos - # yield Message.Url, final_image_url, data - yield Message.Queue, final_entry_url, data - - else: - break + url = base_entry_url + entry_url + yield Message.Queue, url, data class DesktopographyEntryExtractor(DesktopographyExtractor): """Extractor for all resolutions of a desktopography wallpaper""" - - pattern = BASE_PATTERN + r"/portfolios/([\w-]+)" subcategory = "entry" - test = ( - ("https://desktopography.net/portfolios/new-era/") - ) + pattern = BASE_PATTERN + r"/portfolios/([\w-]+)" + test = ("https://desktopography.net/portfolios/new-era/",) def __init__(self, match): DesktopographyExtractor.__init__(self, match) @@ -115,40 +79,17 @@ class DesktopographyEntryExtractor(DesktopographyExtractor): def items(self): url = "{}/portfolios/{}".format(self.root, self.entry) - page = self.request(url).text - last_pos = 0 - entry_data = {} - - yield Message.Version, 1 + entry_data = {"entry": self.entry} yield Message.Directory, entry_data - while True: - image_data, pos = text.extract( + for image_data in text.extract_iter( page, '', - last_pos, - ) + '">'): - if image_data is not None: - - plit_string = '" class="wallpaper-button" download="' - image_data = image_data.split(plit_string) - - final_image_url = self.root + image_data[0] - - image_data = image_data[1].split('.') - - entry_data["filename"] = 
image_data[0] - entry_data["extension"] = image_data[1] - - image_data = image_data[0].split('_') - entry_data["entry_tile"] = image_data[0] - - last_pos = pos - yield Message.Url, final_image_url, entry_data - - else: - break + path, _, filename = image_data.partition( + '" class="wallpaper-button" download="') + text.nameext_from_url(filename, entry_data) + yield Message.Url, self.root + path, entry_data diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py index 5a359b7d..62f74291 100644 --- a/gallery_dl/extractor/fantia.py +++ b/gallery_dl/extractor/fantia.py @@ -66,7 +66,8 @@ class FantiaExtractor(Extractor): "comment": resp["comment"], "rating": resp["rating"], "posted_at": resp["posted_at"], - "date": text.parse_datetime(resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"), + "date": text.parse_datetime( + resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"), "fanclub_id": resp["fanclub"]["id"], "fanclub_user_id": resp["fanclub"]["user"]["id"], "fanclub_user_name": resp["fanclub"]["user"]["name"], diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index e6963715..03689877 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -134,6 +134,9 @@ SUBCATEGORY_MAP = { "artstation": { "artwork": "Artwork Listings", }, + "desktopography": { + "site": "", + }, "deviantart": { "stash": "Sta.sh", "watch-posts": "",