[desktopography] simplify (#1740)

2021-09-17 20:09:24 +02:00
parent 4a7d7899ff
commit e4684c5cb9
5 changed files with 46 additions and 95 deletions
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ modules = [
    "comicvine",
    "cyberdrop",
    "danbooru",
+    "desktopography",
    "deviantart",
    "dynastyscans",
    "e621",
@@ -148,7 +149,6 @@ modules = [
    "oauth",
    "test",
    "ytdl",
-    "desktopography",
 ]


--- a/gallery_dl/extractor/desktopography.py
+++ b/gallery_dl/extractor/desktopography.py
@@ -1,12 +1,10 @@
 # -*- coding: utf-8 -*-

-# Copyright 2021 Mike Fährmann
-#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-"""Extractors for https://desktopography.net"""
+"""Extractors for https://desktopography.net/"""

 from .common import Extractor, Message
 from .. import text
@@ -16,56 +14,35 @@ BASE_PATTERN = r"(?:https?://)?desktopography\.net"

 class DesktopographyExtractor(Extractor):
    """Base class for desktopography extractors"""
-
    category = "desktopography"
-
-    filename_fmt = "{filename}.{extension}"
-    # filename_fmt = "{filename}_{filename}.{extension}"
    archive_fmt = "{filename}"
    root = "https://desktopography.net"
-    test = (
-        ("https://desktopography.net")
-    )

-    def __init__(self, match):
-        Extractor.__init__(self, match)
+
+class DesktopographySiteExtractor(DesktopographyExtractor):
+    """Extractor for all desktopography exhibitions """
+    subcategory = "site"
+    pattern = BASE_PATTERN + r"/$"
+    test = ("https://desktopography.net/",)

    def items(self):
-        url = self.root
+        page = self.request(self.root).text
+        data = {"_extractor": DesktopographyExhibitionExtractor}

-        page = self.request(url).text
-        last_pos = 0
-
-        while True:
-            exhibition_year, pos = text.extract(
+        for exhibition_year in text.extract_iter(
                page,
                '<a href="https://desktopography.net/exhibition-',
-                '/">',
-                last_pos,
-            )
+                '/">'):

-            exhibition_url = self.root + "/exhibition-" + exhibition_year + "/"
-
-            if exhibition_year is not None:
-                data = {}
-                # data = {"filename": last_pos, "extension": "jpg"}
-                data["_extractor"] = DesktopographyExhibitionExtractor
-
-                last_pos = pos
-                # yield Message.Url, final_image_url, data
-                yield Message.Queue, exhibition_url, data
-
-            else:
-                break
+            url = self.root + "/exhibition-" + exhibition_year + "/"
+            yield Message.Queue, url, data


 class DesktopographyExhibitionExtractor(DesktopographyExtractor):
-    """Extractor for an yearly desktopography exhibition"""
-    pattern = BASE_PATTERN + r"/exhibition-(.*)/"
+    """Extractor for a yearly desktopography exhibition"""
    subcategory = "exhibition"
-    test = (
-        ("https://desktopography.net/exhibition-2020/")
-    )
+    pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
+    test = ("https://desktopography.net/exhibition-2020/",)

    def __init__(self, match):
        DesktopographyExtractor.__init__(self, match)
@@ -75,39 +52,26 @@ class DesktopographyExhibitionExtractor(DesktopographyExtractor):
        url = "{}/exhibition-{}/".format(self.root, self.year)
        base_entry_url = "https://desktopography.net/portfolios/"
        page = self.request(url).text
-        last_pos = 0

-        while True:
-            entry_url, pos = text.extract(
+        data = {
+            "_extractor": DesktopographyEntryExtractor,
+            "year": self.year,
+        }
+
+        for entry_url in text.extract_iter(
                page,
                '<a class="overlay-background" href="' + base_entry_url,
-                '">',
-                last_pos,
-            )
+                '">'):

-            if entry_url is not None:
-                final_entry_url = base_entry_url + entry_url
-                data = {}
-                # data = {"filename": last_pos, "extension": "jpg"}
-                data["_extractor"] = DesktopographyEntryExtractor
-                data["exhibition_year"] = self.year
-
-                last_pos = pos
-                # yield Message.Url, final_image_url, data
-                yield Message.Queue, final_entry_url, data
-
-            else:
-                break
+            url = base_entry_url + entry_url
+            yield Message.Queue, url, data


 class DesktopographyEntryExtractor(DesktopographyExtractor):
    """Extractor for all resolutions of a desktopography wallpaper"""
-
-    pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
    subcategory = "entry"
-    test = (
-        ("https://desktopography.net/portfolios/new-era/")
-    )
+    pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
+    test = ("https://desktopography.net/portfolios/new-era/",)

    def __init__(self, match):
        DesktopographyExtractor.__init__(self, match)
@@ -115,40 +79,17 @@ class DesktopographyEntryExtractor(DesktopographyExtractor):

    def items(self):
        url = "{}/portfolios/{}".format(self.root, self.entry)
-
        page = self.request(url).text
-        last_pos = 0

-        entry_data = {}
-
-        yield Message.Version, 1
+        entry_data = {"entry": self.entry}
        yield Message.Directory, entry_data

-        while True:
-            image_data, pos = text.extract(
+        for image_data in text.extract_iter(
                page,
                '<a target="_blank" href="https://desktopography.net',
-                '">',
-                last_pos,
-            )
+                '">'):

-            if image_data is not None:
-
-                plit_string = '" class="wallpaper-button" download="'
-                image_data = image_data.split(plit_string)
-
-                final_image_url = self.root + image_data[0]
-
-                image_data = image_data[1].split('.')
-
-                entry_data["filename"] = image_data[0]
-                entry_data["extension"] = image_data[1]
-
-                image_data = image_data[0].split('_')
-                entry_data["entry_tile"] = image_data[0]
-
-                last_pos = pos
-                yield Message.Url, final_image_url, entry_data
-
-            else:
-                break
+            path, _, filename = image_data.partition(
+                '" class="wallpaper-button" download="')
+            text.nameext_from_url(filename, entry_data)
+            yield Message.Url, self.root + path, entry_data
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -66,7 +66,8 @@ class FantiaExtractor(Extractor):
            "comment": resp["comment"],
            "rating": resp["rating"],
            "posted_at": resp["posted_at"],
-            "date": text.parse_datetime(resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
+            "date": text.parse_datetime(
+                resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
            "fanclub_id": resp["fanclub"]["id"],
            "fanclub_user_id": resp["fanclub"]["user"]["id"],
            "fanclub_user_name": resp["fanclub"]["user"]["name"],