From 72d3ca0bf9f081c9de00c06f83ded05d4a906221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 12 Jan 2017 21:08:49 +0100 Subject: [PATCH] [deviantart] use API for user-extractor --- gallery_dl/extractor/deviantart.py | 131 +++++++++++++---------------- test/test_extractors.py | 3 +- 2 files changed, 60 insertions(+), 74 deletions(-) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index aacb9b03..625a7061 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -8,92 +8,38 @@ """Extract images from http://www.deviantart.com/""" -from .common import Extractor, AsynchronousExtractor, Message -from .. import text -import re +from .common import Extractor, Message +from .. import text, exception +from ..cache import cache -class DeviantartUserExtractor(AsynchronousExtractor): + +class DeviantartUserExtractor(Extractor): """Extractor for all works from an artist on deviantart.com""" category = "deviantart" subcategory = "user" - directory_fmt = ["{category}", "{artist}"] + directory_fmt = ["{category}", "{username}"] filename_fmt = "{category}_{index}_{title}.{extension}" pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"] test = [("http://shimoda7.deviantart.com/gallery/", { "url": "63bfa8efba199e27181943c9060f6770f91a8441", - "keyword": "741bbea4891a23335bb5d119c4a42aeb54702c50", + "keyword": "4ffe227a50f373faf643d7e5ae89a04859af8d19", })] def __init__(self, match): - AsynchronousExtractor.__init__(self) - self.session.cookies["agegate_state"] = "1" - self.artist = match.group(1) + Extractor.__init__(self) + self.api = DeviantartAPI(self.session) + self.user = match.group(1) def items(self): - metadata = self.get_job_metadata() + first = True yield Message.Version, 1 - yield Message.Directory, metadata - for url, data in self.get_works(): - data.update(metadata) - yield Message.Url, url, data - - def get_works(self): - """Yield all work-items for a deviantart-artist""" - url = "http://{}.deviantart.com/gallery/".format(self.artist) - params = {"catpath": "/", "offset": 0} - while True: - num = 0 - page = self.request(url, params=params).text - _, pos = text.extract(page, '
1: - print(sys.argv) extractors = [ extr for extr in extractors if extr.category in sys.argv ] del sys.argv[1:] -skip = ("deviantart"1, "kissmanga") +skip = ["kissmanga"] for extr in extractors: if extr.category in skip: continue