[deviantart] revert to getting download URLs from OAuth API

This commit (partially) reverts 27b5b24, 94eb7c6, and a437e78.

Download URLs from the 'extended_fetch' endpoint are now only
usable for logged-in users, while those from the respective
OAuth API endpoint are working again. Everything except
scraps and direct deviation links should be fixed, and those
two categories will work with exported cookies. (#488)

TODO:
- "native" login with --username and --password
- better handling of internally stored cookies
This commit is contained in:
Mike Fährmann
2019-11-26 23:29:46 +01:00
parent 42b9633c7e
commit 359c3bc1c5

View File

@@ -47,6 +47,12 @@ class DeviantartExtractor(Extractor):
if self.quality: if self.quality:
self.quality = "q_{}".format(self.quality) self.quality = "q_{}".format(self.quality)
if self.original != "image":
self._update_content = self._update_content_default
else:
self._update_content = self._update_content_image
self.original = True
self.commit_journal = { self.commit_journal = {
"html": self._commit_journal_html, "html": self._commit_journal_html,
"text": self._commit_journal_text, "text": self._commit_journal_text,
@@ -96,8 +102,7 @@ class DeviantartExtractor(Extractor):
yield self.commit(deviation, content) yield self.commit(deviation, content)
elif deviation["is_downloadable"]: elif deviation["is_downloadable"]:
content = {} content = self.api.deviation_download(deviation["deviationid"])
self._update_content(deviation, content)
yield self.commit(deviation, content) yield self.commit(deviation, content)
if "videos" in deviation: if "videos" in deviation:
@@ -128,9 +133,14 @@ class DeviantartExtractor(Extractor):
deviation["url"].rpartition("-")[2]) deviation["url"].rpartition("-")[2])
except KeyError: except KeyError:
deviation["index"] = 0 deviation["index"] = 0
if self.user: if self.user:
deviation["username"] = self.user deviation["username"] = self.user
deviation["_username"] = self.user.lower() deviation["_username"] = self.user.lower()
else:
deviation["username"] = deviation["author"]["username"]
deviation["_username"] = deviation["username"].lower()
deviation["da_category"] = deviation["category"] deviation["da_category"] = deviation["category"]
deviation["published_time"] = text.parse_int( deviation["published_time"] = text.parse_int(
deviation["published_time"]) deviation["published_time"])
@@ -240,29 +250,15 @@ class DeviantartExtractor(Extractor):
url = "{}/{}/{}/0/".format(self.root, self.user, category) url = "{}/{}/{}/0/".format(self.root, self.user, category)
return [(url + folder["name"], folder) for folder in folders] return [(url + folder["name"], folder) for folder in folders]
def _update_content(self, deviation, content): def _update_content_default(self, deviation, content):
try: content.update(self.api.deviation_download(deviation["deviationid"]))
data = self.api.deviation_extended_fetch(
deviation["index"], def _update_content_image(self, deviation, content):
deviation["author"]["username"], data = self.api.deviation_download(deviation["deviationid"])
"journal" if "excerpt" in deviation else "art", url = data["src"].partition("?")[0]
) mtype = mimetypes.guess_type(url, False)[0]
download = data["deviation"]["extended"]["download"] if mtype and mtype.startswith("image/"):
download["src"] = download["url"] content.update(data)
except Exception as e:
self.log.warning(
"Unable to fetch original download URL for ID %s ('%s: %s')",
deviation["index"], e.__class__.__name__, e,
)
self.log.debug("Server response: %s", data)
else:
if self.original == "image":
url = data["src"].partition("?")[0]
mtype = mimetypes.guess_type(url, False)[0]
if not mtype or not mtype.startswith("image/"):
return
del download["url"]
content.update(download)
class DeviantartUserExtractor(DeviantartExtractor): class DeviantartUserExtractor(DeviantartExtractor):
@@ -298,7 +294,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$" pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
test = ( test = (
("https://www.deviantart.com/shimoda7/gallery/", { ("https://www.deviantart.com/shimoda7/gallery/", {
"pattern": r"https://(www.deviantart.com/download/\d+/" "pattern": r"https://(api-da\.wixmp\.com/_api/download/file"
r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)", r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)",
"count": ">= 30", "count": ">= 30",
"keyword": { "keyword": {
@@ -431,7 +427,8 @@ class DeviantartStashExtractor(DeviantartExtractor):
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
test = ( test = (
("https://sta.sh/022c83odnaxc", { ("https://sta.sh/022c83odnaxc", {
"pattern": r"https://sta.sh/download/7549925030122512/.+\?token=", "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
"content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
"count": 1, "count": 1,
}), }),
# multiple stash items # multiple stash items
@@ -441,7 +438,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
}), }),
# downloadable, but no "content" field (#307) # downloadable, but no "content" field (#307)
("https://sta.sh/024t4coz16mi", { ("https://sta.sh/024t4coz16mi", {
"pattern": r"https://sta.sh/download/7800709982190282/.+\?token=", "pattern": r"https://api-da\.wixmp\.com/_api/download/file",
"count": 1, "count": 1,
}), }),
("https://sta.sh/abcdefghijkl", { ("https://sta.sh/abcdefghijkl", {
@@ -459,36 +456,19 @@ class DeviantartStashExtractor(DeviantartExtractor):
def deviations(self): def deviations(self):
url = "https://sta.sh/" + self.stash_id url = "https://sta.sh/" + self.stash_id
page = self.request(url).text page = self.request(url).text
deviation_id, pos = text.extract(page, '//deviation/', '"') deviation_id = text.extract(page, '//deviation/', '"')[0]
if deviation_id: if deviation_id:
deviation = self.api.deviation(deviation_id) return (self.api.deviation(deviation_id),)
deviation["username"] = deviation["author"]["username"]
pos = page.find("dev-page-download", pos)
if pos >= 0:
deviation["_download"] = {
"width" : text.parse_int(text.extract(
page, 'data-download_width="' , '"', pos)[0]),
"height": text.parse_int(text.extract(
page, 'data-download_height="', '"', pos)[0]),
"src" : text.unescape(text.extract(
page, 'data-download_url="' , '"', pos)[0]),
}
return (deviation,)
else: else:
data = {"_extractor": DeviantartStashExtractor} data = {"_extractor": DeviantartStashExtractor}
page = text.extract( page = text.extract(page, 'id="stash-body"', 'class="footer"')[0]
page, 'id="stash-body"', 'class="footer"', pos)[0]
return [ return [
(url, data) (url, data)
for url in text.extract_iter(page, '<a href="', '"') for url in text.extract_iter(page, '<a href="', '"')
] ]
def _update_content(self, deviation, content):
if "_download" in deviation:
content.update(deviation["_download"])
del deviation["_download"]
class DeviantartFavoriteExtractor(DeviantartExtractor): class DeviantartFavoriteExtractor(DeviantartExtractor):
"""Extractor for an artist's favorites""" """Extractor for an artist's favorites"""
@@ -643,7 +623,8 @@ class DeviantartExtractorV2(DeviantartExtractor):
) )
if "deviation" not in data: if "deviation" not in data:
self.log.warning("Skipping ID %s", deviation["deviationId"]) self.log.warning("Unable to fetch deviation ID %s",
deviation["deviationId"])
self.log.debug("Server response: %s", data) self.log.debug("Server response: %s", data)
continue continue
deviation = self._extract(data) deviation = self._extract(data)
@@ -738,7 +719,7 @@ class DeviantartDeviationExtractor(DeviantartExtractorV2):
test = ( test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), { (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),), "options": (("original", 0),),
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", # "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}), }),
("https://www.deviantart.com/zzz/art/zzz-1234567890", { ("https://www.deviantart.com/zzz/art/zzz-1234567890", {
"count": 0, "count": 0,
@@ -839,14 +820,6 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
) )
def deviations(self): def deviations(self):
# copy self.session
session = self.session.__class__()
for attr in session.__attrs__:
setattr(session, attr, getattr(self.session, attr, None))
# reset cookies in the original session object
self.session.cookies = session.cookies.__class__()
url = self.root + "/_napi/da-user-profile/api/gallery/contents" url = self.root + "/_napi/da-user-profile/api/gallery/contents"
params = { params = {
"username" : self.user, "username" : self.user,
@@ -859,8 +832,7 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
} }
while True: while True:
data = self.request( data = self.request(url, params=params, headers=headers).json()
url, session=session, params=params, headers=headers).json()
for obj in data["results"]: for obj in data["results"]:
yield obj["deviation"] yield obj["deviation"]