[smugmug] improve API code; use data expansions
This commit is contained in:
@@ -39,7 +39,9 @@ class PinterestPinExtractor(PinterestExtractor):
|
|||||||
test = [
|
test = [
|
||||||
("https://www.pinterest.com/pin/858146903966145189/", {
|
("https://www.pinterest.com/pin/858146903966145189/", {
|
||||||
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
|
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
|
||||||
"content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
|
# image version depends on CDN server used
|
||||||
|
# "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
|
||||||
|
# "content": "4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca",
|
||||||
}),
|
}),
|
||||||
("https://www.pinterest.com/pin/858146903966145188/", {
|
("https://www.pinterest.com/pin/858146903966145188/", {
|
||||||
"exception": exception.NotFoundError,
|
"exception": exception.NotFoundError,
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
from ..cache import memcache
|
|
||||||
|
|
||||||
BASE_PATTERN = (
|
BASE_PATTERN = (
|
||||||
r"(?:smugmug:(?:https?://)?([^/]+)|"
|
r"(?:smugmug:(?:https?://)?([^/]+)|"
|
||||||
@@ -20,25 +19,16 @@ BASE_PATTERN = (
|
|||||||
class SmugmugExtractor(Extractor):
|
class SmugmugExtractor(Extractor):
|
||||||
"""Base class for smugmug extractors"""
|
"""Base class for smugmug extractors"""
|
||||||
category = "smugmug"
|
category = "smugmug"
|
||||||
filename_fmt = "{category}_{Owner[Name]}_{Image[ImageKey]}.{extension}"
|
filename_fmt = "{category}_{Owner[NickName]}_{Image[ImageKey]}.{extension}"
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Extractor.__init__(self)
|
Extractor.__init__(self)
|
||||||
self.api = SmugmugAPI(self)
|
self.api = SmugmugAPI(self)
|
||||||
|
|
||||||
def update_image(self, image):
|
|
||||||
if "ArchivedUri" not in image:
|
|
||||||
largest = self.api.image_largest(image["ImageKey"])
|
|
||||||
for key in ("Url", "Width", "Height", "MD5", "Size"):
|
|
||||||
if key in largest:
|
|
||||||
image[key] = largest[key]
|
|
||||||
return image["Url"], image
|
|
||||||
return image["ArchivedUri"], image
|
|
||||||
|
|
||||||
|
|
||||||
class SmugmugAlbumExtractor(SmugmugExtractor):
|
class SmugmugAlbumExtractor(SmugmugExtractor):
|
||||||
subcategory = "album"
|
subcategory = "album"
|
||||||
directory_fmt = ["{category}", "{Owner[Name]}", "{Album[Name]}"]
|
directory_fmt = ["{category}", "{Owner[NickName]}", "{Album[Name]}"]
|
||||||
archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
|
archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
|
||||||
pattern = [r"smugmug:album:([^:]+)$"]
|
pattern = [r"smugmug:album:([^:]+)$"]
|
||||||
test = [("smugmug:album:xgkb4C", {
|
test = [("smugmug:album:xgkb4C", {
|
||||||
@@ -51,33 +41,30 @@ class SmugmugAlbumExtractor(SmugmugExtractor):
|
|||||||
self.album_id = match.group(1)
|
self.album_id = match.group(1)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
album = self.api.album(self.album_id)
|
album = self.api.album(self.album_id, "User")
|
||||||
images = self.api.album_images(self.album_id)
|
owner = album["Uris"]["User"]
|
||||||
username = album["Uris"]["User"]["Uri"].rpartition("/")[2]
|
|
||||||
owner = self.api.user(username)
|
|
||||||
|
|
||||||
data = {
|
del album["Uris"]
|
||||||
"Album": album,
|
del owner["Uris"]
|
||||||
"Owner": owner,
|
data = {"Album": album, "Owner": owner}
|
||||||
}
|
|
||||||
|
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
yield Message.Directory, data
|
yield Message.Directory, data
|
||||||
|
|
||||||
for image in images:
|
for image in self.api.album_images(self.album_id, "LargestImage"):
|
||||||
url, image = self.update_image(image)
|
url = _apply_largest(image)
|
||||||
data["Image"] = image
|
data["Image"] = image
|
||||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||||
|
|
||||||
|
|
||||||
class SmugmugImageExtractor(SmugmugExtractor):
|
class SmugmugImageExtractor(SmugmugExtractor):
|
||||||
subcategory = "image"
|
subcategory = "image"
|
||||||
directory_fmt = ["{category}", "{Owner[Name]}"]
|
directory_fmt = ["{category}", "{Owner[NickName]}"]
|
||||||
archive_fmt = "{Image[ImageKey]}"
|
archive_fmt = "{Image[ImageKey]}"
|
||||||
pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"]
|
pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"]
|
||||||
test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", {
|
test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", {
|
||||||
"url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4",
|
"url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4",
|
||||||
"keyword": "d53df829d493ec3e31b8fe300872beb968812bfd",
|
"keyword": "490f2b977801e1f9c817be7aceea46d37418f08d",
|
||||||
"content": "626fe50d25fe49beeda15e116938db36e163c01f",
|
"content": "626fe50d25fe49beeda15e116938db36e163c01f",
|
||||||
})]
|
})]
|
||||||
|
|
||||||
@@ -86,18 +73,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
|
|||||||
self.image_id = match.group(3)
|
self.image_id = match.group(3)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
image = self.api.image(self.image_id)
|
image = self.api.image(self.image_id, "LargestImage,ImageOwner")
|
||||||
username = image["Uris"]["ImageOwner"]["Uri"].rpartition("/")[2]
|
owner = image["Uris"]["ImageOwner"]
|
||||||
owner = self.api.user(username)
|
|
||||||
|
|
||||||
url, image = self.update_image(image)
|
url = _apply_largest(image)
|
||||||
|
|
||||||
data = {
|
|
||||||
"Image": image,
|
|
||||||
"Owner": owner,
|
|
||||||
}
|
|
||||||
del image["Uris"]
|
|
||||||
del owner["Uris"]
|
del owner["Uris"]
|
||||||
|
data = {"Image": image, "Owner": owner}
|
||||||
text.nameext_from_url(url, data)
|
text.nameext_from_url(url, data)
|
||||||
|
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
@@ -106,10 +88,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class SmugmugNodeExtractor(SmugmugExtractor):
|
class SmugmugNodeExtractor(SmugmugExtractor):
|
||||||
""" """
|
|
||||||
subcategory = "node"
|
subcategory = "node"
|
||||||
directory_fmt = ["{category}"]
|
|
||||||
archive_fmt = "n_{Node[NodeID]}_{Image[ImageID]}"
|
|
||||||
pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"]
|
pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"]
|
||||||
test = [("https://mikf.smugmug.com/Test/n-xnNH3s", {
|
test = [("https://mikf.smugmug.com/Test/n-xnNH3s", {
|
||||||
"pattern": "^smugmug:album:xgkb4C$",
|
"pattern": "^smugmug:album:xgkb4C$",
|
||||||
@@ -124,9 +103,10 @@ class SmugmugNodeExtractor(SmugmugExtractor):
|
|||||||
|
|
||||||
data = self.api.node(self.node_id)
|
data = self.api.node(self.node_id)
|
||||||
if data["Type"] == "Album":
|
if data["Type"] == "Album":
|
||||||
album_id = data["Uris"]["Album"]["Uri"].rpartition("/")[2]
|
yield Message.Queue, "smugmug:album:" + _get(data, "Album"), data
|
||||||
yield Message.Queue, "smugmug:album:" + album_id, data
|
# if data["Type"] == "Folder":
|
||||||
# ...
|
# for child in self.api.node_children(self.node_id):
|
||||||
|
# yield Message.Queue, "smugmug:node:" + ...
|
||||||
|
|
||||||
|
|
||||||
class SmugmugAPI():
|
class SmugmugAPI():
|
||||||
@@ -154,61 +134,117 @@ class SmugmugAPI():
|
|||||||
self.session = extractor.session
|
self.session = extractor.session
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
|
|
||||||
def album(self, album_id):
|
self.log = extractor.log
|
||||||
return self._call("album/" + album_id)["Album"]
|
|
||||||
|
|
||||||
def album_images(self, album_id):
|
def album(self, album_id, expands=None):
|
||||||
return self._pagination("album/" + album_id + "!images")
|
return self._expansion("album/" + album_id, expands)
|
||||||
|
|
||||||
def image(self, image_id):
|
def image(self, image_id, expands=None):
|
||||||
return self._call("image/" + image_id)["Image"]
|
return self._expansion("image/" + image_id, expands)
|
||||||
|
|
||||||
def image_largest(self, image_id):
|
def node(self, node_id, expands=None):
|
||||||
endpoint = "image/" + image_id + "!largestimage"
|
return self._expansion("node/" + node_id, expands)
|
||||||
return self._call(endpoint)["LargestImage"]
|
|
||||||
|
|
||||||
def image_sizes(self, image_id):
|
def user(self, username, expands=None):
|
||||||
return self._call("image/" + image_id + "!sizedetails")
|
return self._expansion("user/" + username, expands)
|
||||||
|
|
||||||
def node(self, node_id):
|
def album_images(self, album_id, expands=None):
|
||||||
return self._call("node/" + node_id)["Node"]
|
return self._pagination("album/" + album_id + "!images", expands)
|
||||||
|
|
||||||
@memcache(keyarg=1)
|
def node_children(self, node_id, expands=None):
|
||||||
def user(self, username):
|
return self._pagination("node/" + node_id + "!children", expands)
|
||||||
return self._call("user/" + username)["User"]
|
|
||||||
|
|
||||||
def _call(self, endpoint, params=None):
|
def _call(self, endpoint, params=None):
|
||||||
url = self.API_URL + endpoint
|
url = self.API_URL + endpoint
|
||||||
params = params or {}
|
params = params or {}
|
||||||
if self.api_key:
|
if self.api_key:
|
||||||
params["APIKey"] = self.api_key
|
params["APIKey"] = self.api_key
|
||||||
|
params["_verbosity"] = "1"
|
||||||
|
|
||||||
response = self.session.get(url, params=params, headers=self.HEADERS)
|
response = self.session.get(url, params=params, headers=self.HEADERS)
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
if 200 <= data["Code"] < 400:
|
if 200 <= data["Code"] < 400:
|
||||||
return data["Response"]
|
return data
|
||||||
|
|
||||||
if data["Code"] == 404:
|
if data["Code"] == 404:
|
||||||
raise exception.NotFoundError()
|
raise exception.NotFoundError()
|
||||||
if data["Code"] == 429:
|
if data["Code"] == 429:
|
||||||
self.log.error("Rate limit reached")
|
self.log.error("Rate limit reached")
|
||||||
raise exception.StopExtraction()
|
else:
|
||||||
|
self.log.error("API request failed")
|
||||||
|
self.log.debug(data)
|
||||||
|
raise exception.StopExtraction()
|
||||||
|
|
||||||
|
def _expansion(self, endpoint, expands):
|
||||||
|
if expands:
|
||||||
|
endpoint += "?_expand=" + expands
|
||||||
|
return _apply_expansions(self._call(endpoint), expands)
|
||||||
|
|
||||||
|
def _pagination(self, endpoint, expands=None):
|
||||||
|
if expands:
|
||||||
|
endpoint += "?_expand=" + expands
|
||||||
|
params = {"start": 1, "count": 100}
|
||||||
|
|
||||||
def _pagination(self, endpoint):
|
|
||||||
params = {
|
|
||||||
"start": 1,
|
|
||||||
"count": 100,
|
|
||||||
}
|
|
||||||
while True:
|
while True:
|
||||||
response = self._call(endpoint, params)
|
data = self._call(endpoint, params)
|
||||||
|
yield from _apply_expansions_iter(data, expands)
|
||||||
|
|
||||||
obj = response[response["Locator"]]
|
if "NextPage" not in data["Response"]["Pages"]:
|
||||||
if isinstance(obj, list):
|
|
||||||
yield from obj
|
|
||||||
else:
|
|
||||||
yield obj
|
|
||||||
|
|
||||||
if "NextPage" not in response["Pages"]:
|
|
||||||
return
|
return
|
||||||
params["start"] += params["count"]
|
params["start"] += params["count"]
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_largest(image, delete=True):
|
||||||
|
largest = image["Uris"]["LargestImage"]
|
||||||
|
if delete:
|
||||||
|
del image["Uris"]
|
||||||
|
for key in ("Url", "Width", "Height", "MD5", "Size", "Watermarked"):
|
||||||
|
if key in largest:
|
||||||
|
image[key] = largest[key]
|
||||||
|
return image["Url"]
|
||||||
|
|
||||||
|
|
||||||
|
def _get(obj, key):
|
||||||
|
return obj["Uris"][key].rpartition("/")[2]
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_expansions(data, expands):
|
||||||
|
obj = _unwrap(data["Response"])
|
||||||
|
|
||||||
|
if "Expansions" in data:
|
||||||
|
expansions = data["Expansions"]
|
||||||
|
uris = obj["Uris"]
|
||||||
|
|
||||||
|
for name in expands.split(","):
|
||||||
|
uri = uris[name]
|
||||||
|
uris[name] = _unwrap(expansions[uri])
|
||||||
|
|
||||||
|
return obj
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_expansions_iter(data, expands):
|
||||||
|
objs = _unwrap_iter(data["Response"])
|
||||||
|
|
||||||
|
if "Expansions" in data:
|
||||||
|
expansions = data["Expansions"]
|
||||||
|
expands = expands.split(",")
|
||||||
|
|
||||||
|
for obj in objs:
|
||||||
|
uris = obj["Uris"]
|
||||||
|
|
||||||
|
for name in expands:
|
||||||
|
uri = uris[name]
|
||||||
|
uris[name] = _unwrap(expansions[uri])
|
||||||
|
|
||||||
|
return objs
|
||||||
|
|
||||||
|
|
||||||
|
def _unwrap(response):
|
||||||
|
return response[response["Locator"]]
|
||||||
|
|
||||||
|
|
||||||
|
def _unwrap_iter(response):
|
||||||
|
obj = _unwrap(response)
|
||||||
|
if isinstance(obj, list):
|
||||||
|
return obj
|
||||||
|
return (obj,)
|
||||||
|
|||||||
Reference in New Issue
Block a user