[smugmug] improve API code; use data expansions

This commit is contained in:
Mike Fährmann
2018-04-30 18:19:28 +02:00
parent 3fe653d940
commit 2ea0d1da42
2 changed files with 110 additions and 72 deletions

View File

@@ -39,7 +39,9 @@ class PinterestPinExtractor(PinterestExtractor):
test = [ test = [
("https://www.pinterest.com/pin/858146903966145189/", { ("https://www.pinterest.com/pin/858146903966145189/", {
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5", "url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
"content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947", # image version depends on CDN server used
# "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
# "content": "4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca",
}), }),
("https://www.pinterest.com/pin/858146903966145188/", { ("https://www.pinterest.com/pin/858146903966145188/", {
"exception": exception.NotFoundError, "exception": exception.NotFoundError,

View File

@@ -10,7 +10,6 @@
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import memcache
BASE_PATTERN = ( BASE_PATTERN = (
r"(?:smugmug:(?:https?://)?([^/]+)|" r"(?:smugmug:(?:https?://)?([^/]+)|"
@@ -20,25 +19,16 @@ BASE_PATTERN = (
class SmugmugExtractor(Extractor): class SmugmugExtractor(Extractor):
"""Base class for smugmug extractors""" """Base class for smugmug extractors"""
category = "smugmug" category = "smugmug"
filename_fmt = "{category}_{Owner[Name]}_{Image[ImageKey]}.{extension}" filename_fmt = "{category}_{Owner[NickName]}_{Image[ImageKey]}.{extension}"
def __init__(self): def __init__(self):
Extractor.__init__(self) Extractor.__init__(self)
self.api = SmugmugAPI(self) self.api = SmugmugAPI(self)
def update_image(self, image):
if "ArchivedUri" not in image:
largest = self.api.image_largest(image["ImageKey"])
for key in ("Url", "Width", "Height", "MD5", "Size"):
if key in largest:
image[key] = largest[key]
return image["Url"], image
return image["ArchivedUri"], image
class SmugmugAlbumExtractor(SmugmugExtractor): class SmugmugAlbumExtractor(SmugmugExtractor):
subcategory = "album" subcategory = "album"
directory_fmt = ["{category}", "{Owner[Name]}", "{Album[Name]}"] directory_fmt = ["{category}", "{Owner[NickName]}", "{Album[Name]}"]
archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}" archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}"
pattern = [r"smugmug:album:([^:]+)$"] pattern = [r"smugmug:album:([^:]+)$"]
test = [("smugmug:album:xgkb4C", { test = [("smugmug:album:xgkb4C", {
@@ -51,33 +41,30 @@ class SmugmugAlbumExtractor(SmugmugExtractor):
self.album_id = match.group(1) self.album_id = match.group(1)
def items(self): def items(self):
album = self.api.album(self.album_id) album = self.api.album(self.album_id, "User")
images = self.api.album_images(self.album_id) owner = album["Uris"]["User"]
username = album["Uris"]["User"]["Uri"].rpartition("/")[2]
owner = self.api.user(username)
data = { del album["Uris"]
"Album": album, del owner["Uris"]
"Owner": owner, data = {"Album": album, "Owner": owner}
}
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, data yield Message.Directory, data
for image in images: for image in self.api.album_images(self.album_id, "LargestImage"):
url, image = self.update_image(image) url = _apply_largest(image)
data["Image"] = image data["Image"] = image
yield Message.Url, url, text.nameext_from_url(url, data) yield Message.Url, url, text.nameext_from_url(url, data)
class SmugmugImageExtractor(SmugmugExtractor): class SmugmugImageExtractor(SmugmugExtractor):
subcategory = "image" subcategory = "image"
directory_fmt = ["{category}", "{Owner[Name]}"] directory_fmt = ["{category}", "{Owner[NickName]}"]
archive_fmt = "{Image[ImageKey]}" archive_fmt = "{Image[ImageKey]}"
pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"] pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"]
test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", { test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", {
"url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4", "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4",
"keyword": "d53df829d493ec3e31b8fe300872beb968812bfd", "keyword": "490f2b977801e1f9c817be7aceea46d37418f08d",
"content": "626fe50d25fe49beeda15e116938db36e163c01f", "content": "626fe50d25fe49beeda15e116938db36e163c01f",
})] })]
@@ -86,18 +73,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
self.image_id = match.group(3) self.image_id = match.group(3)
def items(self): def items(self):
image = self.api.image(self.image_id) image = self.api.image(self.image_id, "LargestImage,ImageOwner")
username = image["Uris"]["ImageOwner"]["Uri"].rpartition("/")[2] owner = image["Uris"]["ImageOwner"]
owner = self.api.user(username)
url, image = self.update_image(image) url = _apply_largest(image)
data = {
"Image": image,
"Owner": owner,
}
del image["Uris"]
del owner["Uris"] del owner["Uris"]
data = {"Image": image, "Owner": owner}
text.nameext_from_url(url, data) text.nameext_from_url(url, data)
yield Message.Version, 1 yield Message.Version, 1
@@ -106,10 +88,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
class SmugmugNodeExtractor(SmugmugExtractor): class SmugmugNodeExtractor(SmugmugExtractor):
""" """
subcategory = "node" subcategory = "node"
directory_fmt = ["{category}"]
archive_fmt = "n_{Node[NodeID]}_{Image[ImageID]}"
pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"] pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"]
test = [("https://mikf.smugmug.com/Test/n-xnNH3s", { test = [("https://mikf.smugmug.com/Test/n-xnNH3s", {
"pattern": "^smugmug:album:xgkb4C$", "pattern": "^smugmug:album:xgkb4C$",
@@ -124,9 +103,10 @@ class SmugmugNodeExtractor(SmugmugExtractor):
data = self.api.node(self.node_id) data = self.api.node(self.node_id)
if data["Type"] == "Album": if data["Type"] == "Album":
album_id = data["Uris"]["Album"]["Uri"].rpartition("/")[2] yield Message.Queue, "smugmug:album:" + _get(data, "Album"), data
yield Message.Queue, "smugmug:album:" + album_id, data # if data["Type"] == "Folder":
# ... # for child in self.api.node_children(self.node_id):
# yield Message.Queue, "smugmug:node:" + ...
class SmugmugAPI(): class SmugmugAPI():
@@ -154,61 +134,117 @@ class SmugmugAPI():
self.session = extractor.session self.session = extractor.session
self.api_key = api_key self.api_key = api_key
def album(self, album_id): self.log = extractor.log
return self._call("album/" + album_id)["Album"]
def album_images(self, album_id): def album(self, album_id, expands=None):
return self._pagination("album/" + album_id + "!images") return self._expansion("album/" + album_id, expands)
def image(self, image_id): def image(self, image_id, expands=None):
return self._call("image/" + image_id)["Image"] return self._expansion("image/" + image_id, expands)
def image_largest(self, image_id): def node(self, node_id, expands=None):
endpoint = "image/" + image_id + "!largestimage" return self._expansion("node/" + node_id, expands)
return self._call(endpoint)["LargestImage"]
def image_sizes(self, image_id): def user(self, username, expands=None):
return self._call("image/" + image_id + "!sizedetails") return self._expansion("user/" + username, expands)
def node(self, node_id): def album_images(self, album_id, expands=None):
return self._call("node/" + node_id)["Node"] return self._pagination("album/" + album_id + "!images", expands)
@memcache(keyarg=1) def node_children(self, node_id, expands=None):
def user(self, username): return self._pagination("node/" + node_id + "!children", expands)
return self._call("user/" + username)["User"]
def _call(self, endpoint, params=None): def _call(self, endpoint, params=None):
url = self.API_URL + endpoint url = self.API_URL + endpoint
params = params or {} params = params or {}
if self.api_key: if self.api_key:
params["APIKey"] = self.api_key params["APIKey"] = self.api_key
params["_verbosity"] = "1"
response = self.session.get(url, params=params, headers=self.HEADERS) response = self.session.get(url, params=params, headers=self.HEADERS)
data = response.json() data = response.json()
if 200 <= data["Code"] < 400: if 200 <= data["Code"] < 400:
return data["Response"] return data
if data["Code"] == 404: if data["Code"] == 404:
raise exception.NotFoundError() raise exception.NotFoundError()
if data["Code"] == 429: if data["Code"] == 429:
self.log.error("Rate limit reached") self.log.error("Rate limit reached")
raise exception.StopExtraction() else:
self.log.error("API request failed")
self.log.debug(data)
raise exception.StopExtraction()
def _expansion(self, endpoint, expands):
if expands:
endpoint += "?_expand=" + expands
return _apply_expansions(self._call(endpoint), expands)
def _pagination(self, endpoint, expands=None):
if expands:
endpoint += "?_expand=" + expands
params = {"start": 1, "count": 100}
def _pagination(self, endpoint):
params = {
"start": 1,
"count": 100,
}
while True: while True:
response = self._call(endpoint, params) data = self._call(endpoint, params)
yield from _apply_expansions_iter(data, expands)
obj = response[response["Locator"]] if "NextPage" not in data["Response"]["Pages"]:
if isinstance(obj, list):
yield from obj
else:
yield obj
if "NextPage" not in response["Pages"]:
return return
params["start"] += params["count"] params["start"] += params["count"]
def _apply_largest(image, delete=True):
    """Merge the 'LargestImage' entry of an image into the image itself.

    Reads image["Uris"]["LargestImage"] and copies whichever of the keys
    "Url", "Width", "Height", "MD5", "Size" and "Watermarked" it contains
    onto 'image', overwriting existing values.

    Arguments:
        image:  mutable mapping with a "Uris" mapping holding "LargestImage"
                (expects that entry to be a mapping already, i.e. expanded)
        delete: when true, remove the "Uris" entry from 'image'

    Returns image["Url"] as it is after the merge; raises KeyError if
    neither the image nor its largest version provides a "Url".
    """
    fields = ("Url", "Width", "Height", "MD5", "Size", "Watermarked")

    # fetch the source data first, so a missing entry fails before
    # anything on 'image' has been modified
    source = image["Uris"]["LargestImage"]
    if delete:
        del image["Uris"]

    image.update((name, source[name]) for name in fields if name in source)
    return image["Url"]
def _get(obj, key):
    """Return the last path component of the URI in obj["Uris"][key].

    Everything after the final "/" is returned; a value without any "/"
    is returned unchanged.
    """
    uri = obj["Uris"][key]
    _, _, last = uri.rpartition("/")
    return last
def _expand_uris(obj, names, expansions):
    """Replace URIs in obj["Uris"] with the objects they expand to.

    For every name in 'names' that is present in obj["Uris"], the stored
    URI is looked up in 'expansions' and replaced by the unwrapped result.
    Names an object does not list are skipped, so one object lacking a
    requested expansion no longer aborts the whole result with KeyError.
    """
    uris = obj["Uris"]
    for name in names:
        if name in uris:
            uris[name] = _unwrap(expansions[uris[name]])


def _apply_expansions(data, expands):
    """Return the single object of an API reply with expansions applied.

    Arguments:
        data:    complete decoded reply; data["Response"] is unwrapped via
                 its "Locator" and data["Expansions"], if present, maps
                 URIs to expanded responses
        expands: comma-separated expansion names, or None / "" for none

    The object's "Uris" mapping is modified in place.
    """
    obj = _unwrap(data["Response"])

    # 'expands' defaults to None in all API methods; never call .split()
    # on it just because the reply happens to carry an "Expansions" key
    if expands and "Expansions" in data:
        _expand_uris(obj, expands.split(","), data["Expansions"])

    return obj


def _apply_expansions_iter(data, expands):
    """Return all objects of an API reply with expansions applied.

    Same as _apply_expansions(), but the unwrapped response may be a list
    of objects; a single object is returned as a 1-tuple.
    """
    objs = _unwrap_iter(data["Response"])

    if expands and "Expansions" in data:
        names = expands.split(",")  # split once, not once per object
        expansions = data["Expansions"]
        for obj in objs:
            _expand_uris(obj, names, expansions)

    return objs


def _unwrap(response):
    """Return the value stored under the key named by response["Locator"]."""
    return response[response["Locator"]]


def _unwrap_iter(response):
    """Unwrap 'response' and always return something iterable.

    A list is returned as is; any other value is wrapped in a 1-tuple.
    """
    obj = _unwrap(response)
    if isinstance(obj, list):
        return obj
    return (obj,)