[photobucket] add 'image' extractor (#117)
This commit is contained in:
@@ -9,7 +9,8 @@
|
||||
"""Extract images from http://photobucket.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
from .. import text, exception
|
||||
import base64
|
||||
import json
|
||||
|
||||
|
||||
@@ -90,3 +91,79 @@ class PhotobucketAlbumExtractor(Extractor):
|
||||
albums.extend(subs)
|
||||
|
||||
yield album
|
||||
|
||||
|
||||
class PhotobucketImageExtractor(Extractor):
    """Extractor for a single image hosted on photobucket.com"""
    category = "photobucket"
    subcategory = "image"
    directory_fmt = ["{category}", "{username}"]
    filename_fmt = "{pictureId:?/_/}{titleOrFilename}.{extension}"
    archive_fmt = "{username}_{id}"
    # Two URL layouts are recognized:
    #   /gallery/user/<user>/media/<media-id>   -> groups 1 and 2
    #   /user/<user>/media/<file>.html          -> group 3
    pattern = [r"(?:https?://)?(?:[^.]+\.)?photobucket\.com"
               r"(?:/gallery/user/([^/?&#]+)/media/([^/?&#]+)"
               r"|/user/([^/?&#]+)/media/[^?&#]+\.html)"]
    test = [
        (("http://s271.photobucket.com/user/lakerfanryan"
          "/media/Untitled-3-1.jpg.html"), {
            "url": "256fe63bee84762f92337e963ec0baa27bba87e2",
            "keyword": "81fbe6f5f821a2d20dabb931726ab9e7565ba96d",
        }),
        (("http://s271.photobucket.com/user/lakerfanryan"
          "/media/IsotopeswBros.jpg.html?sort=3&o=2"), {
            "url": "44e644e29a564398fcb2fd8edce738696afe7208",
            "keyword": "6addb30d6db6d7c3222761ade37c0bded67e5783",
        }),
    ]

    def __init__(self, match):
        """Store the matched URL, user name and (optional) media ID.

        'match' is the regex match object produced by 'pattern'.
        """
        Extractor.__init__(self)
        gallery_user, media_id, page_user = match.group(1, 2, 3)

        self.url = match.group(0)
        # only one of the two alternatives in 'pattern' captures a user
        self.user = gallery_user or page_user
        self.media_id = media_id
        self.session.headers["Referer"] = self.url

    def items(self):
        """Yield the version, directory and URL messages for the image.

        Logs an error and raises exception.StopExtraction if the search
        endpoint still answers with a 'message' entry after 5 attempts.
        """
        endpoint = "http://photobucket.com/galleryd/search.php"
        query = {"userName": self.user, "searchTerm": "", "ref": ""}

        # identify the image by its media ID when the URL supplied one,
        # and by the matched page URL otherwise
        if self.media_id:
            query["mediaId"] = self.media_id
        else:
            query["url"] = self.url

        # the API call can randomly fail; give it up to 5 attempts
        for _ in range(5):
            response = self.request(endpoint, method="POST", params=query)
            data = response.json()
            image = data["mediaDocuments"]
            if "message" not in image:
                # no 'message' entry means the call succeeded
                break
            self.log.debug("'%s'", image["message"])
        else:
            # every attempt came back with a 'message' entry
            self.log.error("photobucket says: '%s'", image["message"])
            raise exception.StopExtraction()

        # bring the metadata into a shape that is at least somewhat
        # similar to the 'album' extractor's
        document = data["mediaDocuments"]
        if "media" in document:
            # pick the indexed entry out of the returned media list
            image = document["media"][document["mediaIndex"]]
            image["albumView"] = document["albumView"]
            image["username"] = image["ownerId"]
        else:
            image = document
            image["fileUrl"] = image.pop("imageUrl")

        image.setdefault("title", "")
        image.setdefault("description", "")

        # split the last path segment of the file URL at its final dot
        basename = image["fileUrl"].rpartition("/")[2]
        name, _, ext = basename.rpartition(".")
        image["ext"] = ext
        image["extension"] = ext
        image["titleOrFilename"] = image["title"] or name
        image["tags"] = image.pop("clarifaiTagList", [])

        # the base64-decoded 'id' has the form b"<type>:<value>";
        # the value is used as 'pictureId' only for the b"mediaId" type
        kind, _, value = base64.b64decode(image["id"]).partition(b":")
        if kind == b"mediaId":
            image["pictureId"] = value.decode()
        else:
            image["pictureId"] = ""

        yield Message.Version, 1
        yield Message.Directory, image
        yield Message.Url, image["fileUrl"], image
|
||||
|
||||
Reference in New Issue
Block a user