# Patch summary
# -------------
# gallery_dl/extractor/idolcomplex.py
#   In the IdolcomplexPostExtractor test keywords, the expected value for
#   "tags_idol" changes from the literal "lyumos" to `str`
#   (presumably a type-only check -- confirm against the test runner).
#
# gallery_dl/extractor/photobucket.py
#   * Additionally imports `exception` from the parent package and the
#     stdlib module `base64`.
#   * Adds the class PhotobucketImageExtractor
#     (category "photobucket", subcategory "image"):
#       - `pattern` has two alternatives:
#           /gallery/user/<user>/media/<media id>   -> groups 1 and 2
#           /user/<user>/media/<name>.html          -> group 3 only
#         so `self.media_id` is None for the second form.
#       - __init__ stores the whole matched URL, the user name and the
#         media id, and sets the session's "Referer" header to that URL.
#       - items() sends a POST request to
#         http://photobucket.com/galleryd/search.php with the parameters
#         userName / searchTerm / ref plus "mediaId" when a media id was
#         captured, otherwise "url".
#       - The request is made at most 5 times. A response whose
#         "mediaDocuments" object contains the key "message" counts as a
#         failed attempt and the message is logged at debug level. When
#         all 5 attempts fail (the `else` branch of the `while` loop) the
#         last message is logged as an error and
#         exception.StopExtraction is raised.
#       - Metadata normalisation: if "mediaDocuments" has a "media" list,
#         the entry at "mediaIndex" is used and gets "albumView" and
#         "username" (copied from "ownerId"); otherwise "imageUrl" is
#         renamed to "fileUrl". "title" and "description" default to "".
#         The last path component of "fileUrl" is split at its last "."
#         into a name and an extension; "ext" and "extension" both get
#         the extension, "titleOrFilename" is the title or, if that is
#         empty, the name. "clarifaiTagList" is moved to "tags"
#         (default: empty list).
#       - "pictureId": the "id" field is base64-decoded and split at the
#         first ":"; the part after it is used when the part before it is
#         "mediaId", otherwise "pictureId" is "".
#       - Yields Message.Version, Message.Directory and Message.Url, in
#         that order.
#
# NOTE(review): this patch was recovered from a copy in which line breaks
# and indentation had been collapsed. No token was changed; the line
# counts below match every hunk header (7/7, 7/8, 3/79). Indentation of
# the unchanged context lines is inferred, not shown in the collapsed
# text -- confirm it against the target files before applying.
#
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index bd3affd2..2751bbf7 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -52,7 +52,7 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor,
         "keyword": {
             "tags_character": "shani_(the_witcher)",
             "tags_copyright": "the_witcher",
-            "tags_idol": "lyumos",
+            "tags_idol": str,
             "tags_medium": str,
             "tags_general": str,
         },
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
index a7691328..8e7e99e0 100644
--- a/gallery_dl/extractor/photobucket.py
+++ b/gallery_dl/extractor/photobucket.py
@@ -9,7 +9,8 @@
 """Extract images from http://photobucket.com/"""
 
 from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+import base64
 import json
 
 
@@ -90,3 +91,79 @@ class PhotobucketAlbumExtractor(Extractor):
                 albums.extend(subs)
 
             yield album
+
+
+class PhotobucketImageExtractor(Extractor):
+    """Extractor for individual images from photobucket.com"""
+    category = "photobucket"
+    subcategory = "image"
+    directory_fmt = ["{category}", "{username}"]
+    filename_fmt = "{pictureId:?/_/}{titleOrFilename}.{extension}"
+    archive_fmt = "{username}_{id}"
+    pattern = [r"(?:https?://)?(?:[^.]+\.)?photobucket\.com"
+               r"(?:/gallery/user/([^/?&#]+)/media/([^/?&#]+)"
+               r"|/user/([^/?&#]+)/media/[^?&#]+\.html)"]
+    test = [
+        (("http://s271.photobucket.com/user/lakerfanryan"
+          "/media/Untitled-3-1.jpg.html"), {
+            "url": "256fe63bee84762f92337e963ec0baa27bba87e2",
+            "keyword": "81fbe6f5f821a2d20dabb931726ab9e7565ba96d",
+        }),
+        (("http://s271.photobucket.com/user/lakerfanryan"
+          "/media/IsotopeswBros.jpg.html?sort=3&o=2"), {
+            "url": "44e644e29a564398fcb2fd8edce738696afe7208",
+            "keyword": "6addb30d6db6d7c3222761ade37c0bded67e5783",
+        }),
+    ]
+
+    def __init__(self, match):
+        Extractor.__init__(self)
+        self.url = match.group(0)
+        self.user = match.group(1) or match.group(3)
+        self.media_id = match.group(2)
+        self.session.headers["Referer"] = self.url
+
+    def items(self):
+        url = "http://photobucket.com/galleryd/search.php"
+        params = {"userName": self.user, "searchTerm": "", "ref": ""}
+
+        if self.media_id:
+            params["mediaId"] = self.media_id
+        else:
+            params["url"] = self.url
+
+        # retry API call up to 5 times, since it can randomly fail
+        tries = 0
+        while tries < 5:
+            data = self.request(url, method="POST", params=params).json()
+            image = data["mediaDocuments"]
+            if "message" not in image:
+                break  # success
+            tries += 1
+            self.log.debug("'%s'", image["message"])
+        else:
+            self.log.error("photobucket says: '%s'", image["message"])
+            raise exception.StopExtraction()
+
+        # adjust metadata entries to be at least somewhat similar
+        # to the 'album' extractor
+        if "media" in image:
+            image = image["media"][image["mediaIndex"]]
+            image["albumView"] = data["mediaDocuments"]["albumView"]
+            image["username"] = image["ownerId"]
+        else:
+            image["fileUrl"] = image.pop("imageUrl")
+
+        image.setdefault("title", "")
+        image.setdefault("description", "")
+        name, _, ext = image["fileUrl"].rpartition("/")[2].rpartition(".")
+        image["ext"] = image["extension"] = ext
+        image["titleOrFilename"] = image["title"] or name
+        image["tags"] = image.pop("clarifaiTagList", [])
+
+        mtype, _, mid = base64.b64decode(image["id"]).partition(b":")
+        image["pictureId"] = mid.decode() if mtype == b"mediaId" else ""
+
+        yield Message.Version, 1
+        yield Message.Directory, image
+        yield Message.Url, image["fileUrl"], image