diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 0247766d..f1988405 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -67,6 +67,7 @@ modules = [
     "nijie",
     "nyafuu",
     "paheal",
+    "photobucket",
     "piczel",
     "pinterest",
     "pixiv",
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
new file mode 100644
index 00000000..62afdac3
--- /dev/null
+++ b/gallery_dl/extractor/photobucket.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from http://photobucket.com/"""
+
+from .common import Extractor, Message
+from .. import text
+import json
+
+
+class PhotobucketAlbumExtractor(Extractor):
+    """Extractor for albums on photobucket.com"""
+    category = "photobucket"
+    subcategory = "album"
+    directory_fmt = ["{category}", "{username}", "{location}"]
+    filename_fmt = "{offset:>03}_{pictureId}{title:?_//}.{extension}"
+    archive_fmt = "{id}"
+    pattern = [r"(?:https?://)?(?:[^.]+\.)?photobucket\.com"
+               r"/user/[^/?&#]+/library/[^?&#]*"]
+    test = [
+        ("http://s258.photobucket.com/user/focolandia/library/", {
+            "pattern": r"http://i\d+.photobucket.com/albums/hh280/focolandia",
+            "count": ">= 39"
+        }),
+        ("http://s1110.photobucket.com/user/chndrmhn100/library/"
+         "Chandu%20is%20the%20King?sort=3&page=1", None),
+    ]
+
+    def __init__(self, match):
+        Extractor.__init__(self)
+        # the pattern stops at '?', so the matched URL carries no query
+        # string; it is requested for every page and sent as Referer
+        self.url = match.group(0)
+
+    def items(self):
+        """Yield a directory and a URL message for every album image"""
+        # prevent watermarks
+        self.session.headers["Referer"] = self.url
+
+        yield Message.Version, 1
+        for image in self.images():
+            image["title"] = text.unescape(image["title"])
+            # filename_fmt uses "extension"; the image data provides "ext"
+            image["extension"] = image["ext"]
+            yield Message.Directory, image
+            yield Message.Url, image["fullsizeUrl"], image
+
+    def images(self):
+        """Return an iterable over the metadata of all album images"""
+        params = {"sort": "3", "page": 1}
+        return self._pagination(self.url, params)
+
+    def _pagination(self, url, params):
+        """Yield image objects from 'url', one result page at a time"""
+        while True:
+            page = self.request(url, params=params).text
+            collection = text.extract(page, "collectionData:", ",\n")[0]
+            if not collection:
+                # no embedded collection data on this page; stop here
+                # instead of passing a non-string value to json.loads()
+                return
+            data = json.loads(collection)
+
+            yield from data["items"]["objects"]
+
+            if data["total"] <= data["offset"] + data["pageSize"]:
+                return
+            params["page"] += 1