From 2c839f3760aea76def9f8baa52dbfbe0fe10bdc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 1 Aug 2019 21:39:20 +0200 Subject: [PATCH] [imgbb] add user extractor + login support (#361) --- docs/supportedsites.rst | 2 +- gallery_dl/extractor/imgbb.py | 184 +++++++++++++++++++++++++--------- scripts/supportedsites.py | 1 + 3 files changed, 141 insertions(+), 46 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 3ac38f36..d2fb4ea9 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -47,7 +47,7 @@ Hypnohub https://hypnohub.net/ Pools, Popular Images, Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional ImageBam http://www.imagebam.com/ Galleries, individual Images ImageFap https://imagefap.com/ Images from Users, Galleries, individual Images -ImgBB https://imgbb.com/ Albums +ImgBB https://imgbb.com/ Images from Users, Albums Optional imgbox https://imgbox.com/ Galleries, individual Images imgth https://imgth.com/ Galleries imgur https://imgur.com/ Albums, individual Images diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 95df2f51..442634b3 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -9,69 +9,74 @@ """Extractors for https://imgbb.com/""" from .common import Extractor, Message -from .. import text +from .. import text, exception +from ..cache import cache import json -class ImgbbAlbumExtractor(Extractor): - """Extractor for albums on imgbb.com""" +class ImgbbExtractor(Extractor): + """Base class for imgbb extractors""" category = "imgbb" - subcategory = "album" - root = "https://imgbb.com" - directory_fmt = ("{category}", "{user[username]}", - "{album_id} {album_name}") - filename_fmt = "{id}{title:?_//}.{extension}" + filename_fmt = "{title} {id}.{extension}" archive_fmt = "{id}" - pattern = r"(?:https?://)?ibb\.co/album/([^/?&#]+)(?:\?([^#]+))?" - test = ("https://ibb.co/album/c6p5Yv", { - "range": "1-100", - "url": "", - "keyword": "", - }) + root = "https://imgbb.com" def __init__(self, match): Extractor.__init__(self, match) - self.album_id = match.group(1) - self.album_name = None - self.params = text.parse_query(match.group(2)) + self.page_url = self.sort = None def items(self): + self.login() + page = self.request(self.page_url, params={"sort": self.sort}).text + data = self.metadata(page) first = True - yield Message.Version, 1 - for img in self.images(): - url = img["image"]["url"] - img["album_id"] = self.album_id - img["album_name"] = self.album_name - img["id"] = img["url_viewer"].rpartition("/")[2] + yield Message.Version, 1 + for img in self.images(page): + image = { + "id" : img["url_viewer"].rpartition("/")[2], + "user" : img["user"]["username"], + "title" : text.unescape(img["title"]), + "url" : img["image"]["url"], + "extension": img["image"]["extension"], + "size" : text.parse_int(img["image"]["size"]), + "width" : text.parse_int(img["width"]), + "height" : text.parse_int(img["height"]), + } + image.update(data) if first: first = False - yield Message.Directory, img - yield Message.Url, url, img + yield Message.Directory, data + yield Message.Url, image["url"], image - def images(self): - url = "https://ibb.co/album/" + self.album_id + def login(self): + username, password = self._get_auth_info() + if username: + self._update_cookies(self._login_impl(username, password)) + + @cache(maxage=360*24*3600, keyarg=1) + def _login_impl(self, username, password): + self.log.info("Logging in as %s", username) + + url = self.root + "/login" page = self.request(url).text + token = text.extract(page, 'PF.obj.config.auth_token="', '"')[0] - self.album_name, pos = text.extract(page, '"og:title" content="', '"') - seek, pos = text.extract(page, 'data-seek="', '"', pos) - tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos) - - endpoint = "https://ibb.co/json" - data = None - params = { - "action" : "list", - "list" : "images", - "from" : "album", - "sort" : "date_desc", - "page" : 2, - "albumid": self.album_id, - "params_hidden[list]" : "images", - "params_hidden[from]" : "album", - "params_hidden[albumid]": self.album_id, - "seek" : seek, - "auth_token": tokn, + headers = {"Referer": url} + data = { + "auth_token" : token, + "login-subject": username, + "password" : password, } + response = self.request(url, method="POST", headers=headers, data=data) + + if not response.history: + raise exception.AuthenticationError() + return self.session.cookies + + def _pagination(self, page, endpoint, params): + params["page"] = 2 + data = None while True: for img in text.extract_iter(page, "data-object='", "'"): @@ -83,3 +88,92 @@ class ImgbbAlbumExtractor(Extractor): params["page"] += 1 data = self.request(endpoint, "POST", data=params).json() page = data["html"] + + +class ImgbbAlbumExtractor(ImgbbExtractor): + """Extractor for albums on imgbb.com""" + subcategory = "album" + directory_fmt = ("{category}", "{user}", "{album_name} {album_id}") + pattern = r"(?:https?://)?ibb\.co/album/([^/?&#]+)/?(?:\?([^#]+))?" + test = ( + ("https://ibb.co/album/c6p5Yv", { + "range": "1-80", + "url": "8adaf0f7dfc19ff8bc4712c97f534af8b1e06412", + "keyword": "155b665a53e83d359e914cab7c69d5b829444d64", + }), + ("https://ibb.co/album/c6p5Yv?sort=title_asc", { + "range": "1-80", + "url": "d6c45041d5c8323c435b183a976f3fde2af7c547", + "keyword": "30c3262214e2044bbcf6bf2dee8e3ca7ebd62b71", + }), + ) + + def __init__(self, match): + ImgbbExtractor.__init__(self, match) + self.album_name = None + self.album_id = match.group(1) + self.sort = text.parse_query(match.group(2)).get("sort", "date_desc") + self.page_url = "https://ibb.co/album/" + self.album_id + + def metadata(self, page): + album, pos = text.extract(page, '"og:title" content="', '"') + user , pos = text.extract(page, 'rel="author">', '<', pos) + return { + "album_id" : self.album_id, + "album_name": text.unescape(album), + "user" : user.lower(), + } + + def images(self, page): + seek, pos = text.extract(page, 'data-seek="', '"') + tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos) + + return self._pagination(page, "https://ibb.co/json", { + "action" : "list", + "list" : "images", + "from" : "album", + "sort" : self.sort, + "albumid" : self.album_id, + "seek" : seek, + "auth_token": tokn, + "params_hidden[list]" : "images", + "params_hidden[from]" : "album", + "params_hidden[albumid]": self.album_id, + }) + + +class ImgbbUserExtractor(ImgbbExtractor): + """Extractor for user profiles in imgbb.com""" + subcategory = "user" + directory_fmt = ("{category}", "{user}") + pattern = r"(?:https?://)?([^.]+)\.imgbb\.com/?(?:\?([^#]+))?$" + test = ("https://folkie.imgbb.com", { + "range": "1-80", + "pattern": r"https?://i\.ibb\.co/\w+/[^/?&#]+", + }) + + def __init__(self, match): + ImgbbExtractor.__init__(self, match) + self.user = match.group(1) + self.sort = text.parse_query(match.group(2)).get("sort", "date_desc") + self.page_url = "https://{}.imgbb.com/".format(self.user) + + def metadata(self, page): + return {"user": self.user} + + def images(self, page): + seek, pos = text.extract(page, 'data-seek="', '"') + tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos) + user, pos = text.extract(page, '.obj.resource={"id":"', '"', pos) + + return self._pagination(page, self.page_url + "json", { + "action" : "list", + "list" : "images", + "from" : "user", + "sort" : self.sort, + "seek" : seek, + "userid" : user, + "auth_token": tokn, + "params_hidden[userid]": user, + "params_hidden[from]" : "user", + }) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 5b94a262..498e3fce 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -112,6 +112,7 @@ AUTH_MAP = { "exhentai" : "Optional", "flickr" : "Optional (OAuth)", "idolcomplex": "Optional", + "imgbb" : "Optional", "instagram" : "Optional", "luscious" : "Optional", "mangoxo" : "Optional",