From 55d4d238603441c49b66c30f7cec69b713a07e8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 24 Apr 2018 22:17:25 +0200 Subject: [PATCH 01/12] [pinterest] use Pinterest's "Web" API (#83) no access tokens, no user credentials of any kind ... --- gallery_dl/extractor/pinterest.py | 126 +++++++++++++++--------------- test/test_results.py | 1 - 2 files changed, 63 insertions(+), 64 deletions(-) diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 99d97f52..5db506a8 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -9,14 +9,15 @@ """Extract images from https://www.pinterest.com""" from .common import Extractor, Message -from .. import text, util, exception +from .. import text, exception +import json class PinterestExtractor(Extractor): """Base class for pinterest extractors""" category = "pinterest" - filename_fmt = "{category}_{pin_id}.{extension}" - archive_fmt = "{pin_id}" + filename_fmt = "{category}_{id}.{extension}" + archive_fmt = "{id}" def __init__(self): Extractor.__init__(self) @@ -24,15 +25,11 @@ class PinterestExtractor(Extractor): def data_from_pin(self, pin): """Get image url and metadata from a pin-object""" - img = pin["image"]["original"] + img = pin["images"]["orig"] url = img["url"] - data = { - "pin_id": util.safe_int(pin["id"]), - "note": pin["note"], - "width": util.safe_int(img["width"]), - "height": util.safe_int(img["height"]), - } - return url, text.nameext_from_url(url, data) + pin["width"] = img["width"] + pin["height"] = img["height"] + return url, text.nameext_from_url(url, pin) class PinterestPinExtractor(PinterestExtractor): @@ -42,13 +39,9 @@ class PinterestPinExtractor(PinterestExtractor): test = [ ("https://www.pinterest.com/pin/858146903966145189/", { "url": "afb3c26719e3a530bb0e871c480882a801a4e8a5", - "keyword": "f651cb271247f306d1d30385d49c7b82da44c2b1", "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947", }), ("https://www.pinterest.com/pin/858146903966145188/", { - "exception": exception.StopExtraction, - }), - ("https://www.pinterest.com/pin/85814690396614518/", { "exception": exception.NotFoundError, }), ] @@ -68,13 +61,13 @@ class PinterestPinExtractor(PinterestExtractor): class PinterestBoardExtractor(PinterestExtractor): """Extractor for images from a board from pinterest.com""" subcategory = "board" - directory_fmt = ["{category}", "{user}", "{board}"] + directory_fmt = ["{category}", "{board[owner][username]}", "{board[name]}"] + archive_fmt = "{board[id]}_{id}" pattern = [r"(?:https?://)?(?:[^./]+\.)?pinterest\.[^/]+/" r"(?!pin/)([^/?#&]+)/([^/?#&]+)"] test = [ ("https://www.pinterest.com/g1952849/test-/", { "url": "85911dfca313f3f7f48c2aa0bc684f539d1d80a6", - "keyword": "c54cf5aa830994f2ed4871efa7154a5fdaa1c2ce", }), ("https://www.pinterest.com/g1952848/test/", { "exception": exception.NotFoundError, @@ -87,27 +80,17 @@ class PinterestBoardExtractor(PinterestExtractor): def items(self): board = self.api.board(self.user, self.board) - data = self.data_from_board(board) + data = {"board": board, "count": board["pin_count"]} num = data["count"] yield Message.Version, 1 yield Message.Directory, data - for pin in self.api.board_pins(self.user, self.board): + for pin in self.api.board_pins(board["id"]): url, pdata = self.data_from_pin(pin) data.update(pdata) data["num"] = num num -= 1 yield Message.Url, url, data - def data_from_board(self, board): - """Get metadata from a board-object""" - data = { - "user": self.user, - "board_id": util.safe_int(board["id"]), - "board": board["name"], - "count": board["counts"]["pins"], - } - return data - class PinterestPinitExtractor(PinterestExtractor): """Extractor for images from a pin.it URL""" @@ -136,59 +119,76 @@ class PinterestPinitExtractor(PinterestExtractor): class PinterestAPI(): - """Minimal interface for the pinterest API""" + """Minimal interface for the Pinterest Web API - def __init__(self, extractor, access_token=None): + For a better and more complete implementation in PHP, see + - https://github.com/seregazhuk/php-pinterest-bot + """ + + BASE_URL = "https://uk.pinterest.com" + HEADERS = { + "Accept" : "application/json, text/javascript, " + "*/*, q=0.01", + "Accept-Language" : "en-US,en;q=0.5", + "X-Pinterest-AppState": "active", + "X-APP-VERSION" : "cb1c7f9", + "X-Requested-With" : "XMLHttpRequest", + "Origin" : BASE_URL + "/", + } + + def __init__(self, extractor): self.log = extractor.log self.session = extractor.session - self.access_token = ( - access_token or - extractor.config("access-token") or - "AfyIXxi1MJ6et0NlIl_vBchHbex-FSWylPyr2GJE2uu3W8A97QAAAAA" - ) - def pin(self, pin_id, fields="id,image,note"): + def pin(self, pin_id): """Query information about a pin""" - endpoint = "pins/{}/".format(pin_id) - params = {"fields": fields} - return self._call(endpoint, params)["data"] + options = {"id": pin_id, "field_set_key": "detailed"} + return self._call("Pin", options)["resource_response"]["data"] - def board(self, user, board, fields="id,name,counts"): + def board(self, user, board): """Query information about a board""" - endpoint = "boards/{}/{}/".format(user, board) - params = {"fields": fields} - return self._call(endpoint, params)["data"] + options = {"slug": board, "username": user, + "field_set_key": "detailed"} + return self._call("Board", options)["resource_response"]["data"] - def board_pins(self, user, board, fields="id,image,note", limit=100): + def board_pins(self, board_id): """Yield all pins of a specific board""" - endpoint = "boards/{}/{}/pins/".format(user, board) - params = {"fields": fields, "limit": limit} - return self._pagination(endpoint, params) + options = {"board_id": board_id} + return self._pagination("BoardFeed", options) - def _call(self, endpoint, params): - params["access_token"] = self.access_token - url = "https://api.pinterest.com/v1/" + endpoint + def _call(self, resource, options): + url = "{}/resource/{}Resource/get".format(self.BASE_URL, resource) + params = { + "source_url": "", + "data": json.dumps({"options": options}), + } - response = self.session.get(url, params=params) - status = response.status_code + response = self.session.get(url, params=params, headers=self.HEADERS) data = response.json() - if 200 <= status < 400 and data.get("data"): + if 200 <= response.status_code < 400 and "resource_response" in data: return data - msg = data.get("message", "") - if status == 404: + try: + msg = data["resource_response"]["error"]["message"] + except KeyError: + msg = "" + if response.status_code == 404: msg = msg.partition(" ")[0].lower() raise exception.NotFoundError(msg) - self.log.error("API request failed: %s", msg or "") + self.log.error("API request failed: %s", msg) raise exception.StopExtraction() - def _pagination(self, endpoint, params): + def _pagination(self, resource, options, bookmarks=None): while True: - response = self._call(endpoint, params) - yield from response["data"] + if bookmarks: + options["bookmarks"] = bookmarks + data = self._call(resource, options) + yield from data["resource_response"]["data"] - cursor = response["page"]["cursor"] - if not cursor: + try: + bookmarks = data["resource"]["options"]["bookmarks"] + if not bookmarks or bookmarks[0] == "-end-": + return + except KeyError: return - params["cursor"] = cursor diff --git a/test/test_results.py b/test/test_results.py index fd370dab..d83282ed 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -22,7 +22,6 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { "gomanga", # server down - "pinterest", # access tokens have been set to 10 requests per hour "puremashiro", # online reader down } From 0f1e07f627f6a1e8c0c0c5a5b0efcfc2f2699b2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 25 Apr 2018 16:04:30 +0200 Subject: [PATCH 02/12] [pinterest] scrap OAuth implementation; code improvements OAuth authentication isn't needed anymore and other tools like Postman are better suited for this job anyway. --- docs/configuration.rst | 29 ----------------------------- gallery_dl/extractor/oauth.py | 23 ----------------------- gallery_dl/extractor/pinterest.py | 21 +++++++-------------- 3 files changed, 7 insertions(+), 66 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 6c464d41..f3fef6a0 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -820,35 +820,6 @@ How To =========== ===== -extractor.pinterest.access-token --------------------------------- -=========== ===== -Type ``string`` -How To - register a Pinterest application and use its client-id and - client-secret (see `extractor.pinterest.client-id & .secret`_) - - run `gallery-dl oauth:pinterest` and authenticate access with - (preferably) the same account that registered the application -Notes Access tokens currently only allow for 10 requests per hour. -=========== ===== - - -extractor.pinterest.client-id & .secret ---------------------------------------- -=========== ===== -Type ``string`` -How To - login and visit Pinterest's - `Apps `__ section - - agree to "Pinterest Developer Terms and the API Policy" - and click "Create app" - - choose a random name and description and click "Create" - - scroll down and set a Site URL (e.g. https://example.org/) - and allow https://mikf.github.io/gallery-dl/oauth-redirect.html - as Redirect URI - - scroll back up again, copy the "App ID" and "App secret" values - and put them in your configuration file -=========== ===== - - extractor.reddit.client-id & .user-agent ---------------------------------------- =========== ===== diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 8dc6e26f..f161126e 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -198,29 +198,6 @@ class OAuthFlickr(OAuthBase): ) -class OAuthPinterest(OAuthBase): - subcategory = "pinterest" - pattern = ["oauth:pinterest$"] - redirect_uri = "https://mikf.github.io/gallery-dl/oauth-redirect.html" - - def items(self): - yield Message.Version, 1 - - client_id = self.oauth_config("client-id") - client_secret = self.oauth_config("client-secret") - - if not client_id or not client_secret: - self.log.error("'client-id' and 'client-secret' required") - return - - self._oauth2_authorization_code_grant( - client_id, client_secret, - "https://api.pinterest.com/oauth/", - "https://api.pinterest.com/v1/oauth/token", - scope="read_public", key="access_token", auth=False, - ) - - class OAuthReddit(OAuthBase): subcategory = "reddit" pattern = ["oauth:reddit$"] diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 5db506a8..711d95de 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -81,15 +81,12 @@ class PinterestBoardExtractor(PinterestExtractor): def items(self): board = self.api.board(self.user, self.board) data = {"board": board, "count": board["pin_count"]} - num = data["count"] yield Message.Version, 1 yield Message.Directory, data for pin in self.api.board_pins(board["id"]): - url, pdata = self.data_from_pin(pin) - data.update(pdata) - data["num"] = num - num -= 1 - yield Message.Url, url, data + url, pin_data = self.data_from_pin(pin) + pin_data.update(data) + yield Message.Url, url, pin_data class PinterestPinitExtractor(PinterestExtractor): @@ -157,11 +154,8 @@ class PinterestAPI(): return self._pagination("BoardFeed", options) def _call(self, resource, options): - url = "{}/resource/{}Resource/get".format(self.BASE_URL, resource) - params = { - "source_url": "", - "data": json.dumps({"options": options}), - } + url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource) + params = {"data": json.dumps({"options": options}), "source_url": ""} response = self.session.get(url, params=params, headers=self.HEADERS) data = response.json() @@ -179,10 +173,8 @@ class PinterestAPI(): self.log.error("API request failed: %s", msg) raise exception.StopExtraction() - def _pagination(self, resource, options, bookmarks=None): + def _pagination(self, resource, options): while True: - if bookmarks: - options["bookmarks"] = bookmarks data = self._call(resource, options) yield from data["resource_response"]["data"] @@ -190,5 +182,6 @@ class PinterestAPI(): bookmarks = data["resource"]["options"]["bookmarks"] if not bookmarks or bookmarks[0] == "-end-": return + options["bookmarks"] = bookmarks except KeyError: return From 8b79eaafea0cde50c4d2dce510afa56887751013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 25 Apr 2018 16:13:03 +0200 Subject: [PATCH 03/12] [tumblr] log actual time of rate limit resets ... instead of the amount of seconds until a reset --- gallery_dl/extractor/tumblr.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index c4e94dad..a40969af 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -10,6 +10,7 @@ from .common import Extractor, Message from .. import text, util, exception +from datetime import datetime, timedelta import re import time @@ -322,10 +323,11 @@ class TumblrAPI(): # daily rate limit if response.headers.get("x-ratelimit-perday-remaining") == "0": + reset = response.headers.get("x-ratelimit-perday-reset") self.log.error( "Daily API rate limit exceeded: aborting; " - "%s seconds until rate limit reset", - response.headers.get("x-ratelimit-perday-reset"), + "rate limit will reset at %s", + self._to_time(reset), ) raise exception.StopExtraction() @@ -334,11 +336,19 @@ class TumblrAPI(): if reset: self.log.info( "Hourly API rate limit exceeded; " - "waiting %s seconds for rate limit reset", - reset, + "waiting until %s for rate limit reset", + self._to_time(reset), ) time.sleep(int(reset) + 1) return self._call(blog, endpoint, params) self.log.error(data) raise exception.StopExtraction() + + @staticmethod + def _to_time(reset): + try: + reset_time = datetime.now() + timedelta(seconds=int(reset)) + except (ValueError, TypeError): + return "?" + return reset_time.strftime("%H:%M:%S") From 2395d870dd68479eb37d5b4952e21ed367e7e5d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 26 Apr 2018 16:36:42 +0200 Subject: [PATCH 04/12] [pinterest] unquote board and user names, better errors --- gallery_dl/extractor/pinterest.py | 22 +++++++++------------- test/test_results.py | 2 +- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 711d95de..58fa7c55 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -76,7 +76,8 @@ class PinterestBoardExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self) - self.user, self.board = match.groups() + self.user = text.unquote(match.group(1)) + self.board = text.unquote(match.group(2)) def items(self): board = self.api.board(self.user, self.board) @@ -134,8 +135,7 @@ class PinterestAPI(): } def __init__(self, extractor): - self.log = extractor.log - self.session = extractor.session + self.extractor = extractor def pin(self, pin_id): """Query information about a pin""" @@ -157,20 +157,16 @@ class PinterestAPI(): url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource) params = {"data": json.dumps({"options": options}), "source_url": ""} - response = self.session.get(url, params=params, headers=self.HEADERS) + response = self.extractor.request( + url, params=params, headers=self.HEADERS, fatal=False) data = response.json() - if 200 <= response.status_code < 400 and "resource_response" in data: + if 200 <= response.status_code < 400 and not response.history: return data - try: - msg = data["resource_response"]["error"]["message"] - except KeyError: - msg = "" - if response.status_code == 404: - msg = msg.partition(" ")[0].lower() - raise exception.NotFoundError(msg) - self.log.error("API request failed: %s", msg) + if response.status_code == 404 or response.history: + raise exception.NotFoundError(self.extractor.subcategory) + self.extractor.log.error("API request failed") raise exception.StopExtraction() def _pagination(self, resource, options): diff --git a/test/test_results.py b/test/test_results.py index d83282ed..216457b2 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -16,7 +16,7 @@ from gallery_dl import extractor, job, config, exception # these don't work on travis-ci TRAVIS_SKIP = { "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", - "archivedmoe", "archiveofsins", "thebarchive", + "archivedmoe", "archiveofsins", "thebarchive", "sankaku", "idolcomplex", } # temporary issues, etc. From b44a2964042092edff9c8bd64c2089767c0c5e26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 28 Apr 2018 14:17:06 +0200 Subject: [PATCH 05/12] [gomanga] remove module site has been unreachable for a couple of weeks and the cloudflare status page shows host errors --- docs/supportedsites.rst | 1 - gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/gomanga.py | 38 -------------------------------- test/test_results.py | 1 - 4 files changed, 41 deletions(-) delete mode 100644 gallery_dl/extractor/gomanga.py diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index e55ba8b9..8a2da995 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -24,7 +24,6 @@ Flickr https://www.flickr.com/ |Images from Use-2| Futaba Channel https://www.2chan.net/ Threads Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches Gfycat https://gfycat.com/ individual Images -GoManga https://gomanga.co/ Chapters, Manga HBrowse http://www.hbrowse.com/ Chapters, Manga Hentai Foundry https://www.hentai-foundry.com/ Images from Users, individual Images Hentai2Read https://hentai2read.com/ Chapters, Manga diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 1a9c28c3..75e62761 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -31,7 +31,6 @@ modules = [ "flickr", "gelbooru", "gfycat", - "gomanga", "hbrowse", "hentai2read", "hentaifoundry", diff --git a/gallery_dl/extractor/gomanga.py b/gallery_dl/extractor/gomanga.py deleted file mode 100644 index 0e547a74..00000000 --- a/gallery_dl/extractor/gomanga.py +++ /dev/null @@ -1,38 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2017 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://gomanga.co/""" - -from . import foolslide - - -class GomangaChapterExtractor(foolslide.FoolslideChapterExtractor): - """Extractor for manga-chapters from gomanga.co""" - category = "gomanga" - pattern = foolslide.chapter_pattern(r"(?:www\.)?gomanga\.co/reader") - test = [ - ("https://gomanga.co/reader/read/mata-kata-omou/en/0/1/page/11", { - "url": "5088d75bb44327fc503c85b52b1d6a371b8057f2", - "keyword": "10624e78924c37fd39543270a6965f2082bde08f", - }), - ("https://gomanga.co/reader/read/pastel/en/31/144/", { - "url": "9cc2052fbf36344c573c754c5abe533a14b3e280", - "keyword": "a355cd3197e70c24b84d3885e8a5ff0ac22537bf", - }), - ] - method = "double" - - -class GomangaMangaExtractor(foolslide.FoolslideMangaExtractor): - """Extractor for manga from gomanga.co""" - category = "gomanga" - pattern = foolslide.manga_pattern(r"(?:www\.)?gomanga\.co/reader") - test = [("https://gomanga.co/reader/series/pastel/", { - "url": "bd1c82d70838d54140a8209296e789f27ceab7cd", - "keyword": "fb1fd14548602dbe4f6e70a633429762972c1d5d", - })] diff --git a/test/test_results.py b/test/test_results.py index 216457b2..3dfd6fcc 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -21,7 +21,6 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { - "gomanga", # server down "puremashiro", # online reader down } From d96b3474e56541cd219dc8ad77bdccf60e7fc0e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 28 Apr 2018 14:20:59 +0200 Subject: [PATCH 06/12] [puremashiro] remove module site has been unreachable for a couple of weeks and now the DNS record is gone as well --- docs/supportedsites.rst | 1 - gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/puremashiro.py | 34 ----------------------------- test/test_results.py | 1 - 4 files changed, 37 deletions(-) delete mode 100644 gallery_dl/extractor/puremashiro.py diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 8a2da995..eaa14a72 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -57,7 +57,6 @@ Pawoo https://pawoo.net Images from Users, Imag Pinterest https://www.pinterest.com Boards, Pins, pin.it Links Pixiv https://www.pixiv.net/ |Images from Use-4| Required PowerManga https://powermanga.org/ Chapters, Manga -Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics RebeccaBlackTech https://rbt.asia/ Threads Reddit https://reddit.com/ individual Images, Submissions, Subreddits Optional (OAuth) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 75e62761..3787f8d8 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -64,7 +64,6 @@ modules = [ "pinterest", "pixiv", "powermanga", - "puremashiro", "readcomiconline", "rebeccablacktech", "reddit", diff --git a/gallery_dl/extractor/puremashiro.py b/gallery_dl/extractor/puremashiro.py deleted file mode 100644 index 00699faa..00000000 --- a/gallery_dl/extractor/puremashiro.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2018 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for http://reader.puremashiro.moe/""" - -from . import foolslide - - -class PuremashiroChapterExtractor(foolslide.FoolslideChapterExtractor): - """Extractor for manga-chapters from reader.puremashiro.moe""" - category = "puremashiro" - pattern = foolslide.chapter_pattern(r"reader\.puremashiro\.moe") - test = [(("http://reader.puremashiro.moe" - "/read/parallel-paradise-eng/en-us/0/20/"), { - "url": "00d5bc9cbb413ed584ddb091ae2418ca7801b136", - "keyword": "73bba3222758927e5a7cdc9e1db9d8307fe944c0", - })] - scheme = "http" - - -class PuremashiroMangaExtractor(foolslide.FoolslideMangaExtractor): - """Extractor for manga from reader.puremashiro.moe""" - category = "puremashiro" - pattern = foolslide.manga_pattern(r"reader\.puremashiro\.moe") - test = [("http://reader.puremashiro.moe/series/hayate-no-gotoku/", { - "url": "0cf77a623bff35b43323427a8fd1e40ff0e13c09", - "keyword": "1b57d724b259a1d81b6352d889a1aa5eb86a6ef9", - })] - scheme = "http" diff --git a/test/test_results.py b/test/test_results.py index 3dfd6fcc..b20fcf67 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -21,7 +21,6 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { - "puremashiro", # online reader down } From 16e014baaae566323610c304a267e19b64e8dc20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 29 Apr 2018 21:27:25 +0200 Subject: [PATCH 07/12] [smugmug] added image and album extractor just some initial code that still requires a lot of work ... TODO: - folders - old-style albums (which are nearly all of them ...) - images from users - OAuth It could also happen that the API credentials used will become invalid whenever my 14 day trial period ends (7 days remaining), but that would just require users to supply their own. --- CHANGELOG.md | 5 + docs/supportedsites.rst | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/directlink.py | 7 +- gallery_dl/extractor/powermanga.py | 2 +- gallery_dl/extractor/smugmug.py | 214 +++++++++++++++++++++++++++++ gallery_dl/util.py | 4 +- scripts/build_supportedsites.py | 10 +- 8 files changed, 229 insertions(+), 15 deletions(-) create mode 100644 gallery_dl/extractor/smugmug.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 844de878..89481b61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Changelog ## Unreleased +- Added support for: + - `smugmug` - https://www.smugmug.com/ +- Added title information for `mangadex` chapters +- Improved the `pinterest` API implementation (#83) +- Removed `gomanga` and `puremashiro` ## 1.3.4 - 2018-04-20 - Added support for custom OAuth2 credentials for `pinterest` diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index eaa14a72..ba5f8256 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -68,6 +68,7 @@ Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga Sen Manga http://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/ Chapters, Manga SlideShare https://www.slideshare.net/ Presentations +SmugMug https://www.smugmug.com/ Albums, individual Images, Nodes Subapics https://subapics.com/ Chapters, Manga The /b/ Archive https://thebarchive.com/ Threads Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3787f8d8..59213681 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -75,6 +75,7 @@ modules = [ "senmanga", "sensescans", "slideshare", + "smugmug", "subapics", "thebarchive", "tumblr", diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index b9e0c868..5cb76f34 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -21,10 +21,9 @@ class DirectlinkExtractor(Extractor): r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))" r"(?:\?(?P[^/?#]*))?(?:#(?P.*))?$"] test = [ - (("https://photos.smugmug.com/The-World/Hawaii/" - "i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), { - "url": "32ee1045881e17ef3f13a9958595afa42421ec6c", - "keyword": "2427b68c14006489df1776bb1bcd3bc24be25e10", + (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), { + "url": "18c5d00077332e98e53be9fed2ee4be66154b88d", + "keyword": "66bce3a0a6872d8497e1984eb49d54a3ed0d3d5e", }), # more complex example ("https://example.org/path/file.webm?que=1&ry=2#fragment", { diff --git a/gallery_dl/extractor/powermanga.py b/gallery_dl/extractor/powermanga.py index 281e20db..3b3be1a1 100644 --- a/gallery_dl/extractor/powermanga.py +++ b/gallery_dl/extractor/powermanga.py @@ -18,7 +18,7 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor): test = [(("https://read.powermanga.org" "/read/one_piece_digital_colour_comics/en/0/75/"), { "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384", - "keyword": "9bf211d435060d1e38d3d13e4aaaa5a87381bfad", + "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3", })] diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py new file mode 100644 index 00000000..23b29150 --- /dev/null +++ b/gallery_dl/extractor/smugmug.py @@ -0,0 +1,214 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from https://www.smugmug.com/""" + +from .common import Extractor, Message +from .. import text, util, exception +from ..cache import memcache + +BASE_PATTERN = ( + r"(?:smugmug:(?:https?://)?([^/]+)|" + r"(?:https?://)?([^.]+\.smugmug\.com))") + + +class SmugmugExtractor(Extractor): + """Base class for smugmug extractors""" + category = "smugmug" + filename_fmt = "{category}_{Owner[Name]}_{Image[ImageKey]}.{extension}" + + def __init__(self): + Extractor.__init__(self) + self.api = SmugmugAPI(self) + + def update_image(self, image): + if "ArchivedUri" not in image: + largest = self.api.image_largest(image["ImageKey"]) + for key in ("Url", "Width", "Height", "MD5", "Size"): + if key in largest: + image[key] = largest[key] + return image["Url"], image + return image["ArchivedUri"], image + + +class SmugmugAlbumExtractor(SmugmugExtractor): + subcategory = "album" + directory_fmt = ["{category}", "{Owner[Name]}", "{Album[Name]}"] + archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}" + pattern = [r"smugmug:album:([^:]+)$"] + test = [("smugmug:album:xgkb4C", { + "url": "eb6133445064115ad83d32cbc6472520a2d24d53", + "content": "864f6953cb04121290407a579611bc5087d117ee", + })] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.album_id = match.group(1) + + def items(self): + album = self.api.album(self.album_id) + images = self.api.album_images(self.album_id) + username = album["Uris"]["User"]["Uri"].rpartition("/")[2] + owner = self.api.user(username) + + data = { + "Album": album, + "Owner": owner, + } + + yield Message.Version, 1 + yield Message.Directory, data + + for image in images: + url, image = self.update_image(image) + data["Image"] = image + yield Message.Url, url, text.nameext_from_url(url, data) + + +class SmugmugImageExtractor(SmugmugExtractor): + subcategory = "image" + directory_fmt = ["{category}", "{Owner[Name]}"] + archive_fmt = "{Image[ImageKey]}" + pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"] + test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", { + "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4", + "keyword": "d53df829d493ec3e31b8fe300872beb968812bfd", + "content": "626fe50d25fe49beeda15e116938db36e163c01f", + })] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.image_id = match.group(3) + + def items(self): + image = self.api.image(self.image_id) + username = image["Uris"]["ImageOwner"]["Uri"].rpartition("/")[2] + owner = self.api.user(username) + + url, image = self.update_image(image) + + data = { + "Image": image, + "Owner": owner, + } + del image["Uris"] + del owner["Uris"] + text.nameext_from_url(url, data) + + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, url, data + + +class SmugmugNodeExtractor(SmugmugExtractor): + """ """ + subcategory = "node" + directory_fmt = ["{category}"] + archive_fmt = "n_{Node[NodeID]}_{Image[ImageID]}" + pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"] + test = [("https://mikf.smugmug.com/Test/n-xnNH3s", { + "pattern": "^smugmug:album:xgkb4C$", + })] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.node_id = match.group(3) + + def items(self): + yield Message.Version, 1 + + data = self.api.node(self.node_id) + if data["Type"] == "Album": + album_id = data["Uris"]["Album"]["Uri"].rpartition("/")[2] + yield Message.Queue, "smugmug:album:" + album_id, data + # ... + + +class SmugmugAPI(): + """Minimal interface for the smugmug API v2""" + API_URL = "https://api.smugmug.com/api/v2/" + API_KEY = "DFqxg4jf7GrtsQ5PnbNB8899zKfnDrdK" + API_SECRET = ("fknV35p9r9BwZC4XbTzvCXpcSJRdD83S" + "9nMFQm25ndGBzNPnwRDbRnnVBvqt4xTq") + HEADERS = {"Accept": "application/json"} + + def __init__(self, extractor): + api_key = extractor.config("api-key", self.API_KEY) + api_secret = extractor.config("api-secret", self.API_SECRET) + token = extractor.config("access-token") + token_secret = extractor.config("access-token-secret") + + if api_key and api_secret and token and token_secret: + self.session = util.OAuthSession( + extractor.session, + api_key, api_secret, + token, token_secret, + ) + self.api_key = None + else: + self.session = extractor.session + self.api_key = api_key + + def album(self, album_id): + return self._call("album/" + album_id)["Album"] + + def album_images(self, album_id): + return self._pagination("album/" + album_id + "!images") + + def image(self, image_id): + return self._call("image/" + image_id)["Image"] + + def image_largest(self, image_id): + endpoint = "image/" + image_id + "!largestimage" + return self._call(endpoint)["LargestImage"] + + def image_sizes(self, image_id): + return self._call("image/" + image_id + "!sizedetails") + + def node(self, node_id): + return self._call("node/" + node_id)["Node"] + + @memcache(keyarg=1) + def user(self, username): + return self._call("user/" + username)["User"] + + def _call(self, endpoint, params=None): + url = self.API_URL + endpoint + params = params or {} + if self.api_key: + params["APIKey"] = self.api_key + + response = self.session.get(url, params=params, headers=self.HEADERS) + data = response.json() + + if 200 <= data["Code"] < 400: + return data["Response"] + + if data["Code"] == 404: + raise exception.NotFoundError() + if data["Code"] == 429: + self.log.error("Rate limit reached") + raise exception.StopExtraction() + + def _pagination(self, endpoint): + params = { + "start": 1, + "count": 100, + } + while True: + response = self._call(endpoint, params) + + obj = response[response["Locator"]] + if isinstance(obj, list): + yield from obj + else: + yield obj + + if "NextPage" not in response["Pages"]: + return + params["start"] += params["count"] diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 8341e084..d566bfca 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -536,11 +536,11 @@ class OAuthSession(): self.params["oauth_signature_method"] = "HMAC-SHA1" self.params["oauth_version"] = "1.0" - def get(self, url, params): + def get(self, url, params, **kwargs): params.update(self.params) params["oauth_nonce"] = self.nonce(16) params["oauth_timestamp"] = int(time.time()) - return self.session.get(url + self.sign(url, params)) + return self.session.get(url + self.sign(url, params), **kwargs) def sign(self, url, params): """Generate 'oauth_signature' value and return query string""" diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 398d3623..594f58d2 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -5,7 +5,7 @@ import os.path ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.realpath(ROOTDIR)) -import gallery_dl.extractor +import gallery_dl.extractor # noqa CATEGORY_MAP = { @@ -20,7 +20,6 @@ CATEGORY_MAP = { "e621" : "e621", "exhentai" : "ExHentai", "fallenangels" : "Fallen Angels Scans", - "gomanga" : "GoManga", "hbrowse" : "HBrowse", "hentai2read" : "Hentai2Read", "hentaifoundry" : "Hentai Foundry", @@ -30,14 +29,11 @@ CATEGORY_MAP = { "imagebam" : "ImageBam", "imagefap" : "ImageFap", "imgbox" : "imgbox", - "imgchili" : "imgChili", "imgth" : "imgth", "imgur" : "imgur", "jaiminisbox" : "Jaimini's Box", "kireicake" : "Kirei Cake", - "kisscomic" : "KissComic", "kissmanga" : "KissManga", - "loveisover" : "Love is Over Archive", "mangadex" : "MangaDex", "mangafox" : "Manga Fox", "mangahere" : "Manga Here", @@ -48,7 +44,6 @@ CATEGORY_MAP = { "nyafuu" : "Nyafuu Archive", "paheal" : "rule #34", "powermanga" : "PowerManga", - "puremashiro" : "Pure Mashiro", "readcomiconline": "Read Comic Online", "rbt" : "RebeccaBlackTech", "rule34" : "Rule 34", @@ -58,10 +53,9 @@ CATEGORY_MAP = { "senmanga" : "Sen Manga", "sensescans" : "Sense-Scans", "slideshare" : "SlideShare", - "spectrumnexus" : "Spectrum Nexus", + "smugmug" : "SmugMug", "thebarchive" : "The /b/ Archive", "worldthree" : "World Three", - "yeet" : "YEET Archive", "xvideos" : "XVideos", } From 3fe653d940f83fb4be6e6aaad6417b391afe7b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 29 Apr 2018 22:37:13 +0200 Subject: [PATCH 08/12] fix test_results for empty sets {} is an empty dict and doesn't support set operations --- test/test_results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_results.py b/test/test_results.py index b20fcf67..2e13ebea 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -143,9 +143,9 @@ def generate_tests(): fltr = lambda c, bc: c in argv or bc in argv # noqa: E731 del sys.argv[1:] else: - skip = BROKEN.copy() + skip = set(BROKEN) if "CI" in os.environ and "TRAVIS" in os.environ: - skip |= TRAVIS_SKIP + skip |= set(TRAVIS_SKIP) print("skipping:", ", ".join(skip)) fltr = lambda c, bc: c not in skip # noqa: E731 From 2ea0d1da4275e354d997ba4655378135d4ca3e27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 30 Apr 2018 18:19:28 +0200 Subject: [PATCH 09/12] [smugmug] improve API code; use data expansions --- gallery_dl/extractor/pinterest.py | 4 +- gallery_dl/extractor/smugmug.py | 178 ++++++++++++++++++------------ 2 files changed, 110 insertions(+), 72 deletions(-) diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 58fa7c55..a244cf9e 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -39,7 +39,9 @@ class PinterestPinExtractor(PinterestExtractor): test = [ ("https://www.pinterest.com/pin/858146903966145189/", { "url": "afb3c26719e3a530bb0e871c480882a801a4e8a5", - "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947", + # image version depends on CDN server used + # "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947", + # "content": "4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca", }), ("https://www.pinterest.com/pin/858146903966145188/", { "exception": exception.NotFoundError, diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index 23b29150..fc37eaea 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -10,7 +10,6 @@ from .common import Extractor, Message from .. import text, util, exception -from ..cache import memcache BASE_PATTERN = ( r"(?:smugmug:(?:https?://)?([^/]+)|" @@ -20,25 +19,16 @@ BASE_PATTERN = ( class SmugmugExtractor(Extractor): """Base class for smugmug extractors""" category = "smugmug" - filename_fmt = "{category}_{Owner[Name]}_{Image[ImageKey]}.{extension}" + filename_fmt = "{category}_{Owner[NickName]}_{Image[ImageKey]}.{extension}" def __init__(self): Extractor.__init__(self) self.api = SmugmugAPI(self) - def update_image(self, image): - if "ArchivedUri" not in image: - largest = self.api.image_largest(image["ImageKey"]) - for key in ("Url", "Width", "Height", "MD5", "Size"): - if key in largest: - image[key] = largest[key] - return image["Url"], image - return image["ArchivedUri"], image - class SmugmugAlbumExtractor(SmugmugExtractor): subcategory = "album" - directory_fmt = ["{category}", "{Owner[Name]}", "{Album[Name]}"] + directory_fmt = ["{category}", "{Owner[NickName]}", "{Album[Name]}"] archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}" pattern = [r"smugmug:album:([^:]+)$"] test = [("smugmug:album:xgkb4C", { @@ -51,33 +41,30 @@ class SmugmugAlbumExtractor(SmugmugExtractor): self.album_id = match.group(1) def items(self): - album = self.api.album(self.album_id) - images = self.api.album_images(self.album_id) - username = album["Uris"]["User"]["Uri"].rpartition("/")[2] - owner = self.api.user(username) + album = self.api.album(self.album_id, "User") + owner = album["Uris"]["User"] - data = { - "Album": album, - "Owner": owner, - } + del album["Uris"] + del owner["Uris"] + data = {"Album": album, "Owner": owner} yield Message.Version, 1 yield Message.Directory, data - for image in images: - url, image = self.update_image(image) + for image in self.api.album_images(self.album_id, "LargestImage"): + url = _apply_largest(image) data["Image"] = image yield Message.Url, url, text.nameext_from_url(url, data) class SmugmugImageExtractor(SmugmugExtractor): subcategory = "image" - directory_fmt = ["{category}", "{Owner[Name]}"] + directory_fmt = ["{category}", "{Owner[NickName]}"] archive_fmt = "{Image[ImageKey]}" pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"] test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", { "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4", - "keyword": "d53df829d493ec3e31b8fe300872beb968812bfd", + "keyword": "490f2b977801e1f9c817be7aceea46d37418f08d", "content": "626fe50d25fe49beeda15e116938db36e163c01f", })] @@ -86,18 +73,13 @@ class SmugmugImageExtractor(SmugmugExtractor): self.image_id = match.group(3) def items(self): - image = self.api.image(self.image_id) - username = image["Uris"]["ImageOwner"]["Uri"].rpartition("/")[2] - owner = self.api.user(username) + image = self.api.image(self.image_id, "LargestImage,ImageOwner") + owner = image["Uris"]["ImageOwner"] - url, image = self.update_image(image) + url = _apply_largest(image) - data = { - "Image": image, - "Owner": owner, - } - del image["Uris"] del owner["Uris"] + data = {"Image": image, "Owner": owner} text.nameext_from_url(url, data) yield Message.Version, 1 @@ -106,10 +88,7 @@ class SmugmugImageExtractor(SmugmugExtractor): class SmugmugNodeExtractor(SmugmugExtractor): - """ """ subcategory = "node" - directory_fmt = ["{category}"] - archive_fmt = "n_{Node[NodeID]}_{Image[ImageID]}" pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"] test = [("https://mikf.smugmug.com/Test/n-xnNH3s", { "pattern": "^smugmug:album:xgkb4C$", @@ -124,9 +103,10 @@ class SmugmugNodeExtractor(SmugmugExtractor): data = self.api.node(self.node_id) if data["Type"] == "Album": - album_id = data["Uris"]["Album"]["Uri"].rpartition("/")[2] - yield Message.Queue, "smugmug:album:" + album_id, data - # ... + yield Message.Queue, "smugmug:album:" + _get(data, "Album"), data + # if data["Type"] == "Folder": + # for child in self.api.node_children(self.node_id): + # yield Message.Queue, "smugmug:node:" + ... class SmugmugAPI(): @@ -154,61 +134,117 @@ class SmugmugAPI(): self.session = extractor.session self.api_key = api_key - def album(self, album_id): - return self._call("album/" + album_id)["Album"] + self.log = extractor.log - def album_images(self, album_id): - return self._pagination("album/" + album_id + "!images") + def album(self, album_id, expands=None): + return self._expansion("album/" + album_id, expands) - def image(self, image_id): - return self._call("image/" + image_id)["Image"] + def image(self, image_id, expands=None): + return self._expansion("image/" + image_id, expands) - def image_largest(self, image_id): - endpoint = "image/" + image_id + "!largestimage" - return self._call(endpoint)["LargestImage"] + def node(self, node_id, expands=None): + return self._expansion("node/" + node_id, expands) - def image_sizes(self, image_id): - return self._call("image/" + image_id + "!sizedetails") + def user(self, username, expands=None): + return self._expansion("user/" + username, expands) - def node(self, node_id): - return self._call("node/" + node_id)["Node"] + def album_images(self, album_id, expands=None): + return self._pagination("album/" + album_id + "!images", expands) - @memcache(keyarg=1) - def user(self, username): - return self._call("user/" + username)["User"] + def node_children(self, node_id, expands=None): + return self._pagination("node/" + node_id + "!children", expands) def _call(self, endpoint, params=None): url = self.API_URL + endpoint params = params or {} if self.api_key: params["APIKey"] = self.api_key + params["_verbosity"] = "1" response = self.session.get(url, params=params, headers=self.HEADERS) data = response.json() if 200 <= data["Code"] < 400: - return data["Response"] - + return data if data["Code"] == 404: raise exception.NotFoundError() if data["Code"] == 429: self.log.error("Rate limit reached") - raise exception.StopExtraction() + else: + self.log.error("API request failed") + self.log.debug(data) + raise exception.StopExtraction() + + def _expansion(self, endpoint, expands): + if expands: + endpoint += "?_expand=" + expands + return _apply_expansions(self._call(endpoint), expands) + + def _pagination(self, endpoint, expands=None): + if expands: + endpoint += "?_expand=" + expands + params = {"start": 1, "count": 100} - def _pagination(self, endpoint): - params = { - "start": 1, - "count": 100, - } while True: - response = self._call(endpoint, params) + data = self._call(endpoint, params) + yield from _apply_expansions_iter(data, expands) - obj = response[response["Locator"]] - if isinstance(obj, list): - yield from obj - else: - yield obj - - if "NextPage" not in response["Pages"]: + if "NextPage" not in data["Response"]["Pages"]: return params["start"] += params["count"] + + +def _apply_largest(image, delete=True): + largest = image["Uris"]["LargestImage"] + if delete: + del image["Uris"] + for key in ("Url", "Width", "Height", "MD5", "Size", "Watermarked"): + if key in largest: + image[key] = largest[key] + return image["Url"] + + +def _get(obj, key): + return obj["Uris"][key].rpartition("/")[2] + + +def _apply_expansions(data, expands): + obj = _unwrap(data["Response"]) + + if "Expansions" in data: + expansions = data["Expansions"] + uris = obj["Uris"] + + for name in expands.split(","): + uri = uris[name] + uris[name] = _unwrap(expansions[uri]) + + return obj + + +def _apply_expansions_iter(data, expands): + objs = _unwrap_iter(data["Response"]) + + if "Expansions" in data: + expansions = data["Expansions"] + expands = expands.split(",") + + for obj in objs: + uris = obj["Uris"] + + for name in expands: + uri = uris[name] + uris[name] = _unwrap(expansions[uri]) + + return objs + + +def _unwrap(response): + return response[response["Locator"]] + + +def _unwrap_iter(response): + obj = _unwrap(response) + if isinstance(obj, list): + return obj + return (obj,) From 42ed7667b84c1d42dca1f0ff37d1b02bb2ce5216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 2 May 2018 20:06:50 +0200 Subject: [PATCH 10/12] [smugmug] support user- and general album URLs --- gallery_dl/extractor/smugmug.py | 111 +++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 22 deletions(-) diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index fc37eaea..6f8d1be0 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -12,8 +12,8 @@ from .common import Extractor, Message from .. import text, util, exception BASE_PATTERN = ( - r"(?:smugmug:(?:https?://)?([^/]+)|" - r"(?:https?://)?([^.]+\.smugmug\.com))") + r"(?:smugmug:(?!album:)(?:https?://)?([^/]+)|" + r"(?:https?://)?([^.]+)\.smugmug\.com)") class SmugmugExtractor(Extractor): @@ -24,6 +24,12 @@ class SmugmugExtractor(Extractor): def __init__(self): Extractor.__init__(self) self.api = SmugmugAPI(self) + self.domain = None + self.user = None + + def _resolve_user(self): + if not self.user: + self.user = self.api.site_user(self.domain)["NickName"] class SmugmugAlbumExtractor(SmugmugExtractor): @@ -64,7 +70,7 @@ class SmugmugImageExtractor(SmugmugExtractor): pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"] test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", { "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4", - "keyword": "490f2b977801e1f9c817be7aceea46d37418f08d", + "keyword": "3fd6db2ab3d12a6d3cfc49ee57adc91fdd295a6c", "content": "626fe50d25fe49beeda15e116938db36e163c01f", })] @@ -87,31 +93,80 @@ class SmugmugImageExtractor(SmugmugExtractor): yield Message.Url, url, data -class SmugmugNodeExtractor(SmugmugExtractor): - subcategory = "node" - pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/n-([^/?&#]+)$"] - test = [("https://mikf.smugmug.com/Test/n-xnNH3s", { - "pattern": "^smugmug:album:xgkb4C$", - })] +class SmugmugUserExtractor(SmugmugExtractor): + subcategory = "user" + pattern = [BASE_PATTERN + "(?:/browse)?/?$"] + test = [ + ("https://mikf.smugmug.com/", { + "pattern": "smugmug:album:xgkb4C$", + }), + ("https://mikf.smugmug.com/browse", None), + ("smugmug:https://www.creativedogportraits.com/", { + "pattern": "smugmug:album:txWXzs$", + }), + ("smugmug:www.creativedogportraits.com/", None), + ("smugmug:www.creativedogportraits.com/browse", None), + ] def __init__(self, match): SmugmugExtractor.__init__(self) - self.node_id = match.group(3) + self.domain = match.group(1) + self.user = match.group(2) + + def items(self): + self._resolve_user() + yield Message.Version, 1 + for album in self.api.user_albums(self.user): + uri = "smugmug:album:" + album["AlbumKey"] + yield Message.Queue, uri, album + + +class SmugmugNodeExtractor(SmugmugExtractor): + subcategory = "node" + pattern = [BASE_PATTERN + + r"((?:/[^/?&#a-z][^/?&#]*)+)" + r"(?:/n-([^/?&#]+))?/?$"] + test = [ + ("https://mikf.smugmug.com/Test/", { + "pattern": "smugmug:album:xgkb4C$", + }), + ("https://mikf.smugmug.com/Test/n-xnNH3s", { + "pattern": "smugmug:album:xgkb4C$", + }), + ("smugmug:https://www.creativedogportraits.com/PortfolioGallery/", { + "pattern": "smugmug:album:txWXzs$", + }), + ] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.domain, self.user, self.path, self.node_id = match.groups() def items(self): yield Message.Version, 1 - data = self.api.node(self.node_id) - if data["Type"] == "Album": - yield Message.Queue, "smugmug:album:" + _get(data, "Album"), data - # if data["Type"] == "Folder": - # for child in self.api.node_children(self.node_id): - # yield Message.Queue, "smugmug:node:" + ... + if self.node_id: + node = self.api.node(self.node_id) + else: + self._resolve_user() + data = self.api.user_urlpathlookup(self.user, self.path) + node = data["Uris"]["Node"] + + nodes = (node,) if node["Type"] == "Album" else self.album_nodes(node) + for node in nodes: + yield Message.Queue, "smugmug:album:" + _get(node, "Album"), node + + def album_nodes(self, root): + for node in self.api.node_children(root["NodeID"]): + if node["Type"] == "Album": + yield node + elif node["Type"] == "Folder": + yield from self.album_nodes(node) class SmugmugAPI(): """Minimal interface for the smugmug API v2""" - API_URL = "https://api.smugmug.com/api/v2/" + API_DOMAIN = "api.smugmug.com" API_KEY = "DFqxg4jf7GrtsQ5PnbNB8899zKfnDrdK" API_SECRET = ("fknV35p9r9BwZC4XbTzvCXpcSJRdD83S" "9nMFQm25ndGBzNPnwRDbRnnVBvqt4xTq") @@ -154,8 +209,19 @@ class SmugmugAPI(): def node_children(self, node_id, expands=None): return self._pagination("node/" + node_id + "!children", expands) - def _call(self, endpoint, params=None): - url = self.API_URL + endpoint + def user_albums(self, username, expands=None): + return self._pagination("user/" + username + "!albums", expands) + + def site_user(self, domain): + return _unwrap(self._call("!siteuser", domain=domain)["Response"]) + + def user_urlpathlookup(self, username, path): + endpoint = "user/" + username + "!urlpathlookup" + params = {"urlpath": path} + return self._expansion(endpoint, "Node", params) + + def _call(self, endpoint, params=None, domain=API_DOMAIN): + url = "https://{}/api/v2/{}".format(domain, endpoint) params = params or {} if self.api_key: params["APIKey"] = self.api_key @@ -175,10 +241,10 @@ class SmugmugAPI(): self.log.debug(data) raise exception.StopExtraction() - def _expansion(self, endpoint, expands): + def _expansion(self, endpoint, expands, params=None): if expands: endpoint += "?_expand=" + expands - return _apply_expansions(self._call(endpoint), expands) + return _apply_expansions(self._call(endpoint, params), expands) def _pagination(self, endpoint, expands=None): if expands: @@ -240,7 +306,8 @@ def _apply_expansions_iter(data, expands): def _unwrap(response): - return response[response["Locator"]] + locator = response["Locator"] + return response[locator] if locator in response else [] def _unwrap_iter(response): From 3ce52963131e7fdcc7e47f18ec7c94fcb54766a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 3 May 2018 14:12:10 +0200 Subject: [PATCH 11/12] [smugmug] code cleanup - combine User and Node extractors - (re)move miscellaneous helper functions - rename "Owner" to "User" --- gallery_dl/extractor/smugmug.py | 221 +++++++++++++++----------------- 1 file changed, 101 insertions(+), 120 deletions(-) diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index 6f8d1be0..3306bc58 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -19,28 +19,43 @@ BASE_PATTERN = ( class SmugmugExtractor(Extractor): """Base class for smugmug extractors""" category = "smugmug" - filename_fmt = "{category}_{Owner[NickName]}_{Image[ImageKey]}.{extension}" + filename_fmt = ("{category}_{User[NickName]}_" + "{Image[UploadKey]}_{Image[ImageKey]}.{extension}") def __init__(self): Extractor.__init__(self) self.api = SmugmugAPI(self) - self.domain = None - self.user = None - def _resolve_user(self): - if not self.user: - self.user = self.api.site_user(self.domain)["NickName"] + @staticmethod + def _apply_largest(image, delete=True): + largest = image["Uris"]["LargestImage"] + if delete: + del image["Uris"] + for key in ("Url", "Width", "Height", "MD5", "Size", "Watermarked"): + if key in largest: + image[key] = largest[key] + return image["Url"] class SmugmugAlbumExtractor(SmugmugExtractor): + """Extractor for smugmug albums""" subcategory = "album" - directory_fmt = ["{category}", "{Owner[NickName]}", "{Album[Name]}"] + directory_fmt = ["{category}", "{User[NickName]}", "{Album[Name]}"] archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}" pattern = [r"smugmug:album:([^:]+)$"] - test = [("smugmug:album:xgkb4C", { - "url": "eb6133445064115ad83d32cbc6472520a2d24d53", - "content": "864f6953cb04121290407a579611bc5087d117ee", - })] + test = [ + ("smugmug:album:MN6kHH", { + "count": 0, + }), + ("smugmug:album:6Ffcgk", { + "count": 1, + "pattern": ".*/i-L4CxBdg/0/33e0b290/X3/i-L4CxBdg-X3.jpg", + }), + ("smugmug:album:drn76C", { + "count": 2, + "content": "864f6953cb04121290407a579611bc5087d117ee", + }), + ] def __init__(self, match): SmugmugExtractor.__init__(self) @@ -48,29 +63,30 @@ class SmugmugAlbumExtractor(SmugmugExtractor): def items(self): album = self.api.album(self.album_id, "User") - owner = album["Uris"]["User"] + user = album["Uris"]["User"] + del user["Uris"] del album["Uris"] - del owner["Uris"] - data = {"Album": album, "Owner": owner} + data = {"Album": album, "User": user} yield Message.Version, 1 yield Message.Directory, data for image in self.api.album_images(self.album_id, "LargestImage"): - url = _apply_largest(image) + url = self._apply_largest(image) data["Image"] = image yield Message.Url, url, text.nameext_from_url(url, data) class SmugmugImageExtractor(SmugmugExtractor): + """Extractor for individual smugmug images""" subcategory = "image" - directory_fmt = ["{category}", "{Owner[NickName]}"] + directory_fmt = ["{category}", "{User[NickName]}"] archive_fmt = "{Image[ImageKey]}" pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"] test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", { "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4", - "keyword": "3fd6db2ab3d12a6d3cfc49ee57adc91fdd295a6c", + "keyword": "df63d36cfaeb128fda86802942d9a4271d3feafc", "content": "626fe50d25fe49beeda15e116938db36e163c01f", })] @@ -80,12 +96,11 @@ class SmugmugImageExtractor(SmugmugExtractor): def items(self): image = self.api.image(self.image_id, "LargestImage,ImageOwner") - owner = image["Uris"]["ImageOwner"] + user = image["Uris"]["ImageOwner"] + url = self._apply_largest(image) - url = _apply_largest(image) - - del owner["Uris"] - data = {"Image": image, "Owner": owner} + del user["Uris"] + data = {"Image": image, "User": user} text.nameext_from_url(url, data) yield Message.Version, 1 @@ -93,39 +108,10 @@ class SmugmugImageExtractor(SmugmugExtractor): yield Message.Url, url, data -class SmugmugUserExtractor(SmugmugExtractor): - subcategory = "user" - pattern = [BASE_PATTERN + "(?:/browse)?/?$"] - test = [ - ("https://mikf.smugmug.com/", { - "pattern": "smugmug:album:xgkb4C$", - }), - ("https://mikf.smugmug.com/browse", None), - ("smugmug:https://www.creativedogportraits.com/", { - "pattern": "smugmug:album:txWXzs$", - }), - ("smugmug:www.creativedogportraits.com/", None), - ("smugmug:www.creativedogportraits.com/browse", None), - ] - - def __init__(self, match): - SmugmugExtractor.__init__(self) - self.domain = match.group(1) - self.user = match.group(2) - - def items(self): - self._resolve_user() - yield Message.Version, 1 - for album in self.api.user_albums(self.user): - uri = "smugmug:album:" + album["AlbumKey"] - yield Message.Queue, uri, album - - -class SmugmugNodeExtractor(SmugmugExtractor): - subcategory = "node" - pattern = [BASE_PATTERN + - r"((?:/[^/?&#a-z][^/?&#]*)+)" - r"(?:/n-([^/?&#]+))?/?$"] +class SmugmugPathExtractor(SmugmugExtractor): + """Extractor for smugmug albums from URL paths and users""" + subcategory = "path" + pattern = [BASE_PATTERN + r"((?:/[^/?&#a-mo-z][^/?&#]*)*)/?$"] test = [ ("https://mikf.smugmug.com/Test/", { "pattern": "smugmug:album:xgkb4C$", @@ -133,30 +119,51 @@ class SmugmugNodeExtractor(SmugmugExtractor): ("https://mikf.smugmug.com/Test/n-xnNH3s", { "pattern": "smugmug:album:xgkb4C$", }), - ("smugmug:https://www.creativedogportraits.com/PortfolioGallery/", { + ("https://mikf.smugmug.com/", { + "count": 4, + "pattern": "smugmug:album:(xgkb4C|MN6kHH|6Ffcgk|drn76C)$", + }), + ("smugmug:www.creativedogportraits.com/PortfolioGallery/", { "pattern": "smugmug:album:txWXzs$", }), + ("smugmug:www.creativedogportraits.com/", { + "pattern": "smugmug:album:txWXzs$", + }), + ("smugmug:https://www.creativedogportraits.com/", None), ] def __init__(self, match): SmugmugExtractor.__init__(self) - self.domain, self.user, self.path, self.node_id = match.groups() + self.domain, self.user, self.path = match.groups() def items(self): yield Message.Version, 1 - if self.node_id: - node = self.api.node(self.node_id) - else: - self._resolve_user() + if not self.user: + self.user = self.api.site_user(self.domain)["NickName"] + + if self.path: data = self.api.user_urlpathlookup(self.user, self.path) node = data["Uris"]["Node"] - nodes = (node,) if node["Type"] == "Album" else self.album_nodes(node) - for node in nodes: - yield Message.Queue, "smugmug:album:" + _get(node, "Album"), node + if node["Type"] == "Album": + nodes = (node,) + elif node["Type"] == "Folder": + nodes = self.album_nodes(node) + else: + nodes = () + + for node in nodes: + album_id = node["Uris"]["Album"].rpartition("/")[2] + yield Message.Queue, "smugmug:album:" + album_id, node + + else: + for album in self.api.user_albums(self.user): + uri = "smugmug:album:" + album["AlbumKey"] + yield Message.Queue, uri, album def album_nodes(self, root): + """Yield all descendant album nodes of 'root'""" for node in self.api.node_children(root["NodeID"]): if node["Type"] == "Album": yield node @@ -213,7 +220,7 @@ class SmugmugAPI(): return self._pagination("user/" + username + "!albums", expands) def site_user(self, domain): - return _unwrap(self._call("!siteuser", domain=domain)["Response"]) + return self._call("!siteuser", domain=domain)["Response"]["User"] def user_urlpathlookup(self, username, path): endpoint = "user/" + username + "!urlpathlookup" @@ -242,76 +249,50 @@ class SmugmugAPI(): raise exception.StopExtraction() def _expansion(self, endpoint, expands, params=None): - if expands: - endpoint += "?_expand=" + expands - return _apply_expansions(self._call(endpoint, params), expands) + endpoint = self._extend(endpoint, expands) + result = self._apply_expansions(self._call(endpoint, params), expands) + if not result: + raise exception.NotFoundError() + return result[0] def _pagination(self, endpoint, expands=None): - if expands: - endpoint += "?_expand=" + expands + endpoint = self._extend(endpoint, expands) params = {"start": 1, "count": 100} while True: data = self._call(endpoint, params) - yield from _apply_expansions_iter(data, expands) + yield from self._apply_expansions(data, expands) if "NextPage" not in data["Response"]["Pages"]: return params["start"] += params["count"] + @staticmethod + def _extend(endpoint, expands): + if expands: + endpoint += "?_expand=" + expands + return endpoint -def _apply_largest(image, delete=True): - largest = image["Uris"]["LargestImage"] - if delete: - del image["Uris"] - for key in ("Url", "Width", "Height", "MD5", "Size", "Watermarked"): - if key in largest: - image[key] = largest[key] - return image["Url"] + @staticmethod + def _apply_expansions(data, expands): + def unwrap(response): + locator = response["Locator"] + return response[locator] if locator in response else [] -def _get(obj, key): - return obj["Uris"][key].rpartition("/")[2] + objs = unwrap(data["Response"]) + if not isinstance(objs, list): + objs = (objs,) + if "Expansions" in data: + expansions = data["Expansions"] + expands = expands.split(",") -def _apply_expansions(data, expands): - obj = _unwrap(data["Response"]) + for obj in objs: + uris = obj["Uris"] - if "Expansions" in data: - expansions = data["Expansions"] - uris = obj["Uris"] + for name in expands: + uri = uris[name] + uris[name] = unwrap(expansions[uri]) - for name in expands.split(","): - uri = uris[name] - uris[name] = _unwrap(expansions[uri]) - - return obj - - -def _apply_expansions_iter(data, expands): - objs = _unwrap_iter(data["Response"]) - - if "Expansions" in data: - expansions = data["Expansions"] - expands = expands.split(",") - - for obj in objs: - uris = obj["Uris"] - - for name in expands: - uri = uris[name] - uris[name] = _unwrap(expansions[uri]) - - return objs - - -def _unwrap(response): - locator = response["Locator"] - return response[locator] if locator in response else [] - - -def _unwrap_iter(response): - obj = _unwrap(response) - if isinstance(obj, list): - return obj - return (obj,) + return objs From 82c50fa6098931ef14b9dc55871b4a11077da9d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 4 May 2018 10:03:20 +0200 Subject: [PATCH 12/12] release version 1.3.5 --- CHANGELOG.md | 5 +++-- README.rst | 4 ++-- docs/supportedsites.rst | 3 ++- gallery_dl/version.py | 2 +- scripts/build_supportedsites.py | 1 + 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89481b61..0765ef0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ # Changelog -## Unreleased +## 1.3.5 - 2018-05-04 - Added support for: - `smugmug` - https://www.smugmug.com/ - Added title information for `mangadex` chapters -- Improved the `pinterest` API implementation (#83) +- Improved the `pinterest` API implementation ([#83](https://github.com/mikf/gallery-dl/issues/83)) +- Improved error handling for `deviantart` and `tumblr` - Removed `gomanga` and `puremashiro` ## 1.3.4 - 2018-04-20 diff --git a/README.rst b/README.rst index af008c44..cbdfdfc6 100644 --- a/README.rst +++ b/README.rst @@ -215,12 +215,12 @@ access to *gallery-dl*. Authorize it and you will he shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Complete List: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _standalone executable: https://github.com/mikf/gallery-dl/releases/download/v1.3.4/gallery-dl.exe +.. _standalone executable: https://github.com/mikf/gallery-dl/releases/download/v1.3.5/gallery-dl.exe .. _Python: https://www.python.org/downloads/ .. _Requests: https://pypi.python.org/pypi/requests/ .. _PyPI: https://pypi.python.org/pypi .. _pip: https://pip.pypa.io/en/stable/ -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.3.4.zip +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.3.5.zip .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip .. _OAuth: https://en.wikipedia.org/wiki/OAuth diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index ba5f8256..3a7a8fcf 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -68,7 +68,7 @@ Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga Sen Manga http://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/ Chapters, Manga SlideShare https://www.slideshare.net/ Presentations -SmugMug https://www.smugmug.com/ Albums, individual Images, Nodes +SmugMug https://www.smugmug.com/ |Albums, individ-5| Subapics https://subapics.com/ Chapters, Manga The /b/ Archive https://thebarchive.com/ Threads Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) @@ -92,3 +92,4 @@ Turboimagehost https://turboimagehost.com/ individual Images .. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results .. |Images from Use-3| replace:: Images from Users, Doujin, Favorites, individual Images .. |Images from Use-4| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images +.. |Albums, individ-5| replace:: Albums, individual Images, Images from Users and Folders diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 191fe110..fb73168e 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.3.5-dev" +__version__ = "1.3.5" diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 594f58d2..db12075d 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -66,6 +66,7 @@ SUBCATEGORY_MAP = { "issue" : "Comic-Issues", "manga" : "Manga", "me" : "pixiv.me Links", + "path" : "Images from Users and Folders", "pinit" : "pin.it Links", "popular": "Popular Images", "search" : "Search Results",