diff --git a/CHANGELOG.md b/CHANGELOG.md index 844de878..0765ef0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Changelog -## Unreleased +## 1.3.5 - 2018-05-04 +- Added support for: + - `smugmug` - https://www.smugmug.com/ +- Added title information for `mangadex` chapters +- Improved the `pinterest` API implementation ([#83](https://github.com/mikf/gallery-dl/issues/83)) +- Improved error handling for `deviantart` and `tumblr` +- Removed `gomanga` and `puremashiro` ## 1.3.4 - 2018-04-20 - Added support for custom OAuth2 credentials for `pinterest` diff --git a/README.rst b/README.rst index af008c44..cbdfdfc6 100644 --- a/README.rst +++ b/README.rst @@ -215,12 +215,12 @@ access to *gallery-dl*. Authorize it and you will he shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Complete List: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _standalone executable: https://github.com/mikf/gallery-dl/releases/download/v1.3.4/gallery-dl.exe +.. _standalone executable: https://github.com/mikf/gallery-dl/releases/download/v1.3.5/gallery-dl.exe .. _Python: https://www.python.org/downloads/ .. _Requests: https://pypi.python.org/pypi/requests/ .. _PyPI: https://pypi.python.org/pypi .. _pip: https://pip.pypa.io/en/stable/ -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.3.4.zip +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.3.5.zip .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip .. _OAuth: https://en.wikipedia.org/wiki/OAuth diff --git a/docs/configuration.rst b/docs/configuration.rst index 6c464d41..f3fef6a0 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -820,35 +820,6 @@ How To =========== ===== -extractor.pinterest.access-token --------------------------------- -=========== ===== -Type ``string`` -How To - register a Pinterest application and use its client-id and - client-secret (see `extractor.pinterest.client-id & .secret`_) - - run `gallery-dl oauth:pinterest` and authenticate access with - (preferably) the same account that registered the application -Notes Access tokens currently only allow for 10 requests per hour. -=========== ===== - - -extractor.pinterest.client-id & .secret ---------------------------------------- -=========== ===== -Type ``string`` -How To - login and visit Pinterest's - `Apps `__ section - - agree to "Pinterest Developer Terms and the API Policy" - and click "Create app" - - choose a random name and description and click "Create" - - scroll down and set a Site URL (e.g. https://example.org/) - and allow https://mikf.github.io/gallery-dl/oauth-redirect.html - as Redirect URI - - scroll back up again, copy the "App ID" and "App secret" values - and put them in your configuration file -=========== ===== - - extractor.reddit.client-id & .user-agent ---------------------------------------- =========== ===== diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index e55ba8b9..3a7a8fcf 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -24,7 +24,6 @@ Flickr https://www.flickr.com/ |Images from Use-2| Futaba Channel https://www.2chan.net/ Threads Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches Gfycat https://gfycat.com/ individual Images -GoManga https://gomanga.co/ Chapters, Manga HBrowse http://www.hbrowse.com/ Chapters, Manga Hentai Foundry https://www.hentai-foundry.com/ Images from Users, individual Images Hentai2Read https://hentai2read.com/ Chapters, Manga @@ -58,7 +57,6 @@ Pawoo https://pawoo.net Images from Users, Imag Pinterest https://www.pinterest.com Boards, Pins, pin.it Links Pixiv https://www.pixiv.net/ |Images from Use-4| Required PowerManga https://powermanga.org/ Chapters, Manga -Pure Mashiro http://reader.puremashiro.moe/ Chapters, Manga Read Comic Online http://readcomiconline.to/ Comic-Issues, Comics RebeccaBlackTech https://rbt.asia/ Threads Reddit https://reddit.com/ individual Images, Submissions, Subreddits Optional (OAuth) @@ -70,6 +68,7 @@ Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga Sen Manga http://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/ Chapters, Manga SlideShare https://www.slideshare.net/ Presentations +SmugMug https://www.smugmug.com/ |Albums, individ-5| Subapics https://subapics.com/ Chapters, Manga The /b/ Archive https://thebarchive.com/ Threads Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth) @@ -93,3 +92,4 @@ Turboimagehost https://turboimagehost.com/ individual Images .. |Images from Use-2| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results .. |Images from Use-3| replace:: Images from Users, Doujin, Favorites, individual Images .. |Images from Use-4| replace:: Images from Users, Bookmarks, Favorites, pixiv.me Links, Rankings, Individual Images +.. |Albums, individ-5| replace:: Albums, individual Images, Images from Users and Folders diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 1a9c28c3..59213681 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -31,7 +31,6 @@ modules = [ "flickr", "gelbooru", "gfycat", - "gomanga", "hbrowse", "hentai2read", "hentaifoundry", @@ -65,7 +64,6 @@ modules = [ "pinterest", "pixiv", "powermanga", - "puremashiro", "readcomiconline", "rebeccablacktech", "reddit", @@ -77,6 +75,7 @@ modules = [ "senmanga", "sensescans", "slideshare", + "smugmug", "subapics", "thebarchive", "tumblr", diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index b9e0c868..5cb76f34 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -21,10 +21,9 @@ class DirectlinkExtractor(Extractor): r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))" r"(?:\?(?P[^/?#]*))?(?:#(?P.*))?$"] test = [ - (("https://photos.smugmug.com/The-World/Hawaii/" - "i-SWz2K6n/2/X3/IMG_0311-X3.jpg"), { - "url": "32ee1045881e17ef3f13a9958595afa42421ec6c", - "keyword": "2427b68c14006489df1776bb1bcd3bc24be25e10", + (("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), { + "url": "18c5d00077332e98e53be9fed2ee4be66154b88d", + "keyword": "66bce3a0a6872d8497e1984eb49d54a3ed0d3d5e", }), # more complex example ("https://example.org/path/file.webm?que=1&ry=2#fragment", { diff --git a/gallery_dl/extractor/gomanga.py b/gallery_dl/extractor/gomanga.py deleted file mode 100644 index 0e547a74..00000000 --- a/gallery_dl/extractor/gomanga.py +++ /dev/null @@ -1,38 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2017 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://gomanga.co/""" - -from . import foolslide - - -class GomangaChapterExtractor(foolslide.FoolslideChapterExtractor): - """Extractor for manga-chapters from gomanga.co""" - category = "gomanga" - pattern = foolslide.chapter_pattern(r"(?:www\.)?gomanga\.co/reader") - test = [ - ("https://gomanga.co/reader/read/mata-kata-omou/en/0/1/page/11", { - "url": "5088d75bb44327fc503c85b52b1d6a371b8057f2", - "keyword": "10624e78924c37fd39543270a6965f2082bde08f", - }), - ("https://gomanga.co/reader/read/pastel/en/31/144/", { - "url": "9cc2052fbf36344c573c754c5abe533a14b3e280", - "keyword": "a355cd3197e70c24b84d3885e8a5ff0ac22537bf", - }), - ] - method = "double" - - -class GomangaMangaExtractor(foolslide.FoolslideMangaExtractor): - """Extractor for manga from gomanga.co""" - category = "gomanga" - pattern = foolslide.manga_pattern(r"(?:www\.)?gomanga\.co/reader") - test = [("https://gomanga.co/reader/series/pastel/", { - "url": "bd1c82d70838d54140a8209296e789f27ceab7cd", - "keyword": "fb1fd14548602dbe4f6e70a633429762972c1d5d", - })] diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 8dc6e26f..f161126e 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -198,29 +198,6 @@ class OAuthFlickr(OAuthBase): ) -class OAuthPinterest(OAuthBase): - subcategory = "pinterest" - pattern = ["oauth:pinterest$"] - redirect_uri = "https://mikf.github.io/gallery-dl/oauth-redirect.html" - - def items(self): - yield Message.Version, 1 - - client_id = self.oauth_config("client-id") - client_secret = self.oauth_config("client-secret") - - if not client_id or not client_secret: - self.log.error("'client-id' and 'client-secret' required") - return - - self._oauth2_authorization_code_grant( - client_id, client_secret, - "https://api.pinterest.com/oauth/", - "https://api.pinterest.com/v1/oauth/token", - scope="read_public", key="access_token", auth=False, - ) - - class OAuthReddit(OAuthBase): subcategory = "reddit" pattern = ["oauth:reddit$"] diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 67f7b318..a244cf9e 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -10,13 +10,14 @@ from .common import Extractor, Message from .. import text, exception +import json class PinterestExtractor(Extractor): """Base class for pinterest extractors""" category = "pinterest" - filename_fmt = "{category}_{pin_id}.{extension}" - archive_fmt = "{pin_id}" + filename_fmt = "{category}_{id}.{extension}" + archive_fmt = "{id}" def __init__(self): Extractor.__init__(self) @@ -24,15 +25,11 @@ class PinterestExtractor(Extractor): def data_from_pin(self, pin): """Get image url and metadata from a pin-object""" - img = pin["image"]["original"] + img = pin["images"]["orig"] url = img["url"] - data = { - "pin_id": text.parse_int(pin["id"]), - "note": pin["note"], - "width": text.parse_int(img["width"]), - "height": text.parse_int(img["height"]), - } - return url, text.nameext_from_url(url, data) + pin["width"] = img["width"] + pin["height"] = img["height"] + return url, text.nameext_from_url(url, pin) class PinterestPinExtractor(PinterestExtractor): @@ -42,13 +39,11 @@ class PinterestPinExtractor(PinterestExtractor): test = [ ("https://www.pinterest.com/pin/858146903966145189/", { "url": "afb3c26719e3a530bb0e871c480882a801a4e8a5", - "keyword": "f651cb271247f306d1d30385d49c7b82da44c2b1", - "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947", + # image version depends on CDN server used + # "content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947", + # "content": "4c435a66f6bb82bb681db2ecc888f76cf6c5f9ca", }), ("https://www.pinterest.com/pin/858146903966145188/", { - "exception": exception.StopExtraction, - }), - ("https://www.pinterest.com/pin/85814690396614518/", { "exception": exception.NotFoundError, }), ] @@ -68,13 +63,13 @@ class PinterestPinExtractor(PinterestExtractor): class PinterestBoardExtractor(PinterestExtractor): """Extractor for images from a board from pinterest.com""" subcategory = "board" - directory_fmt = ["{category}", "{user}", "{board}"] + directory_fmt = ["{category}", "{board[owner][username]}", "{board[name]}"] + archive_fmt = "{board[id]}_{id}" pattern = [r"(?:https?://)?(?:[^./]+\.)?pinterest\.[^/]+/" r"(?!pin/)([^/?#&]+)/([^/?#&]+)"] test = [ ("https://www.pinterest.com/g1952849/test-/", { "url": "85911dfca313f3f7f48c2aa0bc684f539d1d80a6", - "keyword": "c54cf5aa830994f2ed4871efa7154a5fdaa1c2ce", }), ("https://www.pinterest.com/g1952848/test/", { "exception": exception.NotFoundError, @@ -83,30 +78,18 @@ class PinterestBoardExtractor(PinterestExtractor): def __init__(self, match): PinterestExtractor.__init__(self) - self.user, self.board = match.groups() + self.user = text.unquote(match.group(1)) + self.board = text.unquote(match.group(2)) def items(self): board = self.api.board(self.user, self.board) - data = self.data_from_board(board) - num = data["count"] + data = {"board": board, "count": board["pin_count"]} yield Message.Version, 1 yield Message.Directory, data - for pin in self.api.board_pins(self.user, self.board): - url, pdata = self.data_from_pin(pin) - data.update(pdata) - data["num"] = num - num -= 1 - yield Message.Url, url, data - - def data_from_board(self, board): - """Get metadata from a board-object""" - data = { - "user": self.user, - "board_id": text.parse_int(board["id"]), - "board": board["name"], - "count": board["counts"]["pins"], - } - return data + for pin in self.api.board_pins(board["id"]): + url, pin_data = self.data_from_pin(pin) + pin_data.update(data) + yield Message.Url, url, pin_data class PinterestPinitExtractor(PinterestExtractor): @@ -136,59 +119,67 @@ class PinterestPinitExtractor(PinterestExtractor): class PinterestAPI(): - """Minimal interface for the pinterest API""" + """Minimal interface for the Pinterest Web API - def __init__(self, extractor, access_token=None): - self.log = extractor.log - self.session = extractor.session - self.access_token = ( - access_token or - extractor.config("access-token") or - "AfyIXxi1MJ6et0NlIl_vBchHbex-FSWylPyr2GJE2uu3W8A97QAAAAA" - ) + For a better and more complete implementation in PHP, see + - https://github.com/seregazhuk/php-pinterest-bot + """ - def pin(self, pin_id, fields="id,image,note"): + BASE_URL = "https://uk.pinterest.com" + HEADERS = { + "Accept" : "application/json, text/javascript, " + "*/*, q=0.01", + "Accept-Language" : "en-US,en;q=0.5", + "X-Pinterest-AppState": "active", + "X-APP-VERSION" : "cb1c7f9", + "X-Requested-With" : "XMLHttpRequest", + "Origin" : BASE_URL + "/", + } + + def __init__(self, extractor): + self.extractor = extractor + + def pin(self, pin_id): """Query information about a pin""" - endpoint = "pins/{}/".format(pin_id) - params = {"fields": fields} - return self._call(endpoint, params)["data"] + options = {"id": pin_id, "field_set_key": "detailed"} + return self._call("Pin", options)["resource_response"]["data"] - def board(self, user, board, fields="id,name,counts"): + def board(self, user, board): """Query information about a board""" - endpoint = "boards/{}/{}/".format(user, board) - params = {"fields": fields} - return self._call(endpoint, params)["data"] + options = {"slug": board, "username": user, + "field_set_key": "detailed"} + return self._call("Board", options)["resource_response"]["data"] - def board_pins(self, user, board, fields="id,image,note", limit=100): + def board_pins(self, board_id): """Yield all pins of a specific board""" - endpoint = "boards/{}/{}/pins/".format(user, board) - params = {"fields": fields, "limit": limit} - return self._pagination(endpoint, params) + options = {"board_id": board_id} + return self._pagination("BoardFeed", options) - def _call(self, endpoint, params): - params["access_token"] = self.access_token - url = "https://api.pinterest.com/v1/" + endpoint + def _call(self, resource, options): + url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource) + params = {"data": json.dumps({"options": options}), "source_url": ""} - response = self.session.get(url, params=params) - status = response.status_code + response = self.extractor.request( + url, params=params, headers=self.HEADERS, fatal=False) data = response.json() - if 200 <= status < 400 and data.get("data"): + if 200 <= response.status_code < 400 and not response.history: return data - msg = data.get("message", "") - if status == 404: - msg = msg.partition(" ")[0].lower() - raise exception.NotFoundError(msg) - self.log.error("API request failed: %s", msg or "") + if response.status_code == 404 or response.history: + raise exception.NotFoundError(self.extractor.subcategory) + self.extractor.log.error("API request failed") raise exception.StopExtraction() - def _pagination(self, endpoint, params): + def _pagination(self, resource, options): while True: - response = self._call(endpoint, params) - yield from response["data"] + data = self._call(resource, options) + yield from data["resource_response"]["data"] - cursor = response["page"]["cursor"] - if not cursor: + try: + bookmarks = data["resource"]["options"]["bookmarks"] + if not bookmarks or bookmarks[0] == "-end-": + return + options["bookmarks"] = bookmarks + except KeyError: return - params["cursor"] = cursor diff --git a/gallery_dl/extractor/powermanga.py b/gallery_dl/extractor/powermanga.py index 281e20db..3b3be1a1 100644 --- a/gallery_dl/extractor/powermanga.py +++ b/gallery_dl/extractor/powermanga.py @@ -18,7 +18,7 @@ class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor): test = [(("https://read.powermanga.org" "/read/one_piece_digital_colour_comics/en/0/75/"), { "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384", - "keyword": "9bf211d435060d1e38d3d13e4aaaa5a87381bfad", + "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3", })] diff --git a/gallery_dl/extractor/puremashiro.py b/gallery_dl/extractor/puremashiro.py deleted file mode 100644 index 00699faa..00000000 --- a/gallery_dl/extractor/puremashiro.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2018 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for http://reader.puremashiro.moe/""" - -from . import foolslide - - -class PuremashiroChapterExtractor(foolslide.FoolslideChapterExtractor): - """Extractor for manga-chapters from reader.puremashiro.moe""" - category = "puremashiro" - pattern = foolslide.chapter_pattern(r"reader\.puremashiro\.moe") - test = [(("http://reader.puremashiro.moe" - "/read/parallel-paradise-eng/en-us/0/20/"), { - "url": "00d5bc9cbb413ed584ddb091ae2418ca7801b136", - "keyword": "73bba3222758927e5a7cdc9e1db9d8307fe944c0", - })] - scheme = "http" - - -class PuremashiroMangaExtractor(foolslide.FoolslideMangaExtractor): - """Extractor for manga from reader.puremashiro.moe""" - category = "puremashiro" - pattern = foolslide.manga_pattern(r"reader\.puremashiro\.moe") - test = [("http://reader.puremashiro.moe/series/hayate-no-gotoku/", { - "url": "0cf77a623bff35b43323427a8fd1e40ff0e13c09", - "keyword": "1b57d724b259a1d81b6352d889a1aa5eb86a6ef9", - })] - scheme = "http" diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py new file mode 100644 index 00000000..3306bc58 --- /dev/null +++ b/gallery_dl/extractor/smugmug.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- + +# Copyright 2018 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from https://www.smugmug.com/""" + +from .common import Extractor, Message +from .. import text, util, exception + +BASE_PATTERN = ( + r"(?:smugmug:(?!album:)(?:https?://)?([^/]+)|" + r"(?:https?://)?([^.]+)\.smugmug\.com)") + + +class SmugmugExtractor(Extractor): + """Base class for smugmug extractors""" + category = "smugmug" + filename_fmt = ("{category}_{User[NickName]}_" + "{Image[UploadKey]}_{Image[ImageKey]}.{extension}") + + def __init__(self): + Extractor.__init__(self) + self.api = SmugmugAPI(self) + + @staticmethod + def _apply_largest(image, delete=True): + largest = image["Uris"]["LargestImage"] + if delete: + del image["Uris"] + for key in ("Url", "Width", "Height", "MD5", "Size", "Watermarked"): + if key in largest: + image[key] = largest[key] + return image["Url"] + + +class SmugmugAlbumExtractor(SmugmugExtractor): + """Extractor for smugmug albums""" + subcategory = "album" + directory_fmt = ["{category}", "{User[NickName]}", "{Album[Name]}"] + archive_fmt = "a_{Album[AlbumKey]}_{Image[ImageKey]}" + pattern = [r"smugmug:album:([^:]+)$"] + test = [ + ("smugmug:album:MN6kHH", { + "count": 0, + }), + ("smugmug:album:6Ffcgk", { + "count": 1, + "pattern": ".*/i-L4CxBdg/0/33e0b290/X3/i-L4CxBdg-X3.jpg", + }), + ("smugmug:album:drn76C", { + "count": 2, + "content": "864f6953cb04121290407a579611bc5087d117ee", + }), + ] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.album_id = match.group(1) + + def items(self): + album = self.api.album(self.album_id, "User") + user = album["Uris"]["User"] + + del user["Uris"] + del album["Uris"] + data = {"Album": album, "User": user} + + yield Message.Version, 1 + yield Message.Directory, data + + for image in self.api.album_images(self.album_id, "LargestImage"): + url = self._apply_largest(image) + data["Image"] = image + yield Message.Url, url, text.nameext_from_url(url, data) + + +class SmugmugImageExtractor(SmugmugExtractor): + """Extractor for individual smugmug images""" + subcategory = "image" + directory_fmt = ["{category}", "{User[NickName]}"] + archive_fmt = "{Image[ImageKey]}" + pattern = [BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#]+)"] + test = [("https://mikf.smugmug.com/Test/n-xnNH3s/i-L4CxBdg", { + "url": "905bfdef52ce1a731a4eae17e9ac348511e17ae4", + "keyword": "df63d36cfaeb128fda86802942d9a4271d3feafc", + "content": "626fe50d25fe49beeda15e116938db36e163c01f", + })] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.image_id = match.group(3) + + def items(self): + image = self.api.image(self.image_id, "LargestImage,ImageOwner") + user = image["Uris"]["ImageOwner"] + url = self._apply_largest(image) + + del user["Uris"] + data = {"Image": image, "User": user} + text.nameext_from_url(url, data) + + yield Message.Version, 1 + yield Message.Directory, data + yield Message.Url, url, data + + +class SmugmugPathExtractor(SmugmugExtractor): + """Extractor for smugmug albums from URL paths and users""" + subcategory = "path" + pattern = [BASE_PATTERN + r"((?:/[^/?&#a-mo-z][^/?&#]*)*)/?$"] + test = [ + ("https://mikf.smugmug.com/Test/", { + "pattern": "smugmug:album:xgkb4C$", + }), + ("https://mikf.smugmug.com/Test/n-xnNH3s", { + "pattern": "smugmug:album:xgkb4C$", + }), + ("https://mikf.smugmug.com/", { + "count": 4, + "pattern": "smugmug:album:(xgkb4C|MN6kHH|6Ffcgk|drn76C)$", + }), + ("smugmug:www.creativedogportraits.com/PortfolioGallery/", { + "pattern": "smugmug:album:txWXzs$", + }), + ("smugmug:www.creativedogportraits.com/", { + "pattern": "smugmug:album:txWXzs$", + }), + ("smugmug:https://www.creativedogportraits.com/", None), + ] + + def __init__(self, match): + SmugmugExtractor.__init__(self) + self.domain, self.user, self.path = match.groups() + + def items(self): + yield Message.Version, 1 + + if not self.user: + self.user = self.api.site_user(self.domain)["NickName"] + + if self.path: + data = self.api.user_urlpathlookup(self.user, self.path) + node = data["Uris"]["Node"] + + if node["Type"] == "Album": + nodes = (node,) + elif node["Type"] == "Folder": + nodes = self.album_nodes(node) + else: + nodes = () + + for node in nodes: + album_id = node["Uris"]["Album"].rpartition("/")[2] + yield Message.Queue, "smugmug:album:" + album_id, node + + else: + for album in self.api.user_albums(self.user): + uri = "smugmug:album:" + album["AlbumKey"] + yield Message.Queue, uri, album + + def album_nodes(self, root): + """Yield all descendant album nodes of 'root'""" + for node in self.api.node_children(root["NodeID"]): + if node["Type"] == "Album": + yield node + elif node["Type"] == "Folder": + yield from self.album_nodes(node) + + +class SmugmugAPI(): + """Minimal interface for the smugmug API v2""" + API_DOMAIN = "api.smugmug.com" + API_KEY = "DFqxg4jf7GrtsQ5PnbNB8899zKfnDrdK" + API_SECRET = ("fknV35p9r9BwZC4XbTzvCXpcSJRdD83S" + "9nMFQm25ndGBzNPnwRDbRnnVBvqt4xTq") + HEADERS = {"Accept": "application/json"} + + def __init__(self, extractor): + api_key = extractor.config("api-key", self.API_KEY) + api_secret = extractor.config("api-secret", self.API_SECRET) + token = extractor.config("access-token") + token_secret = extractor.config("access-token-secret") + + if api_key and api_secret and token and token_secret: + self.session = util.OAuthSession( + extractor.session, + api_key, api_secret, + token, token_secret, + ) + self.api_key = None + else: + self.session = extractor.session + self.api_key = api_key + + self.log = extractor.log + + def album(self, album_id, expands=None): + return self._expansion("album/" + album_id, expands) + + def image(self, image_id, expands=None): + return self._expansion("image/" + image_id, expands) + + def node(self, node_id, expands=None): + return self._expansion("node/" + node_id, expands) + + def user(self, username, expands=None): + return self._expansion("user/" + username, expands) + + def album_images(self, album_id, expands=None): + return self._pagination("album/" + album_id + "!images", expands) + + def node_children(self, node_id, expands=None): + return self._pagination("node/" + node_id + "!children", expands) + + def user_albums(self, username, expands=None): + return self._pagination("user/" + username + "!albums", expands) + + def site_user(self, domain): + return self._call("!siteuser", domain=domain)["Response"]["User"] + + def user_urlpathlookup(self, username, path): + endpoint = "user/" + username + "!urlpathlookup" + params = {"urlpath": path} + return self._expansion(endpoint, "Node", params) + + def _call(self, endpoint, params=None, domain=API_DOMAIN): + url = "https://{}/api/v2/{}".format(domain, endpoint) + params = params or {} + if self.api_key: + params["APIKey"] = self.api_key + params["_verbosity"] = "1" + + response = self.session.get(url, params=params, headers=self.HEADERS) + data = response.json() + + if 200 <= data["Code"] < 400: + return data + if data["Code"] == 404: + raise exception.NotFoundError() + if data["Code"] == 429: + self.log.error("Rate limit reached") + else: + self.log.error("API request failed") + self.log.debug(data) + raise exception.StopExtraction() + + def _expansion(self, endpoint, expands, params=None): + endpoint = self._extend(endpoint, expands) + result = self._apply_expansions(self._call(endpoint, params), expands) + if not result: + raise exception.NotFoundError() + return result[0] + + def _pagination(self, endpoint, expands=None): + endpoint = self._extend(endpoint, expands) + params = {"start": 1, "count": 100} + + while True: + data = self._call(endpoint, params) + yield from self._apply_expansions(data, expands) + + if "NextPage" not in data["Response"]["Pages"]: + return + params["start"] += params["count"] + + @staticmethod + def _extend(endpoint, expands): + if expands: + endpoint += "?_expand=" + expands + return endpoint + + @staticmethod + def _apply_expansions(data, expands): + + def unwrap(response): + locator = response["Locator"] + return response[locator] if locator in response else [] + + objs = unwrap(data["Response"]) + if not isinstance(objs, list): + objs = (objs,) + + if "Expansions" in data: + expansions = data["Expansions"] + expands = expands.split(",") + + for obj in objs: + uris = obj["Uris"] + + for name in expands: + uri = uris[name] + uris[name] = unwrap(expansions[uri]) + + return objs diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 0138aee2..770ca03f 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -10,6 +10,7 @@ from .common import Extractor, Message from .. import text, util, exception +from datetime import datetime, timedelta import re import time @@ -322,10 +323,11 @@ class TumblrAPI(): # daily rate limit if response.headers.get("x-ratelimit-perday-remaining") == "0": + reset = response.headers.get("x-ratelimit-perday-reset") self.log.error( "Daily API rate limit exceeded: aborting; " - "%s seconds until rate limit reset", - response.headers.get("x-ratelimit-perday-reset"), + "rate limit will reset at %s", + self._to_time(reset), ) raise exception.StopExtraction() @@ -334,11 +336,19 @@ class TumblrAPI(): if reset: self.log.info( "Hourly API rate limit exceeded; " - "waiting %s seconds for rate limit reset", - reset, + "waiting until %s for rate limit reset", + self._to_time(reset), ) time.sleep(int(reset) + 1) return self._call(blog, endpoint, params) self.log.error(data) raise exception.StopExtraction() + + @staticmethod + def _to_time(reset): + try: + reset_time = datetime.now() + timedelta(seconds=int(reset)) + except (ValueError, TypeError): + return "?" + return reset_time.strftime("%H:%M:%S") diff --git a/gallery_dl/util.py b/gallery_dl/util.py index af754f07..642162e4 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -510,11 +510,11 @@ class OAuthSession(): self.params["oauth_signature_method"] = "HMAC-SHA1" self.params["oauth_version"] = "1.0" - def get(self, url, params): + def get(self, url, params, **kwargs): params.update(self.params) params["oauth_nonce"] = self.nonce(16) params["oauth_timestamp"] = int(time.time()) - return self.session.get(url + self.sign(url, params)) + return self.session.get(url + self.sign(url, params), **kwargs) def sign(self, url, params): """Generate 'oauth_signature' value and return query string""" diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 191fe110..45c77642 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.3.5-dev" +__version__ = "1.4.0-dev" diff --git a/scripts/build_supportedsites.py b/scripts/build_supportedsites.py index 398d3623..db12075d 100755 --- a/scripts/build_supportedsites.py +++ b/scripts/build_supportedsites.py @@ -5,7 +5,7 @@ import os.path ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.realpath(ROOTDIR)) -import gallery_dl.extractor +import gallery_dl.extractor # noqa CATEGORY_MAP = { @@ -20,7 +20,6 @@ CATEGORY_MAP = { "e621" : "e621", "exhentai" : "ExHentai", "fallenangels" : "Fallen Angels Scans", - "gomanga" : "GoManga", "hbrowse" : "HBrowse", "hentai2read" : "Hentai2Read", "hentaifoundry" : "Hentai Foundry", @@ -30,14 +29,11 @@ CATEGORY_MAP = { "imagebam" : "ImageBam", "imagefap" : "ImageFap", "imgbox" : "imgbox", - "imgchili" : "imgChili", "imgth" : "imgth", "imgur" : "imgur", "jaiminisbox" : "Jaimini's Box", "kireicake" : "Kirei Cake", - "kisscomic" : "KissComic", "kissmanga" : "KissManga", - "loveisover" : "Love is Over Archive", "mangadex" : "MangaDex", "mangafox" : "Manga Fox", "mangahere" : "Manga Here", @@ -48,7 +44,6 @@ CATEGORY_MAP = { "nyafuu" : "Nyafuu Archive", "paheal" : "rule #34", "powermanga" : "PowerManga", - "puremashiro" : "Pure Mashiro", "readcomiconline": "Read Comic Online", "rbt" : "RebeccaBlackTech", "rule34" : "Rule 34", @@ -58,10 +53,9 @@ CATEGORY_MAP = { "senmanga" : "Sen Manga", "sensescans" : "Sense-Scans", "slideshare" : "SlideShare", - "spectrumnexus" : "Spectrum Nexus", + "smugmug" : "SmugMug", "thebarchive" : "The /b/ Archive", "worldthree" : "World Three", - "yeet" : "YEET Archive", "xvideos" : "XVideos", } @@ -72,6 +66,7 @@ SUBCATEGORY_MAP = { "issue" : "Comic-Issues", "manga" : "Manga", "me" : "pixiv.me Links", + "path" : "Images from Users and Folders", "pinit" : "pin.it Links", "popular": "Popular Images", "search" : "Search Results", diff --git a/test/test_results.py b/test/test_results.py index fd370dab..2e13ebea 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -16,14 +16,11 @@ from gallery_dl import extractor, job, config, exception # these don't work on travis-ci TRAVIS_SKIP = { "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", - "archivedmoe", "archiveofsins", "thebarchive", + "archivedmoe", "archiveofsins", "thebarchive", "sankaku", "idolcomplex", } # temporary issues, etc. BROKEN = { - "gomanga", # server down - "pinterest", # access tokens have been set to 10 requests per hour - "puremashiro", # online reader down } @@ -146,9 +143,9 @@ def generate_tests(): fltr = lambda c, bc: c in argv or bc in argv # noqa: E731 del sys.argv[1:] else: - skip = BROKEN.copy() + skip = set(BROKEN) if "CI" in os.environ and "TRAVIS" in os.environ: - skip |= TRAVIS_SKIP + skip |= set(TRAVIS_SKIP) print("skipping:", ", ".join(skip)) fltr = lambda c, bc: c not in skip # noqa: E731