From 8704d850bf0e4874889debf259f71a46da998aed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 19 Feb 2018 18:24:56 +0100 Subject: [PATCH] add explicit proxy support (#76) - '--proxy' as command-line argument - 'extractor.*.proxy' as config option --- CHANGELOG.md | 4 +++- docs/configuration.rst | 33 ++++++++++++++++++++++++++++++++- docs/gallery-dl.conf | 1 + gallery_dl/extractor/common.py | 22 +++++++++++++++++++--- gallery_dl/option.py | 5 +++++ 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 453cb49e..6f29c173 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,9 @@ # Changelog ## Unreleased -- Fixed extraction issues for `mangastream` and `pixiv` +- Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76)) +- Fixed ugoira extraction for `pixiv` ([#78](https://github.com/mikf/gallery-dl/issues/78)) +- Fixed miscellaneous extraction issues for `mangastream` ## 1.2.0 - 2018-02-16 - Added support for: diff --git a/docs/configuration.rst b/docs/configuration.rst index 05c75798..73f7b33a 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -331,6 +331,36 @@ Description Source to read additional cookies from. =========== ===== +extractor.*.proxy +----------------- +=========== ===== +Type ``string`` or ``object`` +Default ``null`` +Description Proxy (or proxies) to be used for remote connections. + + * If this is a ``string``, it is the proxy URL for all + outgoing requests. + * If this is an ``object``, it is a scheme-to-proxy mapping to + specify different proxy URLs for each scheme. + It is also possible to set a proxy for a specific host by using + ``scheme://host`` as key. + See `Requests' proxy documentation`_ for more details. + + Example: + + .. 
code:: + + { + "http": "http://10.10.1.10:3128", + "https": "http://10.10.1.10:1080", + "http://10.20.1.128": "http://10.10.1.10:5323" + } + + Note that all proxy URLs should include a scheme, + otherwise ``http://`` is assumed. +=========== ===== + + extractor.*.user-agent ---------------------- =========== ===== @@ -348,7 +378,7 @@ extractor.*.keywords -------------------- =========== ===== Type ``object`` -Example ``{"type": "Pixel Art", "type_id": 123}`` +Example ``{"type": "Pixel Art", "type_id": 123}`` Description Additional key-value pairs to be added to each metadata dictionary. =========== ===== @@ -811,6 +841,7 @@ How To - login and visit Tumblr's Applications_ section .. _requests.request(): https://docs.python-requests.org/en/master/api/#requests.request .. _timeout: https://docs.python-requests.org/en/latest/user/advanced/#timeouts .. _verify: https://docs.python-requests.org/en/master/user/advanced/#ssl-cert-verification +.. _`Requests' proxy documentation`: https://docs.python-requests.org/en/master/user/advanced/#proxies .. _format string: https://docs.python.org/3/library/string.html#formatstrings .. _format strings: https://docs.python.org/3/library/string.html#formatstrings .. 
_strptime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 22f7276d..6dc586be 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -17,6 +17,7 @@ "extractor": { "archive": null, + "proxy": null, "skip": true, "sleep": 0, diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 7737a933..8e42c32c 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -34,8 +34,9 @@ class Extractor(): def __init__(self): self.session = requests.Session() self.log = logging.getLogger(self.category) - self._set_cookies(self.config("cookies")) self._set_headers() + self._set_cookies() + self._set_proxies() def __iter__(self): return self.items() @@ -105,8 +106,9 @@ class Extractor(): "user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:54.0) " "Gecko/20100101 Firefox/54.0")) - def _set_cookies(self, cookies): - """Populate the cookiejar with 'cookies'""" + def _set_cookies(self): + """Populate the session's cookiejar""" + cookies = self.config("cookies") if cookies: if isinstance(cookies, dict): setcookie = self.session.cookies.set @@ -120,6 +122,20 @@ class Extractor(): except OSError as exc: self.log.warning("cookies: %s", exc) + def _set_proxies(self): + """Update the session's proxy map""" + proxies = self.config("proxy") + if proxies: + if isinstance(proxies, str): + proxies = {"http": proxies, "https": proxies} + if isinstance(proxies, dict): + for scheme, proxy in proxies.items(): + if "://" not in proxy: + proxies[scheme] = "http://" + proxy.lstrip("/") + self.session.proxies = proxies + else: + self.log.warning("invalid proxy specifier: %s", proxies) + def _check_cookies(self, cookienames, domain=None): """Check if all 'cookienames' are in the session's cookiejar""" if not domain and self.cookiedomain: diff --git a/gallery_dl/option.py b/gallery_dl/option.py index b464a28a..03d5b263 100644 --- a/gallery_dl/option.py +++ 
b/gallery_dl/option.py @@ -88,6 +88,11 @@ def build_parser(): metavar="FILE", action=ConfigAction, dest="cookies", help="File to load additional cookies from", ) + general.add_argument( + "--proxy", + metavar="URL", action=ConfigAction, dest="proxy", + help="Use the specified proxy", + ) output = parser.add_argument_group("Output Options") output.add_argument(