diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 3c336fac..d350eadc 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2016 Mike Fährmann +# Copyright 2014-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,12 +8,11 @@ from __future__ import unicode_literals, print_function -__author__ = "Mike Fährmann" -__copyright__ = "Copyright 2014-2016 Mike Fährmann" - -__license__ = "GPLv2" +__author__ = "Mike Fährmann" +__copyright__ = "Copyright 2014-2017 Mike Fährmann" +__license__ = "GPLv2" __maintainer__ = "Mike Fährmann" -__email__ = "mike_faehrmann@web.de" +__email__ = "mike_faehrmann@web.de" import sys @@ -21,12 +20,12 @@ if sys.hexversion < 0x3030000: print("Python 3.3+ required", file=sys.stderr) sys.exit(1) -import os import argparse import json from . import config, extractor, job, exception from .version import __version__ + def build_cmdline_parser(): parser = argparse.ArgumentParser( description='Download images from various sources') @@ -64,7 +63,8 @@ def build_cmdline_parser(): ) parser.add_argument( "--list-extractors", dest="list_extractors", action="store_true", - help="print a list of extractor classes with description and example URL", + help=("print a list of extractor classes " + "with description and example URL"), ) parser.add_argument( "--list-keywords", dest="list_keywords", action="store_true", @@ -97,12 +97,14 @@ def parse_option(opt): except ValueError: print("Invalid 'key=value' pair:", opt, file=sys.stderr) + def sanatize_input(file): for line in file: line = line.strip() if line: yield line + def main(): try: config.load() diff --git a/gallery_dl/adapter.py b/gallery_dl/adapter.py index a0842ee4..ad343a40 100644 --- a/gallery_dl/adapter.py +++ b/gallery_dl/adapter.py @@ -12,6 +12,7 @@ from requests.adapters import BaseAdapter from requests 
import Response, codes import io + class FileAdapter(BaseAdapter): def send(self, request, **kwargs): diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py index 63c24681..b95b43de 100644 --- a/gallery_dl/cache.py +++ b/gallery_dl/cache.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Decorator to keep function results in a combined in-memory and database cache""" +"""Decorator to keep function results in an in-memory and database cache""" import sqlite3 import pickle @@ -113,11 +113,13 @@ class DatabaseCache(CacheModule): raise RuntimeError() path = os.path.expanduser(os.path.expandvars(path)) self.db = sqlite3.connect(path, timeout=30, check_same_thread=False) - self.db.execute("CREATE TABLE IF NOT EXISTS data (" - "key TEXT PRIMARY KEY," - "value TEXT," - "expires INTEGER" - ")") + self.db.execute( + "CREATE TABLE IF NOT EXISTS data (" + "key TEXT PRIMARY KEY," + "value TEXT," + "expires INTEGER" + ")" + ) def __getitem__(self, key): key, timestamp = key @@ -127,7 +129,12 @@ class DatabaseCache(CacheModule): cursor.execute("BEGIN EXCLUSIVE") except sqlite3.OperationalError: """workaround for python 3.6""" - cursor.execute("SELECT value, expires FROM data WHERE key=?", (key,)) + cursor.execute( + "SELECT value, expires " + "FROM data " + "WHERE key=?", + (key,) + ) value, expires = cursor.fetchone() if timestamp < expires: self.commit() @@ -192,6 +199,7 @@ def build_cache_decorator(*modules): module = CacheChain(modules) else: module = modules[0] + def decorator(maxage=3600, keyarg=None): def wrap(func): return CacheDecorator(func, module, maxage, keyarg) diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index 82f922a0..9ea2ab2c 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -14,6 +14,7 @@ import urllib.parse from . 
import text from .cache import cache + def request_func(self, *args): cookies = _cookiecache(self.root) if cookies: @@ -25,6 +26,7 @@ def request_func(self, *args): _cookiecache(self.root, self.session.cookies) return response + def solve_challenge(session, response): session.headers["Referer"] = response.url page = response.text @@ -37,17 +39,20 @@ def solve_challenge(session, response): url = urllib.parse.urljoin(response.url, "/cdn-cgi/l/chk_jschl") return session.get(url, params=params) + def solve_jschl(url, page): """Solve challenge to get 'jschl_answer' value""" data, pos = text.extract_all(page, ( ('var' , ',f, ', '='), ('key' , '"', '"'), - ('expr', ':', '}') + ('expr', ':', '}'), )) solution = evaluate_expression(data["expr"]) variable = "{}.{}".format(data["var"], data["key"]) vlength = len(variable) - expressions = text.extract(page, "'challenge-form');", "f.submit();", pos)[0] + expressions = text.extract( + page, "'challenge-form');", "f.submit();", pos + )[0] for expr in expressions.split(";")[1:]: if expr.startswith(variable): func = operator_functions[expr[vlength]] @@ -56,8 +61,9 @@ def solve_jschl(url, page): elif expr.startswith("a.value"): return solution + len(urllib.parse.urlsplit(url).netloc) + def evaluate_expression(expr): - """Evaluate a Javascript expression for the challange and return its value""" + """Evaluate a Javascript expression for the challenge""" stack = [] ranges = [] value = "" @@ -75,6 +81,7 @@ def evaluate_expression(expr): value += str(num) return int(value) + operator_functions = { "+": operator.add, "-": operator.sub, @@ -88,6 +95,7 @@ expression_values = { "+!!": 1, } + @cache(maxage=365*24*60*60, keyarg=0) def _cookiecache(key, item=None): return item diff --git a/gallery_dl/config.py b/gallery_dl/config.py index 5f993e1a..b1c15b8b 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -12,6 +12,7 @@ import sys import json import os.path + # -------------------------------------------------------------------- 
# public interface @@ -32,10 +33,12 @@ def load(*files, strict=False): print("Error while loading '", path, "':", sep="", file=sys.stderr) print(exception, file=sys.stderr) + def clear(): """Reset configuration to en empty state""" globals()["_config"] = {} + def get(keys, default=None): """Get the value of property 'key' or a default-value if it doenst exist""" conf = _config @@ -46,6 +49,7 @@ def get(keys, default=None): except (KeyError, AttributeError): return default + def interpolate(keys, default=None): """Interpolate the value of 'key'""" conf = _config @@ -57,6 +61,7 @@ def interpolate(keys, default=None): except (KeyError, AttributeError): return default + def set(keys, value): """Set the value of property 'key' for this session""" conf = _config @@ -69,6 +74,7 @@ def set(keys, value): conf = temp conf[keys[-1]] = value + def setdefault(keys, value): """Set the value of property 'key' if it doesn't exist""" conf = _config diff --git a/gallery_dl/downloader/__init__.py b/gallery_dl/downloader/__init__.py index 0750caa8..d5786d7a 100644 --- a/gallery_dl/downloader/__init__.py +++ b/gallery_dl/downloader/__init__.py @@ -8,6 +8,7 @@ import importlib + def find(scheme): """Return downloader class suitable for handling the given scheme""" try: @@ -21,6 +22,7 @@ def find(scheme): except ImportError: return None + # -------------------------------------------------------------------- # internals diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py index 6e08b9ad..377094dc 100644 --- a/gallery_dl/downloader/common.py +++ b/gallery_dl/downloader/common.py @@ -10,6 +10,7 @@ import os + class BasicDownloader(): """Base class for downloader modules""" diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index b472ebd9..f361ec67 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -6,13 +6,14 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free 
Software Foundation. -"""Downloader module for http urls""" +"""Downloader module for http:// and https:// urls""" import time import requests import mimetypes from .common import BasicDownloader + class Downloader(BasicDownloader): def __init__(self, output): @@ -38,7 +39,9 @@ class Downloader(BasicDownloader): if response.status_code != requests.codes.ok: tries += 1 self.out.error(pathfmt.path, 'HTTP status "{} {}"'.format( - response.status_code, response.reason), tries, self.max_tries) + response.status_code, response.reason), + tries, self.max_tries + ) if response.status_code == 404: return self.max_tries time.sleep(1) diff --git a/gallery_dl/downloader/text.py b/gallery_dl/downloader/text.py index f326334c..ac0f12e3 100644 --- a/gallery_dl/downloader/text.py +++ b/gallery_dl/downloader/text.py @@ -10,6 +10,7 @@ from .common import BasicDownloader + class Downloader(BasicDownloader): def __init__(self, output): diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py index 43e3d1e8..8a9d5bc9 100644 --- a/gallery_dl/exception.py +++ b/gallery_dl/exception.py @@ -6,14 +6,18 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
+ class NoExtractorError(Exception): """No extractor can handle the given URL""" + class AuthenticationError(Exception): """Invalid or missing login information""" + class AuthorizationError(Exception): """Insufficient privileges to access a resource""" + class NotFoundError(Exception): """Requested resource (gallery/image) does not exist""" diff --git a/gallery_dl/iso639_1.py b/gallery_dl/iso639_1.py index 8d829b36..3ad8c92a 100644 --- a/gallery_dl/iso639_1.py +++ b/gallery_dl/iso639_1.py @@ -8,10 +8,12 @@ """Conversion between language names and ISO 639-1 codes""" + def code_to_language(code, default="English"): """Map an ISO 639-1 language code to its actual name""" return codes.get(code.lower(), default) + def language_to_code(lang, default="en"): """Map a language name to its ISO 639-1 code""" lang = lang.capitalize() @@ -20,6 +22,7 @@ def language_to_code(lang, default="en"): return code return default + codes = { "ar": "Arabic", "cs": "Czech", diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 0730d601..6ef49381 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -11,6 +11,7 @@ import hashlib from . 
import extractor, downloader, path, output, exception from .extractor.message import Message + class Job(): """Base class for Job-types""" @@ -66,6 +67,7 @@ class Job(): kwdict["category"] = self.extractor.category kwdict["subcategory"] = self.extractor.subcategory + class DownloadJob(Job): """Download images into appropriate directory/filename locations""" @@ -188,8 +190,8 @@ class HashJob(DownloadJob): def __init__(self, url, content=False): DownloadJob.__init__(self, url) - self.content = content - self.hash_url = hashlib.sha1() + self.content = content + self.hash_url = hashlib.sha1() self.hash_keyword = hashlib.sha1() self.hash_content = hashlib.sha1() if content: diff --git a/gallery_dl/output.py b/gallery_dl/output.py index 7c4d3eb3..4f8c5ea5 100644 --- a/gallery_dl/output.py +++ b/gallery_dl/output.py @@ -11,6 +11,7 @@ import sys import shutil from . import config + def select(): """Automatically select a suitable printer class""" pdict = { @@ -31,6 +32,7 @@ def select(): else: raise Exception("invalid output mode: " + omode) + def safeprint(txt, **kwargs): """Handle unicode errors and replace invalid characters""" try: @@ -89,7 +91,11 @@ class TerminalPrinter(Printer): """Reduce the length of 'txt' to the width of the terminal""" if self.short and len(txt) > self.width: hwidth = self.width // 2 - OFFSET - return "".join((txt[:hwidth-1], CHAR_ELLIPSIES, txt[-hwidth-(self.width%2):])) + return "".join(( + txt[:hwidth-1], + CHAR_ELLIPSIES, + txt[-hwidth-(self.width % 2):] + )) return txt @@ -109,7 +115,8 @@ class ColorPrinter(TerminalPrinter): def error(self, file, error, tries, max_tries): if tries <= 1 and hasattr(file, "name"): print("\r\033[1;31m", self.shorten(file.name), sep="") - print("\033[0;31m[Error]\033[0m ", error, " (", tries, "/", max_tries, ")", sep="") + print("\033[0;31m[Error]\033[0m ", error, + " (", tries, "/", max_tries, ")", sep="") if os.name == "nt": diff --git a/gallery_dl/path.py b/gallery_dl/path.py index 2359140a..b80ad9b0 100644 
--- a/gallery_dl/path.py +++ b/gallery_dl/path.py @@ -9,6 +9,7 @@ import os from . import config, text + class PathFormat(): def __init__(self, extractor): diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 2f82356c..c89f9567 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -14,10 +14,12 @@ import os.path import html import urllib.parse + def remove_html(text): """Remove html-tags from a string""" return " ".join(re.sub("<[^>]+?>", " ", text).split()) + def filename_from_url(url): """Extract the last part of an url to use as a filename""" try: @@ -27,8 +29,9 @@ def filename_from_url(url): except ValueError: return url + def nameext_from_url(url, data=None): - """Extract the last part of an url and fill keywords of 'data' accordingly""" + """Extract the last part of an url and fill 'data' accordingly""" if data is None: data = {} data["filename"] = unquote(filename_from_url(url)) @@ -36,6 +39,7 @@ def nameext_from_url(url, data=None): data["extension"] = ext[1:].lower() return data + def clean_path_windows(path): """Remove illegal characters from a path-segment (Windows)""" try: @@ -43,6 +47,7 @@ def clean_path_windows(path): except TypeError: return path + def clean_path_posix(path): """Remove illegal characters from a path-segment (Posix)""" try: @@ -50,17 +55,20 @@ def clean_path_posix(path): except AttributeError: return path + def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()): """Shorten a path segment to at most 'limit' bytes""" return (path.encode(encoding)[:limit]).decode(encoding, "ignore") -def shorten_filename(filename, limit=255, encoding=sys.getfilesystemencoding()): - """Shorten a filename to at most 'limit' bytes while preserving extension""" - name, extension = os.path.splitext(filename) + +def shorten_filename(fname, limit=255, encoding=sys.getfilesystemencoding()): + """Shorten filename to at most 'limit' bytes while preserving extension""" + name, extension = os.path.splitext(fname) bext = 
extension.encode(encoding) bname = name.encode(encoding)[:limit-len(bext)] return bname.decode(encoding, "ignore") + extension + def extract(txt, begin, end, pos=0): """Extract the text between 'begin' and 'end' from 'txt' @@ -88,6 +96,7 @@ def extract(txt, begin, end, pos=0): except ValueError: return None, pos + def extract_all(txt, rules, pos=0, values=None): """Calls extract for each rule and returns the result in a dict""" if values is None: @@ -98,6 +107,7 @@ def extract_all(txt, rules, pos=0, values=None): values[key] = result return values, pos + def extract_iter(txt, begin, end, pos=0): """Yield all values obtained by repeated calls to text.extract""" while True: @@ -106,6 +116,7 @@ def extract_iter(txt, begin, end, pos=0): return yield value + if os.name == "nt": clean_path = clean_path_windows else: diff --git a/test/test_config.py b/test/test_config.py index 7644e73f..95e681c5 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -12,6 +12,7 @@ import gallery_dl.config as config import os import tempfile + class TestConfig(unittest.TestCase): def setUp(self): @@ -51,5 +52,6 @@ class TestConfig(unittest.TestCase): self.assertEqual(config.interpolate(["b", "d"], "2"), 123) self.assertEqual(config.interpolate(["d", "d"], "2"), 123) + if __name__ == '__main__': unittest.main() diff --git a/test/test_extractors.py b/test/test_extractors.py index f9084c7f..8e8d7f3a 100644 --- a/test/test_extractors.py +++ b/test/test_extractors.py @@ -38,7 +38,7 @@ class TestExtractors(unittest.TestCase): self.assertEqual(hjob.hash_content.hexdigest(), result["content"]) -# dynamically genetate tests +# dynamically generate tests def _generate_test(extr, tcase): def test(self): url, result = tcase @@ -46,6 +46,7 @@ def _generate_test(extr, tcase): self._run_test(extr, url, result) return test + # enable selective testing for direct calls extractors = extractor.extractors() if __name__ == '__main__' and len(sys.argv) > 1: diff --git a/test/test_iso639_1.py 
b/test/test_iso639_1.py index 50c7c71d..2d2042ff 100644 --- a/test/test_iso639_1.py +++ b/test/test_iso639_1.py @@ -10,6 +10,7 @@ import unittest import gallery_dl.iso639_1 as iso639_1 + class TestISO639_1(unittest.TestCase): def test_code_to_language(self): @@ -17,11 +18,13 @@ class TestISO639_1(unittest.TestCase): self.assertEqual(iso639_1.code_to_language("FR"), "French") self.assertEqual(iso639_1.code_to_language("xx"), "English") self.assertEqual(iso639_1.code_to_language("xx", default=None), None) + def test_language_to_code(self): self.assertEqual(iso639_1.language_to_code("English"), "en") self.assertEqual(iso639_1.language_to_code("fRENch"), "fr") - self.assertEqual(iso639_1.language_to_code("Nothing"), "en") - self.assertEqual(iso639_1.language_to_code("Nothing", default=None), None) + self.assertEqual(iso639_1.language_to_code("xx"), "en") + self.assertEqual(iso639_1.language_to_code("xx", default=None), None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_text.py b/test/test_text.py index eeddb1fe..e26dde76 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -11,6 +11,7 @@ import unittest import sys import gallery_dl.text as text + class TestText(unittest.TestCase): def test_remove_html(self): @@ -62,7 +63,7 @@ class TestText(unittest.TestCase): } for case, result in cases.items(): self.assertEqual(text.clean_path_windows(case), result[0]) - self.assertEqual(text.clean_path_posix (case), result[1]) + self.assertEqual(text.clean_path_posix(case), result[1]) def test_shorten_path(self): cases = { @@ -86,8 +87,9 @@ class TestText(unittest.TestCase): } enc = sys.getfilesystemencoding() for case, result in cases.items(): - self.assertEqual(text.shorten_filename(case), result) - self.assertTrue(len(text.shorten_filename(case).encode(enc)) <= 255) + fname = text.shorten_filename(case) + self.assertEqual(fname, result) + self.assertTrue(len(fname.encode(enc)) <= 255) def test_extract(self): cases = { @@ -121,5 +123,6 @@ class 
TestText(unittest.TestCase): result = ["c", "b", "a", "d"] self.assertEqual(list(text.extract_iter(txt, "[", "]")), result) + if __name__ == '__main__': unittest.main()