code adjustments according to pep8
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2014-2016 Mike Fährmann
|
# Copyright 2014-2017 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -8,12 +8,11 @@
|
|||||||
|
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
__author__ = "Mike Fährmann"
|
__author__ = "Mike Fährmann"
|
||||||
__copyright__ = "Copyright 2014-2016 Mike Fährmann"
|
__copyright__ = "Copyright 2014-2017 Mike Fährmann"
|
||||||
|
__license__ = "GPLv2"
|
||||||
__license__ = "GPLv2"
|
|
||||||
__maintainer__ = "Mike Fährmann"
|
__maintainer__ = "Mike Fährmann"
|
||||||
__email__ = "mike_faehrmann@web.de"
|
__email__ = "mike_faehrmann@web.de"
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@@ -21,12 +20,12 @@ if sys.hexversion < 0x3030000:
|
|||||||
print("Python 3.3+ required", file=sys.stderr)
|
print("Python 3.3+ required", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
import os
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
from . import config, extractor, job, exception
|
from . import config, extractor, job, exception
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
|
|
||||||
|
|
||||||
def build_cmdline_parser():
|
def build_cmdline_parser():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Download images from various sources')
|
description='Download images from various sources')
|
||||||
@@ -64,7 +63,8 @@ def build_cmdline_parser():
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--list-extractors", dest="list_extractors", action="store_true",
|
"--list-extractors", dest="list_extractors", action="store_true",
|
||||||
help="print a list of extractor classes with description and example URL",
|
help=("print a list of extractor classes "
|
||||||
|
"with description and example URL"),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--list-keywords", dest="list_keywords", action="store_true",
|
"--list-keywords", dest="list_keywords", action="store_true",
|
||||||
@@ -97,12 +97,14 @@ def parse_option(opt):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
print("Invalid 'key=value' pair:", opt, file=sys.stderr)
|
print("Invalid 'key=value' pair:", opt, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def sanatize_input(file):
|
def sanatize_input(file):
|
||||||
for line in file:
|
for line in file:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line:
|
if line:
|
||||||
yield line
|
yield line
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
try:
|
try:
|
||||||
config.load()
|
config.load()
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from requests.adapters import BaseAdapter
|
|||||||
from requests import Response, codes
|
from requests import Response, codes
|
||||||
import io
|
import io
|
||||||
|
|
||||||
|
|
||||||
class FileAdapter(BaseAdapter):
|
class FileAdapter(BaseAdapter):
|
||||||
|
|
||||||
def send(self, request, **kwargs):
|
def send(self, request, **kwargs):
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
"""Decorator to keep function results in a combined in-memory and database cache"""
|
"""Decorator to keep function results in an in-memory and database cache"""
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import pickle
|
import pickle
|
||||||
@@ -113,11 +113,13 @@ class DatabaseCache(CacheModule):
|
|||||||
raise RuntimeError()
|
raise RuntimeError()
|
||||||
path = os.path.expanduser(os.path.expandvars(path))
|
path = os.path.expanduser(os.path.expandvars(path))
|
||||||
self.db = sqlite3.connect(path, timeout=30, check_same_thread=False)
|
self.db = sqlite3.connect(path, timeout=30, check_same_thread=False)
|
||||||
self.db.execute("CREATE TABLE IF NOT EXISTS data ("
|
self.db.execute(
|
||||||
"key TEXT PRIMARY KEY,"
|
"CREATE TABLE IF NOT EXISTS data ("
|
||||||
"value TEXT,"
|
"key TEXT PRIMARY KEY,"
|
||||||
"expires INTEGER"
|
"value TEXT,"
|
||||||
")")
|
"expires INTEGER"
|
||||||
|
")"
|
||||||
|
)
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
key, timestamp = key
|
key, timestamp = key
|
||||||
@@ -127,7 +129,12 @@ class DatabaseCache(CacheModule):
|
|||||||
cursor.execute("BEGIN EXCLUSIVE")
|
cursor.execute("BEGIN EXCLUSIVE")
|
||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
"""workaround for python 3.6"""
|
"""workaround for python 3.6"""
|
||||||
cursor.execute("SELECT value, expires FROM data WHERE key=?", (key,))
|
cursor.execute(
|
||||||
|
"SELECT value, expires "
|
||||||
|
"FROM data "
|
||||||
|
"WHERE key=?",
|
||||||
|
(key,)
|
||||||
|
)
|
||||||
value, expires = cursor.fetchone()
|
value, expires = cursor.fetchone()
|
||||||
if timestamp < expires:
|
if timestamp < expires:
|
||||||
self.commit()
|
self.commit()
|
||||||
@@ -192,6 +199,7 @@ def build_cache_decorator(*modules):
|
|||||||
module = CacheChain(modules)
|
module = CacheChain(modules)
|
||||||
else:
|
else:
|
||||||
module = modules[0]
|
module = modules[0]
|
||||||
|
|
||||||
def decorator(maxage=3600, keyarg=None):
|
def decorator(maxage=3600, keyarg=None):
|
||||||
def wrap(func):
|
def wrap(func):
|
||||||
return CacheDecorator(func, module, maxage, keyarg)
|
return CacheDecorator(func, module, maxage, keyarg)
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import urllib.parse
|
|||||||
from . import text
|
from . import text
|
||||||
from .cache import cache
|
from .cache import cache
|
||||||
|
|
||||||
|
|
||||||
def request_func(self, *args):
|
def request_func(self, *args):
|
||||||
cookies = _cookiecache(self.root)
|
cookies = _cookiecache(self.root)
|
||||||
if cookies:
|
if cookies:
|
||||||
@@ -25,6 +26,7 @@ def request_func(self, *args):
|
|||||||
_cookiecache(self.root, self.session.cookies)
|
_cookiecache(self.root, self.session.cookies)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
def solve_challenge(session, response):
|
def solve_challenge(session, response):
|
||||||
session.headers["Referer"] = response.url
|
session.headers["Referer"] = response.url
|
||||||
page = response.text
|
page = response.text
|
||||||
@@ -37,17 +39,20 @@ def solve_challenge(session, response):
|
|||||||
url = urllib.parse.urljoin(response.url, "/cdn-cgi/l/chk_jschl")
|
url = urllib.parse.urljoin(response.url, "/cdn-cgi/l/chk_jschl")
|
||||||
return session.get(url, params=params)
|
return session.get(url, params=params)
|
||||||
|
|
||||||
|
|
||||||
def solve_jschl(url, page):
|
def solve_jschl(url, page):
|
||||||
"""Solve challenge to get 'jschl_answer' value"""
|
"""Solve challenge to get 'jschl_answer' value"""
|
||||||
data, pos = text.extract_all(page, (
|
data, pos = text.extract_all(page, (
|
||||||
('var' , ',f, ', '='),
|
('var' , ',f, ', '='),
|
||||||
('key' , '"', '"'),
|
('key' , '"', '"'),
|
||||||
('expr', ':', '}')
|
('expr', ':', '}'),
|
||||||
))
|
))
|
||||||
solution = evaluate_expression(data["expr"])
|
solution = evaluate_expression(data["expr"])
|
||||||
variable = "{}.{}".format(data["var"], data["key"])
|
variable = "{}.{}".format(data["var"], data["key"])
|
||||||
vlength = len(variable)
|
vlength = len(variable)
|
||||||
expressions = text.extract(page, "'challenge-form');", "f.submit();", pos)[0]
|
expressions = text.extract(
|
||||||
|
page, "'challenge-form');", "f.submit();", pos
|
||||||
|
)[0]
|
||||||
for expr in expressions.split(";")[1:]:
|
for expr in expressions.split(";")[1:]:
|
||||||
if expr.startswith(variable):
|
if expr.startswith(variable):
|
||||||
func = operator_functions[expr[vlength]]
|
func = operator_functions[expr[vlength]]
|
||||||
@@ -56,8 +61,9 @@ def solve_jschl(url, page):
|
|||||||
elif expr.startswith("a.value"):
|
elif expr.startswith("a.value"):
|
||||||
return solution + len(urllib.parse.urlsplit(url).netloc)
|
return solution + len(urllib.parse.urlsplit(url).netloc)
|
||||||
|
|
||||||
|
|
||||||
def evaluate_expression(expr):
|
def evaluate_expression(expr):
|
||||||
"""Evaluate a Javascript expression for the challange and return its value"""
|
"""Evaluate a Javascript expression for the challenge"""
|
||||||
stack = []
|
stack = []
|
||||||
ranges = []
|
ranges = []
|
||||||
value = ""
|
value = ""
|
||||||
@@ -75,6 +81,7 @@ def evaluate_expression(expr):
|
|||||||
value += str(num)
|
value += str(num)
|
||||||
return int(value)
|
return int(value)
|
||||||
|
|
||||||
|
|
||||||
operator_functions = {
|
operator_functions = {
|
||||||
"+": operator.add,
|
"+": operator.add,
|
||||||
"-": operator.sub,
|
"-": operator.sub,
|
||||||
@@ -88,6 +95,7 @@ expression_values = {
|
|||||||
"+!!": 1,
|
"+!!": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@cache(maxage=365*24*60*60, keyarg=0)
|
@cache(maxage=365*24*60*60, keyarg=0)
|
||||||
def _cookiecache(key, item=None):
|
def _cookiecache(key, item=None):
|
||||||
return item
|
return item
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import sys
|
|||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# public interface
|
# public interface
|
||||||
|
|
||||||
@@ -32,10 +33,12 @@ def load(*files, strict=False):
|
|||||||
print("Error while loading '", path, "':", sep="", file=sys.stderr)
|
print("Error while loading '", path, "':", sep="", file=sys.stderr)
|
||||||
print(exception, file=sys.stderr)
|
print(exception, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def clear():
|
def clear():
|
||||||
"""Reset configuration to an empty state"""
|
"""Reset configuration to an empty state"""
|
||||||
globals()["_config"] = {}
|
globals()["_config"] = {}
|
||||||
|
|
||||||
|
|
||||||
def get(keys, default=None):
|
def get(keys, default=None):
|
||||||
"""Get the value of property 'key' or a default if it doesn't exist"""
|
"""Get the value of property 'key' or a default if it doesn't exist"""
|
||||||
conf = _config
|
conf = _config
|
||||||
@@ -46,6 +49,7 @@ def get(keys, default=None):
|
|||||||
except (KeyError, AttributeError):
|
except (KeyError, AttributeError):
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
def interpolate(keys, default=None):
|
def interpolate(keys, default=None):
|
||||||
"""Interpolate the value of 'key'"""
|
"""Interpolate the value of 'key'"""
|
||||||
conf = _config
|
conf = _config
|
||||||
@@ -57,6 +61,7 @@ def interpolate(keys, default=None):
|
|||||||
except (KeyError, AttributeError):
|
except (KeyError, AttributeError):
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
def set(keys, value):
|
def set(keys, value):
|
||||||
"""Set the value of property 'key' for this session"""
|
"""Set the value of property 'key' for this session"""
|
||||||
conf = _config
|
conf = _config
|
||||||
@@ -69,6 +74,7 @@ def set(keys, value):
|
|||||||
conf = temp
|
conf = temp
|
||||||
conf[keys[-1]] = value
|
conf[keys[-1]] = value
|
||||||
|
|
||||||
|
|
||||||
def setdefault(keys, value):
|
def setdefault(keys, value):
|
||||||
"""Set the value of property 'key' if it doesn't exist"""
|
"""Set the value of property 'key' if it doesn't exist"""
|
||||||
conf = _config
|
conf = _config
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
import importlib
|
import importlib
|
||||||
|
|
||||||
|
|
||||||
def find(scheme):
|
def find(scheme):
|
||||||
"""Return downloader class suitable for handling the given scheme"""
|
"""Return downloader class suitable for handling the given scheme"""
|
||||||
try:
|
try:
|
||||||
@@ -21,6 +22,7 @@ def find(scheme):
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# internals
|
# internals
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
class BasicDownloader():
|
class BasicDownloader():
|
||||||
"""Base class for downloader modules"""
|
"""Base class for downloader modules"""
|
||||||
|
|
||||||
|
|||||||
@@ -6,13 +6,14 @@
|
|||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
"""Downloader module for http urls"""
|
"""Downloader module for http:// and https:// urls"""
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import requests
|
import requests
|
||||||
import mimetypes
|
import mimetypes
|
||||||
from .common import BasicDownloader
|
from .common import BasicDownloader
|
||||||
|
|
||||||
|
|
||||||
class Downloader(BasicDownloader):
|
class Downloader(BasicDownloader):
|
||||||
|
|
||||||
def __init__(self, output):
|
def __init__(self, output):
|
||||||
@@ -38,7 +39,9 @@ class Downloader(BasicDownloader):
|
|||||||
if response.status_code != requests.codes.ok:
|
if response.status_code != requests.codes.ok:
|
||||||
tries += 1
|
tries += 1
|
||||||
self.out.error(pathfmt.path, 'HTTP status "{} {}"'.format(
|
self.out.error(pathfmt.path, 'HTTP status "{} {}"'.format(
|
||||||
response.status_code, response.reason), tries, self.max_tries)
|
response.status_code, response.reason),
|
||||||
|
tries, self.max_tries
|
||||||
|
)
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
return self.max_tries
|
return self.max_tries
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
from .common import BasicDownloader
|
from .common import BasicDownloader
|
||||||
|
|
||||||
|
|
||||||
class Downloader(BasicDownloader):
|
class Downloader(BasicDownloader):
|
||||||
|
|
||||||
def __init__(self, output):
|
def __init__(self, output):
|
||||||
|
|||||||
@@ -6,14 +6,18 @@
|
|||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
|
||||||
class NoExtractorError(Exception):
|
class NoExtractorError(Exception):
|
||||||
"""No extractor can handle the given URL"""
|
"""No extractor can handle the given URL"""
|
||||||
|
|
||||||
|
|
||||||
class AuthenticationError(Exception):
|
class AuthenticationError(Exception):
|
||||||
"""Invalid or missing login information"""
|
"""Invalid or missing login information"""
|
||||||
|
|
||||||
|
|
||||||
class AuthorizationError(Exception):
|
class AuthorizationError(Exception):
|
||||||
"""Insufficient privileges to access a resource"""
|
"""Insufficient privileges to access a resource"""
|
||||||
|
|
||||||
|
|
||||||
class NotFoundError(Exception):
|
class NotFoundError(Exception):
|
||||||
"""Requested resource (gallery/image) does not exist"""
|
"""Requested resource (gallery/image) does not exist"""
|
||||||
|
|||||||
@@ -8,10 +8,12 @@
|
|||||||
|
|
||||||
"""Conversion between language names and ISO 639-1 codes"""
|
"""Conversion between language names and ISO 639-1 codes"""
|
||||||
|
|
||||||
|
|
||||||
def code_to_language(code, default="English"):
|
def code_to_language(code, default="English"):
|
||||||
"""Map an ISO 639-1 language code to its actual name"""
|
"""Map an ISO 639-1 language code to its actual name"""
|
||||||
return codes.get(code.lower(), default)
|
return codes.get(code.lower(), default)
|
||||||
|
|
||||||
|
|
||||||
def language_to_code(lang, default="en"):
|
def language_to_code(lang, default="en"):
|
||||||
"""Map a language name to its ISO 639-1 code"""
|
"""Map a language name to its ISO 639-1 code"""
|
||||||
lang = lang.capitalize()
|
lang = lang.capitalize()
|
||||||
@@ -20,6 +22,7 @@ def language_to_code(lang, default="en"):
|
|||||||
return code
|
return code
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
codes = {
|
codes = {
|
||||||
"ar": "Arabic",
|
"ar": "Arabic",
|
||||||
"cs": "Czech",
|
"cs": "Czech",
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import hashlib
|
|||||||
from . import extractor, downloader, path, output, exception
|
from . import extractor, downloader, path, output, exception
|
||||||
from .extractor.message import Message
|
from .extractor.message import Message
|
||||||
|
|
||||||
|
|
||||||
class Job():
|
class Job():
|
||||||
"""Base class for Job-types"""
|
"""Base class for Job-types"""
|
||||||
|
|
||||||
@@ -66,6 +67,7 @@ class Job():
|
|||||||
kwdict["category"] = self.extractor.category
|
kwdict["category"] = self.extractor.category
|
||||||
kwdict["subcategory"] = self.extractor.subcategory
|
kwdict["subcategory"] = self.extractor.subcategory
|
||||||
|
|
||||||
|
|
||||||
class DownloadJob(Job):
|
class DownloadJob(Job):
|
||||||
"""Download images into appropriate directory/filename locations"""
|
"""Download images into appropriate directory/filename locations"""
|
||||||
|
|
||||||
@@ -188,8 +190,8 @@ class HashJob(DownloadJob):
|
|||||||
|
|
||||||
def __init__(self, url, content=False):
|
def __init__(self, url, content=False):
|
||||||
DownloadJob.__init__(self, url)
|
DownloadJob.__init__(self, url)
|
||||||
self.content = content
|
self.content = content
|
||||||
self.hash_url = hashlib.sha1()
|
self.hash_url = hashlib.sha1()
|
||||||
self.hash_keyword = hashlib.sha1()
|
self.hash_keyword = hashlib.sha1()
|
||||||
self.hash_content = hashlib.sha1()
|
self.hash_content = hashlib.sha1()
|
||||||
if content:
|
if content:
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import sys
|
|||||||
import shutil
|
import shutil
|
||||||
from . import config
|
from . import config
|
||||||
|
|
||||||
|
|
||||||
def select():
|
def select():
|
||||||
"""Automatically select a suitable printer class"""
|
"""Automatically select a suitable printer class"""
|
||||||
pdict = {
|
pdict = {
|
||||||
@@ -31,6 +32,7 @@ def select():
|
|||||||
else:
|
else:
|
||||||
raise Exception("invalid output mode: " + omode)
|
raise Exception("invalid output mode: " + omode)
|
||||||
|
|
||||||
|
|
||||||
def safeprint(txt, **kwargs):
|
def safeprint(txt, **kwargs):
|
||||||
"""Handle unicode errors and replace invalid characters"""
|
"""Handle unicode errors and replace invalid characters"""
|
||||||
try:
|
try:
|
||||||
@@ -89,7 +91,11 @@ class TerminalPrinter(Printer):
|
|||||||
"""Reduce the length of 'txt' to the width of the terminal"""
|
"""Reduce the length of 'txt' to the width of the terminal"""
|
||||||
if self.short and len(txt) > self.width:
|
if self.short and len(txt) > self.width:
|
||||||
hwidth = self.width // 2 - OFFSET
|
hwidth = self.width // 2 - OFFSET
|
||||||
return "".join((txt[:hwidth-1], CHAR_ELLIPSIES, txt[-hwidth-(self.width%2):]))
|
return "".join((
|
||||||
|
txt[:hwidth-1],
|
||||||
|
CHAR_ELLIPSIES,
|
||||||
|
txt[-hwidth-(self.width % 2):]
|
||||||
|
))
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
|
||||||
@@ -109,7 +115,8 @@ class ColorPrinter(TerminalPrinter):
|
|||||||
def error(self, file, error, tries, max_tries):
|
def error(self, file, error, tries, max_tries):
|
||||||
if tries <= 1 and hasattr(file, "name"):
|
if tries <= 1 and hasattr(file, "name"):
|
||||||
print("\r\033[1;31m", self.shorten(file.name), sep="")
|
print("\r\033[1;31m", self.shorten(file.name), sep="")
|
||||||
print("\033[0;31m[Error]\033[0m ", error, " (", tries, "/", max_tries, ")", sep="")
|
print("\033[0;31m[Error]\033[0m ", error,
|
||||||
|
" (", tries, "/", max_tries, ")", sep="")
|
||||||
|
|
||||||
|
|
||||||
if os.name == "nt":
|
if os.name == "nt":
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
import os
|
import os
|
||||||
from . import config, text
|
from . import config, text
|
||||||
|
|
||||||
|
|
||||||
class PathFormat():
|
class PathFormat():
|
||||||
|
|
||||||
def __init__(self, extractor):
|
def __init__(self, extractor):
|
||||||
|
|||||||
@@ -14,10 +14,12 @@ import os.path
|
|||||||
import html
|
import html
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
|
|
||||||
def remove_html(text):
|
def remove_html(text):
|
||||||
"""Remove html-tags from a string"""
|
"""Remove html-tags from a string"""
|
||||||
return " ".join(re.sub("<[^>]+?>", " ", text).split())
|
return " ".join(re.sub("<[^>]+?>", " ", text).split())
|
||||||
|
|
||||||
|
|
||||||
def filename_from_url(url):
|
def filename_from_url(url):
|
||||||
"""Extract the last part of an url to use as a filename"""
|
"""Extract the last part of an url to use as a filename"""
|
||||||
try:
|
try:
|
||||||
@@ -27,8 +29,9 @@ def filename_from_url(url):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
def nameext_from_url(url, data=None):
|
def nameext_from_url(url, data=None):
|
||||||
"""Extract the last part of an url and fill keywords of 'data' accordingly"""
|
"""Extract the last part of an url and fill 'data' accordingly"""
|
||||||
if data is None:
|
if data is None:
|
||||||
data = {}
|
data = {}
|
||||||
data["filename"] = unquote(filename_from_url(url))
|
data["filename"] = unquote(filename_from_url(url))
|
||||||
@@ -36,6 +39,7 @@ def nameext_from_url(url, data=None):
|
|||||||
data["extension"] = ext[1:].lower()
|
data["extension"] = ext[1:].lower()
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
def clean_path_windows(path):
|
def clean_path_windows(path):
|
||||||
"""Remove illegal characters from a path-segment (Windows)"""
|
"""Remove illegal characters from a path-segment (Windows)"""
|
||||||
try:
|
try:
|
||||||
@@ -43,6 +47,7 @@ def clean_path_windows(path):
|
|||||||
except TypeError:
|
except TypeError:
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
def clean_path_posix(path):
|
def clean_path_posix(path):
|
||||||
"""Remove illegal characters from a path-segment (Posix)"""
|
"""Remove illegal characters from a path-segment (Posix)"""
|
||||||
try:
|
try:
|
||||||
@@ -50,17 +55,20 @@ def clean_path_posix(path):
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()):
|
def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()):
|
||||||
"""Shorten a path segment to at most 'limit' bytes"""
|
"""Shorten a path segment to at most 'limit' bytes"""
|
||||||
return (path.encode(encoding)[:limit]).decode(encoding, "ignore")
|
return (path.encode(encoding)[:limit]).decode(encoding, "ignore")
|
||||||
|
|
||||||
def shorten_filename(filename, limit=255, encoding=sys.getfilesystemencoding()):
|
|
||||||
"""Shorten a filename to at most 'limit' bytes while preserving extension"""
|
def shorten_filename(fname, limit=255, encoding=sys.getfilesystemencoding()):
|
||||||
name, extension = os.path.splitext(filename)
|
"""Shorten filename to at most 'limit' bytes while preserving extension"""
|
||||||
|
name, extension = os.path.splitext(fname)
|
||||||
bext = extension.encode(encoding)
|
bext = extension.encode(encoding)
|
||||||
bname = name.encode(encoding)[:limit-len(bext)]
|
bname = name.encode(encoding)[:limit-len(bext)]
|
||||||
return bname.decode(encoding, "ignore") + extension
|
return bname.decode(encoding, "ignore") + extension
|
||||||
|
|
||||||
|
|
||||||
def extract(txt, begin, end, pos=0):
|
def extract(txt, begin, end, pos=0):
|
||||||
"""Extract the text between 'begin' and 'end' from 'txt'
|
"""Extract the text between 'begin' and 'end' from 'txt'
|
||||||
|
|
||||||
@@ -88,6 +96,7 @@ def extract(txt, begin, end, pos=0):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return None, pos
|
return None, pos
|
||||||
|
|
||||||
|
|
||||||
def extract_all(txt, rules, pos=0, values=None):
|
def extract_all(txt, rules, pos=0, values=None):
|
||||||
"""Calls extract for each rule and returns the result in a dict"""
|
"""Calls extract for each rule and returns the result in a dict"""
|
||||||
if values is None:
|
if values is None:
|
||||||
@@ -98,6 +107,7 @@ def extract_all(txt, rules, pos=0, values=None):
|
|||||||
values[key] = result
|
values[key] = result
|
||||||
return values, pos
|
return values, pos
|
||||||
|
|
||||||
|
|
||||||
def extract_iter(txt, begin, end, pos=0):
|
def extract_iter(txt, begin, end, pos=0):
|
||||||
"""Yield all values obtained by repeated calls to text.extract"""
|
"""Yield all values obtained by repeated calls to text.extract"""
|
||||||
while True:
|
while True:
|
||||||
@@ -106,6 +116,7 @@ def extract_iter(txt, begin, end, pos=0):
|
|||||||
return
|
return
|
||||||
yield value
|
yield value
|
||||||
|
|
||||||
|
|
||||||
if os.name == "nt":
|
if os.name == "nt":
|
||||||
clean_path = clean_path_windows
|
clean_path = clean_path_windows
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import gallery_dl.config as config
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
class TestConfig(unittest.TestCase):
|
class TestConfig(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
@@ -51,5 +52,6 @@ class TestConfig(unittest.TestCase):
|
|||||||
self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
|
self.assertEqual(config.interpolate(["b", "d"], "2"), 123)
|
||||||
self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
|
self.assertEqual(config.interpolate(["d", "d"], "2"), 123)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ class TestExtractors(unittest.TestCase):
|
|||||||
self.assertEqual(hjob.hash_content.hexdigest(), result["content"])
|
self.assertEqual(hjob.hash_content.hexdigest(), result["content"])
|
||||||
|
|
||||||
|
|
||||||
# dynamically genetate tests
|
# dynamically generate tests
|
||||||
def _generate_test(extr, tcase):
|
def _generate_test(extr, tcase):
|
||||||
def test(self):
|
def test(self):
|
||||||
url, result = tcase
|
url, result = tcase
|
||||||
@@ -46,6 +46,7 @@ def _generate_test(extr, tcase):
|
|||||||
self._run_test(extr, url, result)
|
self._run_test(extr, url, result)
|
||||||
return test
|
return test
|
||||||
|
|
||||||
|
|
||||||
# enable selective testing for direct calls
|
# enable selective testing for direct calls
|
||||||
extractors = extractor.extractors()
|
extractors = extractor.extractors()
|
||||||
if __name__ == '__main__' and len(sys.argv) > 1:
|
if __name__ == '__main__' and len(sys.argv) > 1:
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
import gallery_dl.iso639_1 as iso639_1
|
import gallery_dl.iso639_1 as iso639_1
|
||||||
|
|
||||||
|
|
||||||
class TestISO639_1(unittest.TestCase):
|
class TestISO639_1(unittest.TestCase):
|
||||||
|
|
||||||
def test_code_to_language(self):
|
def test_code_to_language(self):
|
||||||
@@ -17,11 +18,13 @@ class TestISO639_1(unittest.TestCase):
|
|||||||
self.assertEqual(iso639_1.code_to_language("FR"), "French")
|
self.assertEqual(iso639_1.code_to_language("FR"), "French")
|
||||||
self.assertEqual(iso639_1.code_to_language("xx"), "English")
|
self.assertEqual(iso639_1.code_to_language("xx"), "English")
|
||||||
self.assertEqual(iso639_1.code_to_language("xx", default=None), None)
|
self.assertEqual(iso639_1.code_to_language("xx", default=None), None)
|
||||||
|
|
||||||
def test_language_to_code(self):
|
def test_language_to_code(self):
|
||||||
self.assertEqual(iso639_1.language_to_code("English"), "en")
|
self.assertEqual(iso639_1.language_to_code("English"), "en")
|
||||||
self.assertEqual(iso639_1.language_to_code("fRENch"), "fr")
|
self.assertEqual(iso639_1.language_to_code("fRENch"), "fr")
|
||||||
self.assertEqual(iso639_1.language_to_code("Nothing"), "en")
|
self.assertEqual(iso639_1.language_to_code("xx"), "en")
|
||||||
self.assertEqual(iso639_1.language_to_code("Nothing", default=None), None)
|
self.assertEqual(iso639_1.language_to_code("xx", default=None), None)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import unittest
|
|||||||
import sys
|
import sys
|
||||||
import gallery_dl.text as text
|
import gallery_dl.text as text
|
||||||
|
|
||||||
|
|
||||||
class TestText(unittest.TestCase):
|
class TestText(unittest.TestCase):
|
||||||
|
|
||||||
def test_remove_html(self):
|
def test_remove_html(self):
|
||||||
@@ -62,7 +63,7 @@ class TestText(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
for case, result in cases.items():
|
for case, result in cases.items():
|
||||||
self.assertEqual(text.clean_path_windows(case), result[0])
|
self.assertEqual(text.clean_path_windows(case), result[0])
|
||||||
self.assertEqual(text.clean_path_posix (case), result[1])
|
self.assertEqual(text.clean_path_posix(case), result[1])
|
||||||
|
|
||||||
def test_shorten_path(self):
|
def test_shorten_path(self):
|
||||||
cases = {
|
cases = {
|
||||||
@@ -86,8 +87,9 @@ class TestText(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
enc = sys.getfilesystemencoding()
|
enc = sys.getfilesystemencoding()
|
||||||
for case, result in cases.items():
|
for case, result in cases.items():
|
||||||
self.assertEqual(text.shorten_filename(case), result)
|
fname = text.shorten_filename(case)
|
||||||
self.assertTrue(len(text.shorten_filename(case).encode(enc)) <= 255)
|
self.assertEqual(fname, result)
|
||||||
|
self.assertTrue(len(fname.encode(enc)) <= 255)
|
||||||
|
|
||||||
def test_extract(self):
|
def test_extract(self):
|
||||||
cases = {
|
cases = {
|
||||||
@@ -121,5 +123,6 @@ class TestText(unittest.TestCase):
|
|||||||
result = ["c", "b", "a", "d"]
|
result = ["c", "b", "a", "d"]
|
||||||
self.assertEqual(list(text.extract_iter(txt, "[", "]")), result)
|
self.assertEqual(list(text.extract_iter(txt, "[", "]")), result)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user