class DownloadArchive():
    """SQLite-backed store of archive keys

    A key is the result of formatting 'format_string' with an item's
    'kwdict'. Keys are kept in the single-column table 'archive'.
    """

    def __init__(self, path, format_string, pragma=None,
                 cache_key="_archive_key"):
        """Open the archive database at 'path' and prepare its table

        path:          location of the SQLite database file
        format_string: format string parsed into the key generator
        pragma:        optional iterable of statements, each executed
                       as "PRAGMA <statement>"
        cache_key:     'kwdict' key under which check() stores the
                       generated archive key so add() can reuse it

        Any exception raised during setup is propagated after the
        database connection has been closed again.
        """
        con = self._connect(path)
        try:
            # isolation_level=None: sqlite3 never opens transactions
            # implicitly, so no explicit commit() calls are needed
            con.isolation_level = None

            self.keygen = formatter.parse(format_string).format_map
            self.connection = con
            self.close = con.close
            self.cursor = cursor = con.cursor()
            self._cache_key = cache_key

            if pragma:
                # NOTE: statements are concatenated into SQL verbatim;
                # 'pragma' must only come from trusted configuration
                for stmt in pragma:
                    cursor.execute("PRAGMA " + stmt)

            try:
                cursor.execute("CREATE TABLE IF NOT EXISTS archive "
                               "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
            except sqlite3.OperationalError:
                # fallback for missing WITHOUT ROWID support (#553)
                cursor.execute("CREATE TABLE IF NOT EXISTS archive "
                               "(entry TEXT PRIMARY KEY)")
        except BaseException:
            # do not leave the database handle open when setup fails
            con.close()
            raise

    @staticmethod
    def _connect(path):
        """Return a connection to 'path', creating parent directories

        When the first attempt fails, missing parent directories are
        created and the connection is retried once. If the retry fails
        as well, its sqlite3.OperationalError reaches the caller.
        """
        try:
            return sqlite3.connect(
                path, timeout=60, check_same_thread=False)
        except sqlite3.OperationalError:
            dirname = os.path.dirname(path)
            if dirname:
                # exist_ok: an already existing directory (or one created
                # concurrently) must not hide the actual sqlite3 error
                os.makedirs(dirname, exist_ok=True)
            return sqlite3.connect(
                path, timeout=60, check_same_thread=False)

    def add(self, kwdict):
        """Add item described by 'kwdict' to archive

        Uses the key cached in 'kwdict' by check() when present and
        generates it otherwise. Existing entries are left untouched.
        """
        key = kwdict.get(self._cache_key) or self.keygen(kwdict)
        self.cursor.execute(
            "INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))

    def check(self, kwdict):
        """Return a true value if the item described by 'kwdict' exists

        The result is the matching row when the key is present in the
        archive and None otherwise. The generated key is also stored in
        'kwdict' under the cache key.
        """
        key = kwdict[self._cache_key] = self.keygen(kwdict)
        self.cursor.execute(
            "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
        return self.cursor.fetchone()
def _init_archive(self, job, options, prefix=None):
    """Set up 'self.archive' from the "archive" entry of 'options'

    Returns True when an archive was opened. Otherwise 'self.archive'
    is set to None and False is returned; a failure to open the
    archive is reported as a warning instead of being raised.
    """
    location = options.get("archive")
    if not location:
        self.archive = None
        return False

    extractor = job.extractor
    location = util.expand_path(location)
    if not prefix:
        # default key prefix derived from this postprocessor's name
        prefix = "_" + self.name.upper() + "_"

    key_prefix = options.get("archive-prefix", extractor.category)
    key_format = options.get(
        "archive-format", prefix + extractor.archive_fmt)
    archive_format = key_prefix + key_format

    try:
        if "{" in location:
            # the path itself is a format string
            location = formatter.parse(location).format_map(
                job.pathfmt.kwdict)
        self.archive = archive.DownloadArchive(
            location, archive_format,
            options.get("archive-pragma"),
            "_archive_" + self.name)
    except Exception as exc:
        self.log.warning(
            "Failed to open %s archive at '%s' (%s: %s)",
            self.name, location, exc.__class__.__name__, exc)
        self.archive = None
        return False

    self.log.debug("Using %s archive '%s'", self.name, location)
    return True
cache_key="_archive_key"): - try: - con = sqlite3.connect(path, timeout=60, check_same_thread=False) - except sqlite3.OperationalError: - os.makedirs(os.path.dirname(path)) - con = sqlite3.connect(path, timeout=60, check_same_thread=False) - con.isolation_level = None - - from . import formatter - self.keygen = formatter.parse(format_string).format_map - self.close = con.close - self.cursor = cursor = con.cursor() - self._cache_key = cache_key - - if pragma: - for stmt in pragma: - cursor.execute("PRAGMA " + stmt) - - try: - cursor.execute("CREATE TABLE IF NOT EXISTS archive " - "(entry TEXT PRIMARY KEY) WITHOUT ROWID") - except sqlite3.OperationalError: - # fallback for missing WITHOUT ROWID support (#553) - cursor.execute("CREATE TABLE IF NOT EXISTS archive " - "(entry TEXT PRIMARY KEY)") - - def check(self, kwdict): - """Return True if the item described by 'kwdict' exists in archive""" - key = kwdict[self._cache_key] = self.keygen(kwdict) - self.cursor.execute( - "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,)) - return self.cursor.fetchone() - - def add(self, kwdict): - """Add item described by 'kwdict' to archive""" - key = kwdict.get(self._cache_key) or self.keygen(kwdict) - self.cursor.execute( - "INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))