move TestJob into test module; test _extractor values
This commit is contained in:
@@ -56,6 +56,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
|
|||||||
)
|
)
|
||||||
root = "https://hentai.cafe"
|
root = "https://hentai.cafe"
|
||||||
reverse = False
|
reverse = False
|
||||||
|
chapterclass = HentaicafeChapterExtractor
|
||||||
|
|
||||||
def chapters(self, page):
|
def chapters(self, page):
|
||||||
if "/manga/series/" in self.manga_url:
|
if "/manga/series/" in self.manga_url:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2015-2018 Mike Fährmann
|
# Copyright 2015-2019 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -9,7 +9,6 @@
|
|||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import hashlib
|
|
||||||
import logging
|
import logging
|
||||||
from . import extractor, downloader, postprocessor
|
from . import extractor, downloader, postprocessor
|
||||||
from . import config, text, util, output, exception
|
from . import config, text, util, output, exception
|
||||||
@@ -449,99 +448,6 @@ class UrlJob(Job):
|
|||||||
self._write_unsupported(url)
|
self._write_unsupported(url)
|
||||||
|
|
||||||
|
|
||||||
class TestJob(DownloadJob):
|
|
||||||
"""Generate test-results for extractor runs"""
|
|
||||||
|
|
||||||
class HashIO():
|
|
||||||
"""Minimal file-like interface"""
|
|
||||||
|
|
||||||
def __init__(self, hashobj):
|
|
||||||
self.hashobj = hashobj
|
|
||||||
self.path = ""
|
|
||||||
self.size = 0
|
|
||||||
self.has_extension = True
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *args):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def open(self, mode):
|
|
||||||
self.size = 0
|
|
||||||
return self
|
|
||||||
|
|
||||||
def write(self, content):
|
|
||||||
"""Update SHA1 hash"""
|
|
||||||
self.size += len(content)
|
|
||||||
self.hashobj.update(content)
|
|
||||||
|
|
||||||
def tell(self):
|
|
||||||
return self.size
|
|
||||||
|
|
||||||
def part_size(self):
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def __init__(self, url, parent=None, content=False):
|
|
||||||
DownloadJob.__init__(self, url, parent)
|
|
||||||
self.content = content
|
|
||||||
self.list_url = []
|
|
||||||
self.list_keyword = []
|
|
||||||
self.list_archive = []
|
|
||||||
self.hash_url = hashlib.sha1()
|
|
||||||
self.hash_keyword = hashlib.sha1()
|
|
||||||
self.hash_archive = hashlib.sha1()
|
|
||||||
self.hash_content = hashlib.sha1()
|
|
||||||
if content:
|
|
||||||
self.fileobj = self.HashIO(self.hash_content)
|
|
||||||
self.get_downloader("http")._check_extension = lambda a, b: None
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
for msg in self.extractor:
|
|
||||||
self.dispatch(msg)
|
|
||||||
|
|
||||||
def handle_url(self, url, keywords):
|
|
||||||
self.update_url(url)
|
|
||||||
self.update_keyword(keywords)
|
|
||||||
self.update_archive(keywords)
|
|
||||||
self.update_content(url)
|
|
||||||
|
|
||||||
def handle_urllist(self, urls, keywords):
|
|
||||||
self.handle_url(urls[0], keywords)
|
|
||||||
|
|
||||||
def handle_directory(self, keywords):
|
|
||||||
self.update_keyword(keywords, False)
|
|
||||||
|
|
||||||
def handle_queue(self, url, keywords):
|
|
||||||
self.update_url(url)
|
|
||||||
self.update_keyword(keywords)
|
|
||||||
|
|
||||||
def update_url(self, url):
|
|
||||||
"""Update the URL hash"""
|
|
||||||
self.list_url.append(url)
|
|
||||||
self.hash_url.update(url.encode())
|
|
||||||
|
|
||||||
def update_keyword(self, kwdict, to_list=True):
|
|
||||||
"""Update the keyword hash"""
|
|
||||||
kwdict = self._filter(kwdict)
|
|
||||||
if to_list:
|
|
||||||
self.list_keyword.append(kwdict)
|
|
||||||
self.hash_keyword.update(
|
|
||||||
json.dumps(kwdict, sort_keys=True, default=str).encode())
|
|
||||||
|
|
||||||
def update_archive(self, kwdict):
|
|
||||||
"""Update the archive-id hash"""
|
|
||||||
archive_id = self.extractor.archive_fmt.format_map(kwdict)
|
|
||||||
self.list_archive.append(archive_id)
|
|
||||||
self.hash_archive.update(archive_id.encode())
|
|
||||||
|
|
||||||
def update_content(self, url):
|
|
||||||
"""Update the content hash"""
|
|
||||||
if self.content:
|
|
||||||
scheme = url.partition(":")[0]
|
|
||||||
self.get_downloader(scheme).download(url, self.fileobj)
|
|
||||||
|
|
||||||
|
|
||||||
class DataJob(Job):
|
class DataJob(Job):
|
||||||
"""Collect extractor results and dump them"""
|
"""Collect extractor results and dump them"""
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,8 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
import hashlib
|
||||||
import unittest
|
import unittest
|
||||||
from gallery_dl import extractor, job, config, exception
|
from gallery_dl import extractor, job, config, exception
|
||||||
|
|
||||||
@@ -26,34 +28,6 @@ BROKEN = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def setup_test_config():
|
|
||||||
name = "gallerydl"
|
|
||||||
email = "gallerydl@openaliasbox.org"
|
|
||||||
|
|
||||||
config.clear()
|
|
||||||
config.set(("cache", "file"), ":memory:")
|
|
||||||
config.set(("downloader", "part"), False)
|
|
||||||
config.set(("extractor", "timeout"), 60)
|
|
||||||
config.set(("extractor", "username"), name)
|
|
||||||
config.set(("extractor", "password"), name)
|
|
||||||
config.set(("extractor", "nijie", "username"), email)
|
|
||||||
config.set(("extractor", "seiga", "username"), email)
|
|
||||||
config.set(("extractor", "danbooru", "username"), None)
|
|
||||||
|
|
||||||
config.set(("extractor", "deviantart", "client-id"), "7777")
|
|
||||||
config.set(("extractor", "deviantart", "client-secret"),
|
|
||||||
"ff14994c744d9208e5caeec7aab4a026")
|
|
||||||
|
|
||||||
config.set(("extractor", "tumblr", "api-key"),
|
|
||||||
"0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6")
|
|
||||||
config.set(("extractor", "tumblr", "api-secret"),
|
|
||||||
"6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj")
|
|
||||||
config.set(("extractor", "tumblr", "access-token"),
|
|
||||||
"N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG")
|
|
||||||
config.set(("extractor", "tumblr", "access-token-secret"),
|
|
||||||
"sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O")
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractorResults(unittest.TestCase):
|
class TestExtractorResults(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
@@ -74,7 +48,7 @@ class TestExtractorResults(unittest.TestCase):
|
|||||||
else:
|
else:
|
||||||
content = False
|
content = False
|
||||||
|
|
||||||
tjob = job.TestJob(url, content=content)
|
tjob = ResultJob(url, content=content)
|
||||||
self.assertEqual(extr, tjob.extractor.__class__)
|
self.assertEqual(extr, tjob.extractor.__class__)
|
||||||
|
|
||||||
if not result:
|
if not result:
|
||||||
@@ -94,6 +68,14 @@ class TestExtractorResults(unittest.TestCase):
|
|||||||
# test archive-id uniqueness
|
# test archive-id uniqueness
|
||||||
self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))
|
self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive))
|
||||||
|
|
||||||
|
# test '_extractor' entries
|
||||||
|
if tjob.queue:
|
||||||
|
for url, kwdict in zip(tjob.list_url, tjob.list_keyword):
|
||||||
|
if "_extractor" in kwdict:
|
||||||
|
extr = kwdict["_extractor"].from_url(url)
|
||||||
|
self.assertIsInstance(extr, kwdict["_extractor"])
|
||||||
|
self.assertEqual(extr.url, url)
|
||||||
|
|
||||||
# test extraction results
|
# test extraction results
|
||||||
if "url" in result:
|
if "url" in result:
|
||||||
self.assertEqual(result["url"], tjob.hash_url.hexdigest())
|
self.assertEqual(result["url"], tjob.hash_url.hexdigest())
|
||||||
@@ -142,6 +124,123 @@ class TestExtractorResults(unittest.TestCase):
|
|||||||
self.assertEqual(value, test, msg=key)
|
self.assertEqual(value, test, msg=key)
|
||||||
|
|
||||||
|
|
||||||
|
class ResultJob(job.DownloadJob):
|
||||||
|
"""Generate test-results for extractor runs"""
|
||||||
|
|
||||||
|
def __init__(self, url, parent=None, content=False):
|
||||||
|
job.DownloadJob.__init__(self, url, parent)
|
||||||
|
self.queue = False
|
||||||
|
self.content = content
|
||||||
|
self.list_url = []
|
||||||
|
self.list_keyword = []
|
||||||
|
self.list_archive = []
|
||||||
|
self.hash_url = hashlib.sha1()
|
||||||
|
self.hash_keyword = hashlib.sha1()
|
||||||
|
self.hash_archive = hashlib.sha1()
|
||||||
|
self.hash_content = hashlib.sha1()
|
||||||
|
if content:
|
||||||
|
self.fileobj = FakePathfmt(self.hash_content)
|
||||||
|
self.get_downloader("http")._check_extension = lambda a, b: None
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
for msg in self.extractor:
|
||||||
|
self.dispatch(msg)
|
||||||
|
|
||||||
|
def handle_url(self, url, keywords):
|
||||||
|
self.update_url(url)
|
||||||
|
self.update_keyword(keywords)
|
||||||
|
self.update_archive(keywords)
|
||||||
|
self.update_content(url)
|
||||||
|
|
||||||
|
def handle_directory(self, keywords):
|
||||||
|
self.update_keyword(keywords, False)
|
||||||
|
|
||||||
|
def handle_queue(self, url, keywords):
|
||||||
|
self.queue = True
|
||||||
|
self.update_url(url)
|
||||||
|
self.update_keyword(keywords)
|
||||||
|
|
||||||
|
def update_url(self, url):
|
||||||
|
self.list_url.append(url)
|
||||||
|
self.hash_url.update(url.encode())
|
||||||
|
|
||||||
|
def update_keyword(self, kwdict, to_list=True):
|
||||||
|
if to_list:
|
||||||
|
self.list_keyword.append(kwdict)
|
||||||
|
kwdict = self._filter(kwdict)
|
||||||
|
self.hash_keyword.update(
|
||||||
|
json.dumps(kwdict, sort_keys=True, default=str).encode())
|
||||||
|
|
||||||
|
def update_archive(self, kwdict):
|
||||||
|
archive_id = self.extractor.archive_fmt.format_map(kwdict)
|
||||||
|
self.list_archive.append(archive_id)
|
||||||
|
self.hash_archive.update(archive_id.encode())
|
||||||
|
|
||||||
|
def update_content(self, url):
|
||||||
|
if self.content:
|
||||||
|
scheme = url.partition(":")[0]
|
||||||
|
self.get_downloader(scheme).download(url, self.fileobj)
|
||||||
|
|
||||||
|
|
||||||
|
class FakePathfmt():
|
||||||
|
"""Minimal file-like interface"""
|
||||||
|
|
||||||
|
def __init__(self, hashobj):
|
||||||
|
self.hashobj = hashobj
|
||||||
|
self.path = ""
|
||||||
|
self.size = 0
|
||||||
|
self.has_extension = True
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *args):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def open(self, mode):
|
||||||
|
self.size = 0
|
||||||
|
return self
|
||||||
|
|
||||||
|
def write(self, content):
|
||||||
|
"""Update SHA1 hash"""
|
||||||
|
self.size += len(content)
|
||||||
|
self.hashobj.update(content)
|
||||||
|
|
||||||
|
def tell(self):
|
||||||
|
return self.size
|
||||||
|
|
||||||
|
def part_size(self):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def setup_test_config():
|
||||||
|
name = "gallerydl"
|
||||||
|
email = "gallerydl@openaliasbox.org"
|
||||||
|
|
||||||
|
config.clear()
|
||||||
|
config.set(("cache", "file"), ":memory:")
|
||||||
|
config.set(("downloader", "part"), False)
|
||||||
|
config.set(("extractor", "timeout"), 60)
|
||||||
|
config.set(("extractor", "username"), name)
|
||||||
|
config.set(("extractor", "password"), name)
|
||||||
|
config.set(("extractor", "nijie", "username"), email)
|
||||||
|
config.set(("extractor", "seiga", "username"), email)
|
||||||
|
config.set(("extractor", "danbooru", "username"), None)
|
||||||
|
|
||||||
|
config.set(("extractor", "deviantart", "client-id"), "7777")
|
||||||
|
config.set(("extractor", "deviantart", "client-secret"),
|
||||||
|
"ff14994c744d9208e5caeec7aab4a026")
|
||||||
|
|
||||||
|
config.set(("extractor", "tumblr", "api-key"),
|
||||||
|
"0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6")
|
||||||
|
config.set(("extractor", "tumblr", "api-secret"),
|
||||||
|
"6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj")
|
||||||
|
config.set(("extractor", "tumblr", "access-token"),
|
||||||
|
"N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG")
|
||||||
|
config.set(("extractor", "tumblr", "access-token-secret"),
|
||||||
|
"sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O")
|
||||||
|
|
||||||
|
|
||||||
def generate_tests():
|
def generate_tests():
|
||||||
"""Dynamically generate extractor unittests"""
|
"""Dynamically generate extractor unittests"""
|
||||||
def _generate_test(extr, tcase):
|
def _generate_test(extr, tcase):
|
||||||
@@ -165,16 +264,14 @@ def generate_tests():
|
|||||||
skip = set(BROKEN)
|
skip = set(BROKEN)
|
||||||
if "CI" in os.environ and "TRAVIS" in os.environ:
|
if "CI" in os.environ and "TRAVIS" in os.environ:
|
||||||
skip |= set(TRAVIS_SKIP)
|
skip |= set(TRAVIS_SKIP)
|
||||||
print("skipping:", ", ".join(skip))
|
if skip:
|
||||||
|
print("skipping:", ", ".join(skip))
|
||||||
fltr = lambda c, bc: c not in skip # noqa: E731
|
fltr = lambda c, bc: c not in skip # noqa: E731
|
||||||
|
|
||||||
# filter available extractor classes
|
# filter available extractor classes
|
||||||
extractors = [
|
extractors = [
|
||||||
extr for extr in extractor.extractors()
|
extr for extr in extractor.extractors()
|
||||||
if fltr(
|
if fltr(extr.category, getattr(extr, "basecategory", None))
|
||||||
extr.category,
|
|
||||||
extr.basecategory if hasattr(extr, "basecategory") else None
|
|
||||||
)
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# add 'test_...' methods
|
# add 'test_...' methods
|
||||||
|
|||||||
Reference in New Issue
Block a user