initial commit
This commit is contained in:
44
gallery_dl/__init__.py
Normal file
44
gallery_dl/__init__.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# Package metadata for gallery_dl.
__author__ = "Mike Fährmann"
__copyright__ = "Copyright 2014, Mike Fährmann"

__license__ = "GPLv3"
__version__ = "0.4"
__maintainer__ = "Mike Fährmann"
__email__ = "mike_faehrmann@web.de"
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import configparser
|
||||
|
||||
from . import extractor
|
||||
from . import downloader
|
||||
|
||||
def parse_cmdline_options():
    """Build the command-line interface and parse ``sys.argv``.

    Returns the populated ``argparse.Namespace`` with the attributes
    ``config``, ``dest`` and ``urls``.
    """
    parser = argparse.ArgumentParser(
        description='Download images from various sources')
    parser.add_argument(
        "-c", "--config",
        metavar="CFG",
        default="~/.config/gallery/config",
        help="alternate configuration file",
    )
    parser.add_argument(
        "-d", "--dest",
        metavar="DEST",
        help="destination directory",
    )
    parser.add_argument(
        "urls",
        metavar="URL",
        nargs="+",
        help="url to download images from",
    )
    return parser.parse_args()
|
||||
|
||||
def parse_config_file(path):
    """Load the configuration file at *path* and return the parser.

    ``~`` in *path* is expanded to the user's home directory.  A missing
    file is silently ignored (``ConfigParser.read`` skips unreadable
    paths), yielding an empty configuration.
    """
    parser = configparser.ConfigParser(interpolation=None)
    # Keep option names case-sensitive; the default optionxform would
    # lowercase every key.
    parser.optionxform = lambda option: option
    parser.read(os.path.expanduser(path))
    return parser
|
||||
|
||||
def main():
    """Program entry point: parse options, then download every given URL.

    For each URL on the command line, find a matching extractor and hand
    it to the download manager.  URLs without a matching extractor are
    skipped with a warning instead of crashing.
    """
    opts = parse_cmdline_options()
    conf = parse_config_file(opts.config)
    extf = extractor.ExtractorFinder(conf)
    dlmg = downloader.DownloadManager(opts, conf)

    for url in opts.urls:
        ex = extf.match(url)
        if ex is None:
            # ExtractorFinder.match returns None when no regex matches;
            # passing None to DownloadManager.add would raise AttributeError
            print("[Warning] no extractor found for '{}'".format(url))
            continue
        dlmg.add(ex)
|
||||
54
gallery_dl/downloader/__init__.py
Normal file
54
gallery_dl/downloader/__init__.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import os
|
||||
import sys
|
||||
import importlib
|
||||
|
||||
class DownloadManager():
    """Dispatch extractor output to protocol-specific downloaders.

    Lazily imports one downloader module per URL scheme and writes every
    (url, filename) pair an extractor yields into a destination directory
    derived from command-line options and configuration.
    """

    def __init__(self, opts, conf):
        # opts: parsed command-line namespace (needs .dest)
        # conf: configparser-style mapping with a "general" section
        self.opts = opts
        self.conf = conf
        # cache: URL scheme -> Downloader class (filled by get_downloader)
        self.downloaders = {}

    def add(self, extr):
        """Download every image produced by the given extractor."""
        # destination priority: --dest option > per-category config > general config
        if self.opts.dest:
            dest = self.opts.dest
        elif extr.category in self.conf:
            dest = self.conf[extr.category].get("destination", "/tmp/")
        else:
            dest = self.conf["general"].get("destination", "/tmp/")
        dest = os.path.join(dest, extr.category, extr.directory)
        os.makedirs(dest, exist_ok=True)

        for url, filename in extr:
            path = os.path.join(dest, filename)
            if os.path.exists(path):
                # never overwrite an already-downloaded file
                self.print_skip(path)
                continue
            dl = self.get_downloader(extr, url)
            self.print_start(path)
            tries = dl.download(url, path)
            self.print_success(path, tries)

    def get_downloader(self, extr, url):
        """Return a new downloader instance matching the URL's scheme."""
        end = url.find("://")
        # scheme-less URLs fall back to plain http
        proto = url[:end] if end != -1 else "http"
        if proto not in self.downloaders:
            # import downloader
            # (sibling module named after the scheme: http, https, text, ...)
            module = importlib.import_module("."+proto, __package__)
            self.downloaders[proto] = module.Downloader
        return self.downloaders[proto](extr)

    @staticmethod
    def print_start(path):
        # print without newline so print_success can overwrite this line
        print(path, end="")
        sys.stdout.flush()

    @staticmethod
    def print_skip(path):
        # dim/faint terminal text for skipped files
        print("\033[2m", path, "\033[0m", sep="")

    @staticmethod
    def print_success(path, tries):
        # bold green terminal text for completed downloads
        if tries == 0:
            print("\r", end="")
        print("\r\033[1;32m", path, "\033[0m", sep="")
|
||||
21
gallery_dl/downloader/common.py
Normal file
21
gallery_dl/downloader/common.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import os
|
||||
|
||||
class BasicDownloader():
|
||||
|
||||
max_tries = 5
|
||||
|
||||
def download(self, url, path):
|
||||
with open(path, "wb") as file:
|
||||
try:
|
||||
return self.download_impl(url, file)
|
||||
file.close()
|
||||
except:
|
||||
# make sure to remove file if download failed
|
||||
os.unlink(path)
|
||||
raise
|
||||
|
||||
@staticmethod
|
||||
def print_error(file, error, tries, max_tries=5):
|
||||
if tries == 1 and hasattr(file, "name"):
|
||||
print("\r\033[1;31m", file.name, sep="")
|
||||
print("\033[0;31m[Error]\033[0m ", error, " (", tries, "/", max_tries, ")", sep="")
|
||||
42
gallery_dl/downloader/http.py
Normal file
42
gallery_dl/downloader/http.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from .common import BasicDownloader
|
||||
import time
|
||||
import requests
|
||||
|
||||
class Downloader(BasicDownloader):
    """HTTP(S) downloader backed by the extractor's requests session."""

    def __init__(self, extr):
        BasicDownloader.__init__(self)
        # reuse the extractor's session (cookies, headers, ...)
        self.session = extr.session

    def download_impl(self, url, file):
        """Stream *url* into *file*; return the number of failed attempts.

        Retries on connection errors and non-OK status codes, sleeping
        one second between attempts; gives up after ``max_tries``.
        A 404 aborts immediately by returning ``max_tries``.
        """
        tries = 0
        while True:
            # try to connect to remote source
            try:
                response = self.session.get(url, stream=True, verify=True)
            except requests.exceptions.ConnectionError as e:
                tries += 1
                self.print_error(file, e, tries, self.max_tries)
                time.sleep(1)
                if tries == self.max_tries:
                    raise
                continue

            # reject error-status-codes
            if response.status_code != requests.codes.ok:
                tries += 1
                self.print_error(file, 'HTTP status "{} {}"'.format(
                    response.status_code, response.reason), tries, self.max_tries)
                if response.status_code == 404:
                    return self.max_tries
                time.sleep(1)
                # fixed: was a hard-coded 5, now consistent with max_tries
                if tries == self.max_tries:
                    response.raise_for_status()
                continue

            # everything ok -- proceed to download
            break

        for data in response.iter_content(16384):
            file.write(data)
        return tries
|
||||
1
gallery_dl/downloader/https.py
Normal file
1
gallery_dl/downloader/https.py
Normal file
@@ -0,0 +1 @@
|
||||
from .http import Downloader
|
||||
10
gallery_dl/downloader/text.py
Normal file
10
gallery_dl/downloader/text.py
Normal file
@@ -0,0 +1,10 @@
|
||||
class Downloader(BasicDownloader):
    """Pseudo-downloader that stores the payload of a ``text://`` URL."""

    def __init__(self, extr):
        BasicDownloader.__init__(self)

    def download_impl(self, url, file):
        """Write everything after the 7-char ``text://`` prefix as UTF-8."""
        payload = url[7:]
        file.write(payload.encode("utf-8"))
        return 0
|
||||
21
gallery_dl/extractor/8chan.py
Normal file
21
gallery_dl/extractor/8chan.py
Normal file
@@ -0,0 +1,21 @@
|
||||
class Extractor(BasicExtractor):
    """Extractor for images from an 8chan thread."""

    # thread page URL: {0}=board, {1}=thread id
    thread_url_fmt = "https://www.8chan.co/{0}/res/{1}.html"
    # groups: 1=file url, 2=timestamp prefix, 4=full original name (title
    # attribute, optional), 5=possibly truncated displayed name
    regex = r'>File: <a href="([^"]+)">([^<]+)\.[^<]+<.*?<span class="postfilename"( title="([^"]+)")?>([^<]+)<'

    def __init__(self, match, config):
        # match.group(1) is expected to look like "board/res/thread-id"
        BasicExtractor.__init__(self, config)
        self.board, _, self.thread_id = match.group(1).split("/")
        self.category = "8chan"
        self.directory = self.board + "-" + self.thread_id

    def images(self):
        """Yield (url, filename) for every file post in the thread."""
        url = self.thread_url_fmt.format(self.board, self.thread_id)
        text = self.request(url).text
        for match in re.finditer(self.regex, text):
            url, prefix, fullname, name = match.group(1, 2, 4, 5)
            # prefer the untruncated title-attribute name when present
            yield ("https://www.8chan.co" + url, prefix + "-" + unquote(fullname or name))
|
||||
47
gallery_dl/extractor/__init__.py
Normal file
47
gallery_dl/extractor/__init__.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import sqlite3
|
||||
import importlib
|
||||
|
||||
class ExtractorFinder():
    """Map URLs to extractor modules via regular-expression patterns.

    Patterns come from an optional sqlite database (config key
    ``general.database``) and from config options whose names start
    with ``regex``.
    """

    def __init__(self, config):
        self.config = config
        # list of (category, compiled-regex) pairs, in load order
        self.match_list = list()
        if "database" in config["general"]:
            path = os.path.expanduser(config["general"]["database"])
            conn = sqlite3.connect(path)
            self.load_from_database(conn)
        self.load_from_config(config)

    def match(self, url):
        """Return an Extractor instance for *url*, or None if no pattern matches."""
        for category, regex in self.match_list:
            match = regex.match(url)
            if match:
                # extractor module is the sibling module named after the category
                module = importlib.import_module("."+category, __package__)
                return module.Extractor(match, self.config)
        return None

    def load_from_database(self, db):
        """Load (regex, category) pairs from an sqlite connection."""
        query = (
            "SELECT regex.re, category.name "
            "FROM regex JOIN category "
            "ON regex.category_id = category.id"
        )
        for row in db.execute(query):
            self.add_match(row[1], row[0])

    def load_from_config(self, conf):
        """Collect every option named regex* from every config section."""
        for category in conf:
            for key, value in conf[category].items():
                if key.startswith("regex"):
                    self.add_match(category, value)

    def add_match(self, category, regex):
        """Compile *regex* and register it under *category*.

        An invalid pattern prints a warning instead of aborting startup.
        """
        try:
            self.match_list.append( (category, re.compile(regex)) )
        except re.error:
            # narrowed from a bare except: re.compile signals bad patterns
            # with re.error
            print("[Warning] [{0}] failed to compile regular expression '{1}'"
                  .format(category, regex))
|
||||
22
gallery_dl/extractor/batoto.py
Normal file
22
gallery_dl/extractor/batoto.py
Normal file
@@ -0,0 +1,22 @@
|
||||
class Extractor(AsyncExtractor):
    """Extractor for manga chapters on bato.to."""

    def __init__(self, match, config):
        # match.group(1) is the chapter identifier
        AsyncExtractor.__init__(self, config)
        self.url = "https://bato.to/read/_/" + match.group(1) + "/_/1"
        self.category = "batoto"
        self.directory = match.group(1)

    def images(self):
        """Yield (url, filename) for every page, following next-page links."""
        next_url = self.url
        while next_url:
            text = self.request(next_url).text
            # anchor the search at the full-size image container
            pos = text.find('<div id="full_image"')

            # order matters: the next-page link precedes the image src;
            # extract() returns None when the last page has no link,
            # which ends the loop
            next_url, pos = self.extract(text, '<a href="', '"', pos)
            url, pos = self.extract(text, 'src="', '"', pos)
            name = unquote( filename_from_url(url) )
            yield url, name
|
||||
67
gallery_dl/extractor/common.py
Normal file
67
gallery_dl/extractor/common.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import queue
|
||||
import threading
|
||||
import requests
|
||||
from ..util import safe_request
|
||||
|
||||
class BasicExtractor():
    """Base class for all extractors.

    Holds a requests session and identifies itself via ``category`` and
    ``directory``; iterating an extractor yields (url, filename) pairs
    from its ``images()`` generator.
    """

    def __init__(self, config):
        self.session = requests.Session()
        self.category = ""
        self.directory = ""

    def __iter__(self):
        return self.images()

    def request(self, url, *args, **kwargs):
        """GET *url* through the shared session with retry handling."""
        return safe_request(self.session, url, *args, **kwargs)

    def enable_useragent(self):
        """Set a browser-like User-Agent for sites that reject the default."""
        self.session.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0"

    @staticmethod
    def extract(txt, begin, end, pos=0):
        """Return (text between *begin* and *end*, new position).

        Search starts at *pos*; returns (None, pos) when either
        delimiter is missing.
        """
        try:
            first = txt.index(begin, pos) + len(begin)
            last = txt.index(end, first)
            return txt[first:last], last+len(end)
        except ValueError:
            # narrowed from a bare except: str.index raises ValueError
            # when the substring is not found
            return None, pos

    @staticmethod
    def extract_all(txt, begin, end, pos=0):
        """Like extract(), but the delimiters are included in the result."""
        try:
            first = txt.index(begin, pos)
            last = txt.index(end, first + len(begin)) + len(end)
            return txt[first:last], last
        except ValueError:
            return None, pos
|
||||
|
||||
class AsyncExtractor(BasicExtractor):
    """Extractor that produces images on a background thread.

    ``images()`` runs in a daemon thread and feeds a bounded queue;
    iteration consumes from the queue until a ``None`` sentinel arrives.
    """

    def __init__(self, config):
        super().__init__(config)
        # bounded queue throttles the producer to 5 pending items
        self.__queue = queue.Queue(maxsize=5)
        self.__thread = threading.Thread(target=self.async_images, daemon=True)

    def __iter__(self):
        # bind queue methods to locals for the consume loop
        get = self.__queue.get
        done = self.__queue.task_done

        self.__thread.start()
        while True:
            task = get()
            if task is None:
                # sentinel: producer finished (or failed)
                return
            yield task
            done()

    def async_images(self):
        """Producer running in the worker thread; feeds the queue."""
        put = self.__queue.put
        try:
            for task in self.images():
                put(task)
        except:
            # deliberate catch-all: the worker must never die silently;
            # report the traceback and still deliver the sentinel
            import traceback
            print(traceback.format_exc())
        put(None)
|
||||
68
gallery_dl/extractor/exhentai.py
Normal file
68
gallery_dl/extractor/exhentai.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from .common import BasicExtractor
|
||||
from ..util import unescape
|
||||
import time
|
||||
import random
|
||||
import json
|
||||
|
||||
class Extractor(BasicExtractor):
    """Extractor for image galleries on exhentai.org."""

    api_url = "http://exhentai.org/api.php"
    # filename: gallery-id, zero-padded page number, image key, image name
    name_fmt = "{}_{:>04}_{}_{}"

    def __init__(self, match, config):
        BasicExtractor.__init__(self, config)
        self.url = match.group(0)
        # match.group(1) looks like "gid/token"
        self.gid, self.token = match.group(1).split("/")
        self.category = "exhentai"
        self.directory = self.gid
        # site requires login cookies from the [exhentai-cookies] section
        self.session.cookies.update(config["exhentai-cookies"])

    def images(self):
        e = self.extract

        # get gallery page
        text = self.request(self.url).text

        # get first image page
        url, pos = self.extract_all(text, "http://exhentai.org/s/", "-1")
        text = self.request(url).text

        # extract information
        _ , pos = e(text, '<div id="i3"><a onclick="return load_image(', '')
        imgkey, pos = e(text, "'", "'", pos)
        url , pos = e(text, '<img id="img" src="', '"', pos)
        name , pos = e(text, '<div id="i4"><div>', ' :: ', pos)
        orgurl, pos = e(text, 'http://exhentai.org/fullimg.php', '"', pos)
        if orgurl:
            # prefer the full-size original when offered
            url = "http://exhentai.org/fullimg.php" + unescape(orgurl)
        # fixed: name_fmt was referenced unqualified (NameError); it is a
        # class attribute and must be accessed through self
        yield url, self.name_fmt.format(self.gid, 1, imgkey, name)

        gid , pos = e(text, 'var gid=' , ';', pos)
        startkey, pos = e(text, 'var startkey="', '";', pos)
        showkey , pos = e(text, 'var showkey="' , '";', pos)

        # use json-api for further pages
        request = {
            "method" : "showpage",
            "gid" : int(gid),
            "page" : 2,
            "imgkey" : imgkey,
            "showkey": showkey,
        }

        while True:
            # throttle API calls with a random delay
            time.sleep( random.uniform(2, 5) )
            info = json.loads(
                self.session.post(self.api_url, data=json.dumps(request)).text
            )

            imgkey, pos = e(info["i3"], "'", "'")
            url , pos = e(info["i3"], '<img id="img" src="', '"', pos)
            # NOTE(review): key "i" looks suspicious (other lookups use
            # "i3"/"i7"); kept as-is -- TODO confirm against the API response
            name , pos = e(info["i" ], '<div>', ' :: ')
            orgurl, pos = e(info["i7"], '<a href="', '"')
            if orgurl:
                url = unescape(orgurl)
            yield url, self.name_fmt.format(gid, request["page"], imgkey, name)

            # the last page points at itself: same imgkey means we are done
            if request["imgkey"] == imgkey:
                return
            request["imgkey"] = imgkey
            request["page"] += 1
|
||||
50
gallery_dl/extractor/gelbooru.py
Normal file
50
gallery_dl/extractor/gelbooru.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from .common import AsyncExtractor
|
||||
from ..util import filename_from_url
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
class BooruExtractor(AsyncExtractor):
    """Base extractor for *booru-style XML APIs.

    Derived classes set ``api_url`` and may override ``update_page``
    when the site uses a different paging parameter.
    """

    def __init__(self, match, config):
        AsyncExtractor.__init__(self, config)
        self.tags = match.group(1)
        self.category = "booru"
        self.params = {"tags": self.tags}
        self.page = "page"
        self.directory = self.tags.replace("/", "_")

    def images(self):
        """Yield (url, filename) for every post, page by page."""
        self.update_page(reset=True)
        while True:
            page_xml = self.request(self.api_url, verify=True, params=self.params).text
            root = ET.fromstring(page_xml)
            # an empty result page means we are past the last page
            if not len(root):
                return
            for post in root:
                url = post.attrib["file_url"]
                yield url, "{}_{}".format(self.category, filename_from_url(url))
            self.update_page()

    def update_page(self, reset=False):
        # Override this method in derived classes if necessary.
        # It is usually enough to adjust the 'page' attribute
        if reset:
            self.params[self.page] = 1
        else:
            self.params[self.page] += 1
|
||||
|
||||
class Extractor(BooruExtractor):
    """Extractor for gelbooru.com; pages via the zero-based 'pid' parameter."""

    def __init__(self, match, config):
        BooruExtractor.__init__(self, match, config)
        self.category = "gelbooru"
        self.api_url = "http://gelbooru.com/"
        self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags}

    def update_page(self, reset=False):
        if reset:
            self.params["pid"] = 0
        else:
            self.params["pid"] += 1
|
||||
150
gallery_dl/extractor/pixiv.py
Normal file
150
gallery_dl/extractor/pixiv.py
Normal file
@@ -0,0 +1,150 @@
|
||||
from .common import AsyncExtractor
|
||||
from ..util import safe_request
|
||||
import re
|
||||
import csv
|
||||
import requests
|
||||
|
||||
class Extractor(AsyncExtractor):
    """Extractor for all illustrations of a pixiv member.

    Image metadata comes from the legacy iPhone CSV API (PixivAPI); the
    numeric indices below address fields of that CSV record.
    """

    member_url = "http://www.pixiv.net/member_illust.php"
    illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"

    def __init__(self, match, config):
        # match.group(1) is the member id; login cookies come from the
        # [pixiv-cookies] config section
        AsyncExtractor.__init__(self, config)
        self.member_id = match.group(1)
        self.category = "pixiv"
        self.directory = self.member_id
        self.session.cookies.update(config["pixiv-cookies"])
        self.session.headers.update({"Referer": "http://www.pixiv.net/"})
        self.api = PixivAPI(config["pixiv-cookies"]["PHPSESSID"])

    def images(self):
        # filename formats: single illustration / page of a manga sequence
        # (format args are CSV fields: 0=illust id, 1=member id, 2=extension)
        sname_fmt = "pixiv_{1}_{0}.{2}"
        mname_fmt = "pixiv_{1}_{0}_p{num:02}.{2}"

        # URL formats for the old image-server layout
        # (assumes field 4 is a server number and 24 a user name -- per
        # observed CSV layout; TODO confirm)
        singl_v1_fmt = "http://i1.pixiv.net/img{4:>02}/img/{24}/{0}.{2}"
        manga_v1_fmt = "http://i1.pixiv.net/img{4:>02}/img/{24}/{0}{big}_p{num}.{2}"

        # URL formats for the newer date-based layout
        singl_v2_fmt = "http://i1.pixiv.net/img-original/img/{date}/{0}_p0.{2}"
        manga_v2_fmt = "http://i1.pixiv.net/img-original/img/{date}/{0}_p{num}.{2}"

        date = ""
        big = ""

        for img in self.image_ids():
            data = self.api.request(img)
            # debug
            # for i, value in enumerate(data):
            #     print("{:02}: {}".format(i, value))
            # return
            # debug end

            if "うごイラ" in data[13]:
                # ugoira / animations
                try:
                    url, framelist = self.parse_ugoira(img)
                    # yield the zip archive plus a frame-delay list
                    data[2] = "zip"
                    yield (url, sname_fmt.format(*data))
                    data[2] = "txt"
                    yield (framelist, sname_fmt.format(*data))
                    continue
                except:
                    # fall through and treat it as a normal image set
                    print("[Warning] failed to get ugoira url; trying fallback")

            # images
            # illust ids above this threshold use the new server layout
            if img > 46270949:
                # date component embedded in the medium-page URL (field 6)
                date = data[6][45:64]
                url_s_fmt = singl_v2_fmt
                url_m_fmt = manga_v2_fmt
            else:
                big = "_big" if img > 11319935 else ""
                url_s_fmt = singl_v1_fmt
                url_m_fmt = manga_v1_fmt

            # field 19: page count for manga posts (falsy for single images)
            if not data[19]:
                yield (url_s_fmt.format(*data, date=date), sname_fmt.format(*data))
            else:
                for i in range(0, int(data[19])):
                    yield (url_m_fmt.format(*data, num=i, date=date, big=big),
                           mname_fmt.format(*data, num=i))

    def image_ids(self):
        """generator -- yield all image ids"""
        needle = '<li class="image-item"><a href="/member_illust.php?mode=medium&illust_id='
        params = {"id": self.member_id, "p": 1}
        while True:
            text = self.request(self.member_url, params=params).text
            end = 0
            found = 0
            while True:
                pos = text.find(needle, end)
                if pos == -1:
                    break
                pos += len(needle)
                end = text.find('"', pos)
                found += 1
                yield int(text[pos:end])
            # a full page holds 20 thumbnails; fewer means last page
            if found != 20:
                return
            params["p"] += 1

    def parse_ugoira(self, illust_id):
        """Return (zip url, 'text://' frame list) for an animation."""
        # get illust page
        text = self.request(
            self.illust_url,
            params={"illust_id": illust_id},
        ).text

        # parse page
        url , pos = self.extract(text, 'ugokuIllustFullscreenData = {"src":"', '"')
        frames, pos = self.extract(text, '"frames":[', ']', pos)

        # fix url
        url = url.replace("\\/", "/")

        # build framelist
        # ("text://" payloads are written verbatim by the text downloader)
        framelist = "text://" + re.sub(
            r'\{"file":"([^"]+)","delay":(\d+)\},?',
            r'\1 \2\n',
            frames
        )

        return url, framelist
|
||||
|
||||
|
||||
class PixivAPI():
    """Minimal client for pixiv's legacy iPhone CSV metadata API."""

    api_url = "http://spapi.pixiv.net/iphone/illust.php"

    def __init__(self, session_id):
        self.session = requests.Session()
        self.session.params["PHPSESSID"] = session_id

    def request(self, illust_id):
        """Fetch metadata for *illust_id* as a list of CSV fields, retrying
        until a non-trivial response arrives."""
        params = {"illust_id": illust_id}
        while True:
            response = safe_request(self.session, self.api_url, params=params)
            text = response.text
            # very short responses are empty/error stubs -> try again
            if len(text) > 31:
                return next(csv.reader([text]))
|
||||
|
||||
# class FileDict(dict):
|
||||
#
|
||||
# def __init__(self, *args):
|
||||
# super().__init__()
|
||||
# self.re = re.compile(r"pixiv_\d+_(?P<id>\d+)(?P<extra>_p\d+)?\.[a-z]{3}")
|
||||
# for arg in args:
|
||||
# self.load_from(arg)
|
||||
#
|
||||
# def load_from(self, directory):
|
||||
# match = self.re.match
|
||||
# for file in os.listdir(directory):
|
||||
# m = match(file)
|
||||
# if m is None:
|
||||
# continue
|
||||
# val = True if m.group("extra") else False
|
||||
# dict.__setitem__(self, m.group("id"), val)
|
||||
#
|
||||
# def __getitem__(self, key):
|
||||
# return dict.get(self, key)
|
||||
35
gallery_dl/extractor/sankaku.py
Normal file
35
gallery_dl/extractor/sankaku.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from .common import AsyncExtractor
|
||||
from ..util import filename_from_url
|
||||
|
||||
class Extractor(AsyncExtractor):
    """Extractor for images on chan.sankakucomplex.com tag searches."""

    url = "https://chan.sankakucomplex.com/"

    def __init__(self, match, config):
        AsyncExtractor.__init__(self, config)
        self.tags = match.group(1)
        self.category = "sankaku"
        self.directory = self.tags.replace("/", "_")
        # site blocks the default requests User-Agent
        self.enable_useragent()

    def images(self):
        """Yield (url, filename) for every preview found, page by page.

        Fixed: the original left a debug ``print(text); return`` right
        after the first request, which made this generator yield nothing.
        """
        needle = ' src="//c.sankakucomplex.com/data/preview/'
        params = {"tags": self.tags, "page": 1}
        while True:
            text = self.request(self.url, params=params).text
            pos = 0
            found = 0
            while True:
                url, pos = self.extract(text, needle, '"', pos)
                if url is None:
                    # no further preview on this page
                    # (the original relied on a TypeError from 'str + None'
                    # caught by a bare except to break out)
                    break
                found += 1
                yield ("https://cs.sankakucomplex.com/data/" + url,
                       "%s_%s" % (self.category, filename_from_url(url)))
            if found == 0:
                break
            params["page"] += 1
|
||||
33
gallery_dl/util.py
Normal file
33
gallery_dl/util.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import time
|
||||
import requests
|
||||
import html.parser
|
||||
|
||||
def safe_request(session, url, *args, **kwargs):
    """GET *url* through *session*, retrying up to 5 times.

    Sleeps one second after a connection error or a non-OK status code;
    after the fifth failure the exception is re-raised (or
    ``raise_for_status`` is invoked).  Returns the successful response.
    """
    attempts = 0
    while True:
        # attempt to reach the remote host
        try:
            response = session.get(url, *args, **kwargs)
        except requests.exceptions.ConnectionError:
            attempts += 1
            time.sleep(1)
            if attempts == 5:
                raise
            continue

        # treat any non-OK status as a retryable failure
        if response.status_code != requests.codes.ok:
            attempts += 1
            time.sleep(1)
            if attempts == 5:
                response.raise_for_status()
            continue

        # success
        return response
|
||||
|
||||
def filename_from_url(url):
    """Return the part of *url* after the last '/' (the whole string
    if it contains no slash)."""
    return url.rpartition("/")[2]
|
||||
|
||||
unescape = html.parser.HTMLParser().unescape
|
||||
Reference in New Issue
Block a user