update some extractors to use https

This commit is contained in:
Mike Fährmann
2017-04-20 13:20:41 +02:00
parent 342371086b
commit 13dc5d72bc
12 changed files with 43 additions and 41 deletions

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from http://www.deviantart.com/""" """Extract images from https://www.deviantart.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, exception
@@ -107,9 +107,9 @@ class DeviantartImageExtractor(DeviantartExtractor):
return (self.api.deviation(deviation_id),) return (self.api.deviation(deviation_id),)
class DeviantartFavouritesExtractor(DeviantartExtractor): class DeviantartFavoriteExtractor(DeviantartExtractor):
"""Extractor for an artist's favourites from deviantart.com""" """Extractor for an artist's favourites from deviantart.com"""
subcategory = "favourites" subcategory = "favorite"
directory_fmt = ["{category}", "{subcategory}", directory_fmt = ["{category}", "{subcategory}",
"{collection[owner]} - {collection[title]}"] "{collection[owner]} - {collection[title]}"]
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/favourites" pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/favourites"
@@ -117,12 +117,12 @@ class DeviantartFavouritesExtractor(DeviantartExtractor):
test = [ test = [
("http://h3813067.deviantart.com/favourites/", { ("http://h3813067.deviantart.com/favourites/", {
"url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e", "url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e",
"keyword": "35a275b0f737aa9bd1f32ba13604d6e9a7054a14", "keyword": "51e88d400c3fb69ae0b5a618ef21a282697185fe",
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}), }),
("http://rosuuri.deviantart.com/favourites/58951174/Useful", { ("http://rosuuri.deviantart.com/favourites/58951174/Useful", {
"url": "9e8d971c80db099b95d1c785399e2bc6eb96cd07", "url": "9e8d971c80db099b95d1c785399e2bc6eb96cd07",
"keyword": "cf65309a880799a4a82a7b2f0389e5bc88f5730f", "keyword": "ea42a4e238f7e54caa250fa2f5abef103cbf30ab",
}), }),
] ]

View File

@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract manga-chapters from http://dynasty-scans.com/""" """Extract manga-chapters from https://dynasty-scans.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text
@@ -24,16 +24,16 @@ class DynastyscansChapterExtractor(Extractor):
test = [ test = [
(("http://dynasty-scans.com/chapters/" (("http://dynasty-scans.com/chapters/"
"hitoribocchi_no_oo_seikatsu_ch33"), { "hitoribocchi_no_oo_seikatsu_ch33"), {
"url": "63950fa1dfdef58ab842c1b9b854c5c1d650cfa0", "url": "ba945116382eff26b0e52fe1804e6d58fee7734d",
"keyword": "81bfda5b98b34ac2a7324bd9e2abad3df9cc7673", "keyword": "81bfda5b98b34ac2a7324bd9e2abad3df9cc7673",
}), }),
(("http://dynasty-scans.com/chapters/" (("http://dynasty-scans.com/chapters/"
"new_game_the_spinoff_special_13"), { "new_game_the_spinoff_special_13"), {
"url": "6b28c733481ac498da341e85a9eb155864491731", "url": "2cd5e04bd16f842dc884c145a44cf0c64ec27a21",
"keyword": "93b75d0c0aaeb849c99f2225a4b97f466bc3ace9", "keyword": "93b75d0c0aaeb849c99f2225a4b97f466bc3ace9",
}), }),
] ]
url_base = "http://dynasty-scans.com/" url_base = "https://dynasty-scans.com/"
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self) Extractor.__init__(self)

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from http://gelbooru.com/""" """Extract images from https://gelbooru.com/"""
from . import booru from . import booru
from .. import config from .. import config
@@ -15,7 +15,7 @@ from .. import config
class GelbooruExtractor(booru.XMLBooruExtractor): class GelbooruExtractor(booru.XMLBooruExtractor):
"""Base class for gelbooru extractors""" """Base class for gelbooru extractors"""
category = "gelbooru" category = "gelbooru"
api_url = "http://gelbooru.com/" api_url = "https://gelbooru.com/"
pagestart = 0 pagestart = 0
pagekey = "pid" pagekey = "pid"

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from http://www.hentai-foundry.com/""" """Extract images from https://www.hentai-foundry.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, exception
@@ -25,15 +25,15 @@ class HentaifoundryUserExtractor(Extractor):
r"user/([^/]+)/profile"), r"user/([^/]+)/profile"),
] ]
test = [ test = [
("http://www.hentai-foundry.com/pictures/user/Tenpura", { ("https://www.hentai-foundry.com/pictures/user/Tenpura", {
"url": "35124cf236ffec596092446322b8f0ad603571c5", "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
"keyword": "6e9a549feb9bafebd9d9342ef3c8ccad33a7031c", "keyword": "6e9a549feb9bafebd9d9342ef3c8ccad33a7031c",
}), }),
("http://www.hentai-foundry.com/user/asdq/profile", { ("http://www.hentai-foundry.com/user/asdq/profile", {
"exception": exception.NotFoundError, "exception": exception.NotFoundError,
}), }),
] ]
url_base = "http://www.hentai-foundry.com/pictures/user/" url_base = "https://www.hentai-foundry.com/pictures/user/"
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self) Extractor.__init__(self)
@@ -85,7 +85,7 @@ class HentaifoundryUserExtractor(Extractor):
page, '//pictures.hentai-foundry.com', '"', pos) page, '//pictures.hentai-foundry.com', '"', pos)
data = {"index": index, "title": text.unescape(title)} data = {"index": index, "title": text.unescape(title)}
text.nameext_from_url(url, data) text.nameext_from_url(url, data)
return "http://pictures.hentai-foundry.com" + url, data return "https://pictures.hentai-foundry.com" + url, data
def set_filters(self, token): def set_filters(self, token):
"""Set site-internal filters to show all images""" """Set site-internal filters to show all images"""
@@ -107,11 +107,14 @@ class HentaifoundryUserExtractor(Extractor):
"rating_female": 1, "rating_female": 1,
"rating_futa": 1, "rating_futa": 1,
"rating_other": 1, "rating_other": 1,
"rating_scat": 1,
"rating_incest": 1,
"rating_rape": 1,
"filter_media": "A", "filter_media": "A",
"filter_order": "date_new", "filter_order": "date_new",
"filter_type": 0, "filter_type": 0,
} }
self.request("http://www.hentai-foundry.com/site/filters", self.request("https://www.hentai-foundry.com/site/filters",
method="post", data=formdata) method="post", data=formdata)
@@ -127,7 +130,7 @@ class HentaifoundryImageExtractor(Extractor):
test = [ test = [
(("http://www.hentai-foundry.com/" (("http://www.hentai-foundry.com/"
"pictures/user/Tenpura/407501/shimakaze"), { "pictures/user/Tenpura/407501/shimakaze"), {
"url": "b68d1b0121b97e01a878beeb2e43b07cb881b5a9", "url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
"keyword": "304479cfe00fbb723886be78b2bd6b9306a31d8a", "keyword": "304479cfe00fbb723886be78b2bd6b9306a31d8a",
"content": "91bf01497c39254b6dfb234a18e8f01629c77fd1", "content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
}), }),
@@ -149,7 +152,7 @@ class HentaifoundryImageExtractor(Extractor):
def get_image_metadata(self): def get_image_metadata(self):
"""Collect metadata for an image""" """Collect metadata for an image"""
url = "http://www.hentai-foundry.com/pictures/user/{}/{}".format( url = "https://www.hentai-foundry.com/pictures/user/{}/{}".format(
self.artist, self.index) self.artist, self.index)
response = self.session.get(url + "?enterAgree=1") response = self.session.get(url + "?enterAgree=1")
if response.status_code == 404: if response.status_code == 404:
@@ -165,4 +168,4 @@ class HentaifoundryImageExtractor(Extractor):
"title": text.unescape(title), "title": text.unescape(title),
} }
text.nameext_from_url(url, data) text.nameext_from_url(url, data)
return "http://pictures.hentai-foundry.com" + url, data return "https://pictures.hentai-foundry.com" + url, data

View File

@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2014-2016 Mike Fährmann # Copyright 2014-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from http://imgchili.net/""" """Extract images from https://imgchili.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text
@@ -16,7 +16,7 @@ class ImgchiliExtractor(Extractor):
"""Base class for imgchili extractors""" """Base class for imgchili extractors"""
category = "imgchili" category = "imgchili"
directory_fmt = ["{category}"] directory_fmt = ["{category}"]
url_base = "http://imgchili.net/" url_base = "https://imgchili.net/"
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self) Extractor.__init__(self)

View File

@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann # Copyright 2016-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract soundtracks from http://khinsider.com/""" """Extract soundtracks from https://downloads.khinsider.com/"""
from .common import AsynchronousExtractor, Message from .common import AsynchronousExtractor, Message
from .. import text, exception from .. import text, exception
@@ -31,7 +31,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
self.album = match.group(1) self.album = match.group(1)
def items(self): def items(self):
url = ("http://downloads.khinsider.com/game-soundtracks/album/" + url = ("https://downloads.khinsider.com/game-soundtracks/album/" +
self.album) self.album)
page = self.request(url, encoding="utf-8").text page = self.request(url, encoding="utf-8").text
data = self.get_job_metadata(page) data = self.get_job_metadata(page)

View File

@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from http://nhentai.net/""" """Extract images from https://nhentai.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text
@@ -46,7 +46,7 @@ class NhentaiGalleryExtractor(Extractor):
def get_gallery_info(self): def get_gallery_info(self):
"""Extract and return gallery-info""" """Extract and return gallery-info"""
page = self.request("http://nhentai.net/g/" + self.gid + "/1/").text page = self.request("https://nhentai.net/g/" + self.gid + "/1/").text
media_url, pos = text.extract( media_url, pos = text.extract(
page, ".reader({\n\t\t\tmedia_url: '", "'") page, ".reader({\n\t\t\tmedia_url: '", "'")
json_data, pos = text.extract( json_data, pos = text.extract(

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images and ugoira from http://www.pixiv.net/""" """Extract images and ugoira from https://www.pixiv.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import config, text, exception from .. import config, text, exception
@@ -31,8 +31,8 @@ class PixivUserExtractor(Extractor):
"exception": exception.NotFoundError, "exception": exception.NotFoundError,
}), }),
] ]
member_url = "http://www.pixiv.net/member_illust.php" member_url = "https://www.pixiv.net/member_illust.php"
illust_url = "http://www.pixiv.net/member_illust.php?mode=medium" illust_url = "https://www.pixiv.net/member_illust.php?mode=medium"
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self) Extractor.__init__(self)

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extractors for http://powermanga.org/""" """Extractors for https://powermanga.org/"""
from . import foolslide from . import foolslide

View File

@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from http://safebooru.org/""" """Extract images from https://safebooru.org/"""
from . import booru from . import booru
@@ -14,7 +14,7 @@ from . import booru
class SafebooruExtractor(booru.XMLBooruExtractor): class SafebooruExtractor(booru.XMLBooruExtractor):
"""Base class for safebooru extractors""" """Base class for safebooru extractors"""
category = "safebooru" category = "safebooru"
api_url = "http://safebooru.org/index.php" api_url = "https://safebooru.org/index.php"
pagestart = 0 pagestart = 0
pagekey = "pid" pagekey = "pid"
@@ -28,7 +28,7 @@ class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor):
pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?" pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=list&tags=([^&]+)")] r"\?page=post&s=list&tags=([^&]+)")]
test = [("http://safebooru.org/index.php?page=post&s=list&tags=bonocho", { test = [("http://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
"url": "c91e04ffbdf317fae95b2e160c8345503d9fb730", "url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb", "content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
})] })]
@@ -39,6 +39,6 @@ class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor):
pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?" pattern = [(r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?"
r"\?page=post&s=view&id=(\d+)")] r"\?page=post&s=view&id=(\d+)")]
test = [("http://safebooru.org/index.php?page=post&s=view&id=1169132", { test = [("http://safebooru.org/index.php?page=post&s=view&id=1169132", {
"url": "bcb6047665729c7c9db243a27f41cbef9af1ecef", "url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
"content": "93b293b27dabd198afafabbaf87c49863ac82f27", "content": "93b293b27dabd198afafabbaf87c49863ac82f27",
})] })]

View File

@@ -61,7 +61,6 @@ class Job():
exc.__class__.__name__, err) exc.__class__.__name__, err)
log.debug("Traceback", exc_info=True) log.debug("Traceback", exc_info=True)
def dispatch(self, msg): def dispatch(self, msg):
"""Call the appropriate message handler""" """Call the appropriate message handler"""
if msg[0] == Message.Url: if msg[0] == Message.Url:
@@ -297,7 +296,7 @@ class DataJob(Job):
copy = [ copy = [
part.copy() if hasattr(part, "copy") else part part.copy() if hasattr(part, "copy") else part
for part in msg for part in msg
] ]
self.data.append(copy) self.data.append(copy)
except Exception as exc: except Exception as exc:
self.data.append((exc.__class__.__name__, str(exc))) self.data.append((exc.__class__.__name__, str(exc)))

View File

@@ -51,7 +51,7 @@ skip = [
# don't work on travis-ci # don't work on travis-ci
"exhentai", "kissmanga", "mangafox", "dynastyscans", "exhentai", "kissmanga", "mangafox", "dynastyscans",
# temporary issues # temporary issues
"mangashare", "readcomics", "mangashare", "readcomics", "pawoo",
] ]
# enable selective testing for direct calls # enable selective testing for direct calls
if __name__ == '__main__' and len(sys.argv) > 1: if __name__ == '__main__' and len(sys.argv) > 1: