[hentaifoundry] improve/fix extraction
- Sometimes an ad interfered when trying to get a download URL.
- Resolving "www.hentai-foundry.com" yields an invalid(?) IPv6 address (2607:5300:60:ca9e:feed:dead:beef:1), and urllib3 only tries to connect to the IPv4 variant after a rather long wait time.
This commit is contained in:
@@ -10,6 +10,8 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text, util, exception
|
from .. import text, util, exception
|
||||||
|
import requests.packages.urllib3.util.connection as u3_conn
|
||||||
|
import socket
|
||||||
|
|
||||||
|
|
||||||
class HentaifoundryExtractor(Extractor):
|
class HentaifoundryExtractor(Extractor):
|
||||||
@@ -28,6 +30,10 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
self.start_post = 0
|
self.start_post = 0
|
||||||
self.start_page = text.parse_int(page, 1)
|
self.start_page = text.parse_int(page, 1)
|
||||||
|
|
||||||
|
def request(*args, **kwargs):
    """Forward the call to Extractor.request() inside disable_ipv6().

    All positional and keyword arguments (including the instance, which
    arrives as the first positional argument) are passed through
    unchanged, and the value returned by Extractor.request() is handed
    back to the caller.
    """
    # Keep the disable_ipv6 context active for the whole duration
    # of the underlying request.
    with disable_ipv6():
        response = Extractor.request(*args, **kwargs)
    return response
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
data = self.get_job_metadata()
|
data = self.get_job_metadata()
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
@@ -67,6 +73,7 @@ class HentaifoundryExtractor(Extractor):
|
|||||||
page = self.request(text.urljoin(self.root, page_url)).text
|
page = self.request(text.urljoin(self.root, page_url)).text
|
||||||
index = page_url.rsplit("/", 2)[1]
|
index = page_url.rsplit("/", 2)[1]
|
||||||
title , pos = text.extract(page, '<title>', '</title>')
|
title , pos = text.extract(page, '<title>', '</title>')
|
||||||
|
_ , pos = text.extract(page, 'id="picBox"', '', pos)
|
||||||
width , pos = text.extract(page, 'width="', '"', pos)
|
width , pos = text.extract(page, 'width="', '"', pos)
|
||||||
height, pos = text.extract(page, 'height="', '"', pos)
|
height, pos = text.extract(page, 'height="', '"', pos)
|
||||||
url , pos = text.extract(page, 'src="', '"', pos)
|
url , pos = text.extract(page, 'src="', '"', pos)
|
||||||
@@ -261,3 +268,14 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
|
|||||||
|
|
||||||
def skip(self, _):
|
def skip(self, _):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
class disable_ipv6():
    """Context manager that rejects IPv6 addresses during DNS lookup.

    While the context is active, 'allowed_gai_family' on the urllib3
    connection module (imported as u3_conn) is replaced with a callable
    that always returns socket.AF_INET. On exit, the callable that was
    in place when this class was defined is put back.
    """

    # The 'allowed_gai_family' callable as it was at class-definition time;
    # this is what __exit__ restores.
    _allowed_gai_family = u3_conn.allowed_gai_family

    def __enter__(self):
        def _ipv4_only():
            return socket.AF_INET

        u3_conn.allowed_gai_family = _ipv4_only

    def __exit__(self, etype, value, traceback):
        # Look the saved callable up on the class itself (not on the
        # instance) so that it comes back as a plain function rather
        # than a bound method.
        original = disable_ipv6._allowed_gai_family
        u3_conn.allowed_gai_family = original
|
||||||
|
|||||||
Reference in New Issue
Block a user