[hentaifoundry] improve/fix extraction

- Sometimes an ad interfered when trying to get a download URL
- Resolving "www.hentai-foundry.com" yields an invalid(?) IPv6 address
  (2607:5300:60:ca9e:feed:dead:beef:1) and urllib3 only tries to connect
  to the IPv4 variant after a rather long wait time
This commit is contained in:
Mike Fährmann
2019-02-25 16:07:48 +01:00
parent 280531c8ff
commit a138d5873d

View File

@@ -10,6 +10,8 @@
from .common import Extractor, Message
from .. import text, util, exception
import requests.packages.urllib3.util.connection as u3_conn
import socket
class HentaifoundryExtractor(Extractor):
@@ -28,6 +30,10 @@ class HentaifoundryExtractor(Extractor):
self.start_post = 0
self.start_page = text.parse_int(page, 1)
def request(*args, **kwargs):
    """Forward the call to Extractor.request() with IPv6 lookups disabled.

    Every argument (including the instance, which arrives as the first
    positional argument) is passed through unchanged, and whatever
    Extractor.request() returns is handed back to the caller.
    """
    # Resolving the site's hostname yields an apparently invalid IPv6
    # address, and urllib3 only falls back to IPv4 after a long wait,
    # so restrict the lookup to IPv4 for the duration of the request.
    with disable_ipv6():
        response = Extractor.request(*args, **kwargs)
    return response
def items(self):
data = self.get_job_metadata()
yield Message.Version, 1
@@ -67,6 +73,7 @@ class HentaifoundryExtractor(Extractor):
page = self.request(text.urljoin(self.root, page_url)).text
index = page_url.rsplit("/", 2)[1]
title , pos = text.extract(page, '<title>', '</title>')
_ , pos = text.extract(page, 'id="picBox"', '', pos)
width , pos = text.extract(page, 'width="', '"', pos)
height, pos = text.extract(page, 'height="', '"', pos)
url , pos = text.extract(page, 'src="', '"', pos)
@@ -261,3 +268,14 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
def skip(self, _):
    """Return 0; the argument is accepted but ignored."""
    # NOTE(review): presumably the return value is the number of items
    # actually skipped -- confirm against the base class contract.
    return 0
class disable_ipv6():
    """Context manager that rejects IPv6 addresses during DNS lookup.

    While the ``with`` block is active, urllib3's ``allowed_gai_family``
    hook is replaced by one that always returns ``socket.AF_INET``.
    On exit, the hook that was in place when this class was defined is
    put back, regardless of whether an exception occurred.
    """

    # Original urllib3 hook, captured once at class-definition time.
    # Always read it through the class (not an instance) so the stored
    # function is not turned into a bound method.
    _allowed_gai_family = u3_conn.allowed_gai_family

    @staticmethod
    def _ipv4_only():
        """Replacement hook: report IPv4 as the only address family."""
        return socket.AF_INET

    def __enter__(self):
        u3_conn.allowed_gai_family = self._ipv4_only

    def __exit__(self, etype, value, traceback):
        original_hook = disable_ipv6._allowed_gai_family
        u3_conn.allowed_gai_family = original_hook