[hentaifoundry] improve/fix extraction
- Sometimes an ad interfered when trying to get a download URL.
- Resolving "www.hentai-foundry.com" yields an invalid(?) IPv6 address (2607:5300:60:ca9e:feed:dead:beef:1), and urllib3 only tries to connect to the IPv4 variant after a rather long wait time.
This commit is contained in:
@@ -10,6 +10,8 @@
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
import requests.packages.urllib3.util.connection as u3_conn
|
||||
import socket
|
||||
|
||||
|
||||
class HentaifoundryExtractor(Extractor):
|
||||
@@ -28,6 +30,10 @@ class HentaifoundryExtractor(Extractor):
|
||||
self.start_post = 0
|
||||
self.start_page = text.parse_int(page, 1)
|
||||
|
||||
def request(self, *args, **kwargs):
    """Send a request through Extractor.request() with IPv6 disabled

    All positional and keyword arguments are forwarded unchanged to
    Extractor.request(), and its return value is returned as is.
    """
    # The site's hostname resolves to an unusable IPv6 address, and
    # urllib3 only falls back to IPv4 after a long wait, so IPv6 is
    # rejected for the duration of the request.
    with disable_ipv6():
        return Extractor.request(self, *args, **kwargs)
|
||||
|
||||
def items(self):
|
||||
data = self.get_job_metadata()
|
||||
yield Message.Version, 1
|
||||
@@ -67,6 +73,7 @@ class HentaifoundryExtractor(Extractor):
|
||||
page = self.request(text.urljoin(self.root, page_url)).text
|
||||
index = page_url.rsplit("/", 2)[1]
|
||||
title , pos = text.extract(page, '<title>', '</title>')
|
||||
_ , pos = text.extract(page, 'id="picBox"', '', pos)
|
||||
width , pos = text.extract(page, 'width="', '"', pos)
|
||||
height, pos = text.extract(page, 'height="', '"', pos)
|
||||
url , pos = text.extract(page, 'src="', '"', pos)
|
||||
@@ -261,3 +268,14 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
|
||||
|
||||
def skip(self, _):
    """Ignore the argument and always return 0"""
    return 0
|
||||
|
||||
|
||||
class disable_ipv6():
    """Context Manager: Reject IPv6 addresses during DNS lookup

    While the context is active, 'u3_conn.allowed_gai_family' is replaced
    with a function that always returns 'socket.AF_INET'. On exit, the
    hook that was active when the context was entered is put back, so
    nested contexts unwind correctly and an enclosing context stays in
    effect until its own exit.

    NOTE: this rebinds a module-level attribute, so it affects every
    caller of 'u3_conn.allowed_gai_family' while the context is active.
    """
    # Hook as found when this class was defined; kept for backward
    # compatibility with code that reads this attribute.
    _allowed_gai_family = u3_conn.allowed_gai_family

    def __init__(self):
        # Stack of hooks saved by __enter__, so one instance can be
        # entered more than once without losing an earlier saved hook.
        self._saved = []

    def __enter__(self):
        # Save the hook that is active *now*, not the import-time one;
        # otherwise a nested exit would re-enable IPv6 too early.
        self._saved.append(u3_conn.allowed_gai_family)
        u3_conn.allowed_gai_family = lambda: socket.AF_INET

    def __exit__(self, etype, value, traceback):
        # Restore exactly what the matching __enter__ replaced.
        # Returns None, so exceptions from the 'with' body propagate.
        u3_conn.allowed_gai_family = self._saved.pop()
|
||||
|
||||
Reference in New Issue
Block a user