[hentaifoundry] improve/fix extraction

- Sometimes an ad interfered when trying to get a download URL
- Resolving "www.hentai-foundry.com" yields an invalid(?) IPv6 address (2607:5300:60:ca9e:feed:dead:beef:1) and urllib3 only tries to connect to the IPv4 variant after a rather long wait time
2019-02-25 16:07:48 +01:00
parent 280531c8ff
commit a138d5873d
1 changed files with 18 additions and 0 deletions
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -10,6 +10,8 @@

 from .common import Extractor, Message
 from .. import text, util, exception
+import requests.packages.urllib3.util.connection as u3_conn
+import socket


 class HentaifoundryExtractor(Extractor):
@@ -28,6 +30,10 @@ class HentaifoundryExtractor(Extractor):
        self.start_post = 0
        self.start_page = text.parse_int(page, 1)

+    def request(self, *args, **kwargs):  # same call contract as the base
+        with disable_ipv6():  # IPv4 only: the site's IPv6 address looks bad
+            return Extractor.request(self, *args, **kwargs)
+
    def items(self):
        data = self.get_job_metadata()
        yield Message.Version, 1
@@ -67,6 +73,7 @@ class HentaifoundryExtractor(Extractor):
        page = self.request(text.urljoin(self.root, page_url)).text
        index = page_url.rsplit("/", 2)[1]
        title , pos = text.extract(page, '<title>', '</title>')
+        _     , pos = text.extract(page, 'id="picBox"', '', pos)
        width , pos = text.extract(page, 'width="', '"', pos)
        height, pos = text.extract(page, 'height="', '"', pos)
        url   , pos = text.extract(page, 'src="', '"', pos)
@@ -261,3 +268,14 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):

    def skip(self, _):
        return 0
+
+
+class disable_ipv6():
+    """Context Manager: Reject IPv6 addresses during DNS lookup"""
+
+    def __enter__(self):  # save the hook active right now (nesting-safe)
+        self._allowed_gai_family = u3_conn.allowed_gai_family
+        u3_conn.allowed_gai_family = lambda: socket.AF_INET
+
+    def __exit__(self, etype, value, traceback):  # exceptions propagate
+        u3_conn.allowed_gai_family = self._allowed_gai_family