support direct image links

2017-05-24 12:51:18 +02:00
parent d2dceb35b7
commit 691c4dd709
4 changed files with 41 additions and 4 deletions
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -69,6 +69,7 @@ modules = [
    "yandere",
    "yonkouprod",
    "imagehosts",
+    "directlink",
    "recursive",
    "test",
 ]
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Direct link handling"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class DirectlinkExtractor(Extractor):
+    """Extractor for direct links to images"""
+    category = "directlink"  # same string as the new "modules" entry
+    directory_fmt = []  # empty list: no directory components declared
+    filename_fmt = "{filename}"  # only the "filename" field is referenced
+    pattern = [r"https?://[^?&#]+\.(?:jpe?g|png|gifv?|webm|mp4)"]
+    test = [("https://i.imgur.com/21yMxCS.png", {  # sample URL + hashes
+        "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
+        "keyword": "6a9636d8dd6f71f14d6d20d24153fc83a9895ed9",
+        "content": "0c8768055e4e20e7c7259608b67799171b691140",
+    })]
+
+    def __init__(self, match):
+        Extractor.__init__(self)  # base initializer gets no extra arguments
+        self.url = match.string  # whole input, assuming an re match object
+
+    def items(self):
+        data = text.nameext_from_url(self.url)  # presumably name/ext metadata
+        yield Message.Version, 1
+        yield Message.Directory, data  # data is also sent with the Url below
+        yield Message.Url, self.url, data  # the link itself is the only item
--- a/gallery_dl/extractor/recursive.py
+++ b/gallery_dl/extractor/recursive.py
@@ -10,7 +10,7 @@

 import re
 from .common import Extractor, Message
-from .. import adapter
+from .. import extractor, adapter


 class RecursiveExtractor(Extractor):
@@ -29,5 +29,6 @@ class RecursiveExtractor(Extractor):
    def items(self):
        page = self.request(self.url).text
        yield Message.Version, 1
-        for match in re.finditer(r"https?://[^\s\"']+", page):
-            yield Message.Queue, match.group(0)
+        with extractor.blacklist("directlink"):
+            for match in re.finditer(r"https?://[^\s\"']+", page):
+                yield Message.Queue, match.group(0)
--- a/test/test_extractors.py
+++ b/test/test_extractors.py
@@ -51,7 +51,7 @@ skip = [
    # dont work on travis-ci
    "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie",
    # temporary issues
-    "e621",
+
 ]
 # enable selective testing for direct calls
 if __name__ == '__main__' and len(sys.argv) > 1: