change Chapter and MangaExtractor classes

- unify and simplify constructors
- rename get_metadata() and get_images() to just metadata() and images()
- rename self.url to chapter_url and manga_url
2019-02-11 18:38:47 +01:00
parent 4b1880fa5e
commit 580baef72c
24 changed files with 435 additions and 467 deletions
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -10,21 +10,65 @@

 from .common import ChapterExtractor, MangaExtractor
 from .. import text
-import re
 import json
+import re


-class Hentai2readMangaExtractor(MangaExtractor):
-    """Extractor for hmanga from hentai2read.com"""
+class Hentai2readBase():
+    """Base class for hentai2read extractors"""
    category = "hentai2read"
-    scheme = "https"
-    pattern = r"(?:https?://)?(?:www\.)?(hentai2read\.com/[^/]+/?)$"
+    root = "https://hentai2read.com"
+
+
+class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
+    """Extractor for a single manga chapter from hentai2read.com"""
+    archive_fmt = "{chapter_id}_{page}"
+    pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?&#]+/(\d+))"
+    test = ("https://hentai2read.com/amazon_elixir/1/", {
+        "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
+        "keyword": "9845105898d28c6a540cffdea60a1a20fab52431",
+    })
+
+    def __init__(self, match):
+        self.chapter = match.group(2)
+        ChapterExtractor.__init__(self, match)
+
+    def metadata(self, page):
+        title, pos = text.extract(page, "<title>", "</title>")
+        manga_id, pos = text.extract(page, 'data-mid="', '"', pos)
+        chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
+        match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
+                         r"(\d+): (.+) . Page 1 ", title)
+        return {
+            "manga": match.group(1),
+            "manga_id": text.parse_int(manga_id),
+            "chapter": text.parse_int(self.chapter),
+            "chapter_id": text.parse_int(chapter_id),
+            "type": match.group(2),
+            "author": match.group(3),
+            "title": match.group(5),
+            "lang": "en",
+            "language": "English",
+        }
+
+    @staticmethod
+    def images(page):
+        images = text.extract(page, "'images' : ", ",\n")[0]
+        return [
+            ("https://hentaicdn.com/hentai" + part, None)
+            for part in json.loads(images)
+        ]
+
+
+class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor):
+    """Extractor for hmanga from hentai2read.com"""
+    pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?&#]+)/?$"
    test = (
-        ("http://hentai2read.com/amazon_elixir/", {
+        ("https://hentai2read.com/amazon_elixir/", {
            "url": "273073752d418ec887d7f7211e42b832e8c403ba",
            "keyword": "13c1ce7e15cbb941f01c843b0e89adc993d939ac",
        }),
-        ("http://hentai2read.com/oshikage_riot/", {
+        ("https://hentai2read.com/oshikage_riot/", {
            "url": "6595f920a3088a15c2819c502862d45f8eb6bea6",
            "keyword": "675c7b7a4fa52cf569c283553bd16b4200a5cd36",
        }),
@@ -54,46 +98,3 @@ class Hentai2readMangaExtractor(MangaExtractor):
                "chapter": text.parse_int(chapter),
                "title": title, "lang": "en", "language": "English",
            }))
-
-
-class Hentai2readChapterExtractor(ChapterExtractor):
-    """Extractor for a single manga chapter from hentai2read.com"""
-    category = "hentai2read"
-    archive_fmt = "{chapter_id}_{page}"
-    pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"
-    test = ("http://hentai2read.com/amazon_elixir/1/", {
-        "url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
-        "keyword": "9845105898d28c6a540cffdea60a1a20fab52431",
-    })
-
-    def __init__(self, match):
-        url_title, self.chapter = match.groups()
-        url = "https://hentai2read.com/{}/{}/".format(url_title, self.chapter)
-        ChapterExtractor.__init__(self, match, url)
-
-    def get_metadata(self, page):
-        title, pos = text.extract(page, "<title>", "</title>")
-        manga_id, pos = text.extract(page, 'data-mid="', '"', pos)
-        chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
-        match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
-                         r"(\d+): (.+) . Page 1 ", title)
-        return {
-            "manga": match.group(1),
-            "manga_id": text.parse_int(manga_id),
-            "chapter": text.parse_int(self.chapter),
-            "chapter_id": text.parse_int(chapter_id),
-            "type": match.group(2),
-            "author": match.group(3),
-            "title": match.group(5),
-            "lang": "en",
-            "language": "English",
-        }
-
-    @staticmethod
-    def get_images(page):
-        """Extract and return a list of all image-urls"""
-        images = text.extract(page, "'images' : ", ",\n")[0]
-        return [
-            ("https://hentaicdn.com/hentai" + part, None)
-            for part in json.loads(images)
-        ]