diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index 7301cbcb..e1b14ef6 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2018-2019 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -16,9 +16,9 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
     """Extractor for image galleries from simply-hentai.com"""
     category = "simplyhentai"
     archive_fmt = "{image_id}"
-    pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com"
+    pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.)?simply-hentai\.com"
                r"(?!/(?:album|gifs?|images?|series)(?:/|$))"
-               r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?#]+)+)")
+               r"((?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?#]+)+)")
     test = (
         (("https://original-work.simply-hentai.com"
           "/amazon-no-hiyaku-amazon-elixir"), {
@@ -35,7 +35,10 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
     )
 
     def __init__(self, match):
-        url = "https://" + match.group(1)
+        subdomain, path = match.groups()
+        if subdomain and subdomain not in ("www.", "old."):
+            path = "/" + subdomain.rstrip(".") + path
+        url = "https://old.simply-hentai.com" + path
         GalleryExtractor.__init__(self, match, url)
         self.session.headers["Referer"] = url
 
@@ -43,7 +46,6 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
         extr = text.extract_from(page)
         split = text.split_html
 
-        self.gallery_url = extr('