diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 645b4e9c..f66634b2 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -12,27 +12,17 @@ from .common import AsynchronousExtractor, Message from .. import config, text, exception from ..cache import cache -class NijieUserExtractor(AsynchronousExtractor): - """Extractor for works of a nijie-user""" +class NijieExtractor(AsynchronousExtractor): + """Base class for nijie extractors""" category = "nijie" - subcategory = "user" directory_fmt = ["{category}", "{artist-id}"] filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}" - pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"] - test = [("https://nijie.info/members_illust.php?id=44", { - "url": "585d821df4716b1098660a0be426d01db4b65f2a", - "keyword": "30c981b9d7351ec275b9840d8bc2b4ef3da8c4b4", - })] popup_url = "https://nijie.info/view_popup.php?id=" - def __init__(self, match): + def __init__(self): AsynchronousExtractor.__init__(self) - self.artist_id = match.group(1) - self.artist_url = ( - "https://nijie.info/members_illust.php?id=" - + self.artist_id - ) - self.session.headers["Referer"] = self.artist_url + self.session.headers["Referer"] = "https://nijie.info/" + self.artist_id = "" def items(self): self.session.cookies = self.login( @@ -57,14 +47,16 @@ class NijieUserExtractor(AsynchronousExtractor): def get_image_ids(self): """Collect all image-ids for a specific artist""" - response = self.session.get(self.artist_url) - if response.status_code == 404: - raise exception.NotFoundError("artist") - return list(text.extract_iter(response.text, ' illust_id="', '"')) + return [] def get_image_data(self, image_id): """Get URL and metadata for images specified by 'image_id'""" page = self.request(self.popup_url + image_id).text + return self.extract_image_data(page, image_id) + + @staticmethod + def extract_image_data(page, image_id): + """Get URL and metadata for images from 'page'""" images = list(text.extract_iter(page, '