diff --git a/gallery_dl/extractor/pururin.py b/gallery_dl/extractor/pururin.py index 32567f63..e56dbcd6 100644 --- a/gallery_dl/extractor/pururin.py +++ b/gallery_dl/extractor/pururin.py @@ -10,7 +10,6 @@ from .common import GalleryExtractor from .. import text, util -import binascii class PururinGalleryExtractor(GalleryExtractor): @@ -19,12 +18,11 @@ class PururinGalleryExtractor(GalleryExtractor): pattern = r"(?:https?://)?(?:www\.)?pururin\.[ti]o/(?:gallery|read)/(\d+)" test = ( ("https://pururin.to/gallery/38661/iowant-2", { - "pattern": r"https://cdn.pururin.[ti]o/\w+" - r"/images/data/\d+/\d+\.jpg", + "pattern": r"https://i\.pururin\.to/38661/\d+\.jpg", "keyword": { "title" : "re:I ?owant 2!!", "title_en" : "re:I ?owant 2!!", - "title_jp" : "", + "title_ja" : "", "gallery_id": 38661, "count" : 19, "artist" : ["Shoda Norihiro"], @@ -33,11 +31,11 @@ class PururinGalleryExtractor(GalleryExtractor): "characters": ["Iowa", "Teitoku"], "tags" : list, "type" : "Doujinshi", - "collection": "I owant you!", - "convention": "C92", + "collection": ["I owant you!"], + "convention": ["C92"], "rating" : float, "uploader" : "demo", - "scanlator" : "mrwayne", + "scanlator" : ["mrwayne", "The Lost Light"], "lang" : "en", "language" : "English", } @@ -54,51 +52,49 @@ class PururinGalleryExtractor(GalleryExtractor): url = "{}/gallery/{}/x".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) - self._ext = "" - self._cnt = 0 - def metadata(self, page): extr = text.extract_from(page) - def _lst(key, e=extr): - return [ - text.unescape(item) - for item in text.extract_iter(e(key, ""), 'title="', '"') - ] + def _lst(e=extr): + v = text.unescape(e('value="', '"')) + return [item["name"] for item in util.json_loads(v)] if v else () def _str(key, e=extr): - return text.unescape(text.extract( - e(key, ""), 'title="', '"')[0] or "") + return text.unescape(text.extr( + e(key, ""), 'title="', '"')).partition(" / ")[0] - url = "{}/read/{}/01/x".format(self.root, self.gallery_id) - page = self.request(url).text - info = util.json_loads(binascii.a2b_base64(text.extr( - page, '', '<')) + title_en, _, title_ja = title.partition(" / ") data = { "gallery_id": text.parse_int(self.gallery_id), - "title" : info["title"] or info.get("j_title") or "", - "title_en" : info["title"], - "title_jp" : info.get("j_title") or "", - "artist" : _lst("Artist"), - "group" : _lst("Circle"), - "parody" : _lst("Parody"), - "tags" : _lst("Contents"), - "type" : _str("Category"), - "characters": _lst("Character"), - "collection": _str("Collection"), + "title" : title_en or title_ja, + "title_en" : title_en, + "title_ja" : title_ja, "language" : _str("Language"), - "scanlator" : _str("Scanlator"), - "convention": _str("Convention"), + "type" : _str("Category"), "uploader" : text.remove_html(extr("Uploader", "")), - "rating" : text.parse_float(extr(" :rating='" , "'")), + "rating" : text.parse_float(extr( + 'itemprop="ratingValue" content="', '"')), + "artist" : extr('name="artist_tags"', '') or _lst(), + "group" : _lst(), + "parody" : _lst(), + "tags" : _lst(), + "characters": _lst(), + "scanlator" : _lst(), + "convention": _lst(), + "collection": _lst(), } data["lang"] = util.language_to_code(data["language"]) return data def images(self, _): - ufmt = "https://cdn.pururin.to/assets/images/data/{}/{{}}.{}".format( - self.gallery_id, self._ext) - return [(ufmt.format(num), None) for num in range(1, self._cnt + 1)] + url = "{}/read/{}/01/x".format(self.root, self.gallery_id) + page = self.request(url).text + + svr, pos = text.extract(page, 'data-svr="', '"') + img, pos = text.extract(page, 'data-img="', '"', pos) + data = util.json_loads(text.unescape(img)) + + base = "{}/{}/".format(svr, data["directory"]) + return [(base + i["filename"], None) for i in data["images"]]