From efaba5f3985a336aea9dfd4b6279d0078826528d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 3 Jan 2017 15:03:36 +0100 Subject: [PATCH] [luscious] support new album URLs + small fixes --- gallery_dl/extractor/luscious.py | 42 +++++++++++++++++++------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 5eb245f1..3faf42dd 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -10,7 +10,6 @@ from .common import Extractor, Message from .. import text, iso639_1 -from urllib.parse import urljoin import re class LusciousAlbumExtractor(Extractor): @@ -19,17 +18,24 @@ class LusciousAlbumExtractor(Extractor): subcategory = "album" directory_fmt = ["{category}", "{gallery-id} {title}"] filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}" - pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/c/([^/]+)/" - r"(?:pictures/album|albums)/([^/]+_(\d+))")] - test = [("https://luscious.net/c/hentai_manga/albums/okinami-no-koigokoro_277031/view/", { - "url": "7e4984a271a1072ac6483e4228a045895aff86f3", - "keyword": "f3087f8e5c84e3f52fcd7d672cf0e6beae821837", - "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3", - })] + pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/" + r"(?:c/[^/]+/)?(?:pictures/album|albums)/([^/]+_(\d+))")] + test = [ + ("https://luscious.net/c/hentai_manga/albums/okinami-no-koigokoro_277031/view/", { + "url": "7e4984a271a1072ac6483e4228a045895aff86f3", + "keyword": "3b3d36b355fa6a1a6c24be374ae16e6e9b0c729e", + "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3", + }), + ("https://luscious.net/albums/okinami-no-koigokoro_277031/", { + "url": "7e4984a271a1072ac6483e4228a045895aff86f3", + "keyword": "3b3d36b355fa6a1a6c24be374ae16e6e9b0c729e", + }), + ] def __init__(self, match): Extractor.__init__(self) - self.section, self.gpart, self.gid = match.groups() + self.gpart, self.gid = match.groups() + self.section = "x" def items(self): data = self.get_job_metadata() @@ -41,24 +47,26 @@ class LusciousAlbumExtractor(Extractor): def get_job_metadata(self): """Collect metadata for extractor-job""" - url = "https://luscious.net/c/{}/albums/{}/view/".format( - self.section, self.gpart) + url = "https://luscious.net/albums/" + self.gpart + "/" page = self.request(url).text - data, pos = text.extract_all(page, ( + data = text.extract_all(page, ( ("title" , '"og:title" content="', '"'), + ("tags" , '', ''), ("count" , '

', ' '), (None , '

Section:', ''), ("section" , '>', '<'), - (None , '

Language:', ''), - ("language", '', ' '), - ("artist" , 'rtist:\n', '\n'), - ), values={"gallery-id": self.gid}) + ("language", '

Language:', ' '), + ), values={"gallery-id": self.gid})[0] data["lang"] = iso639_1.language_to_code(data["language"]) - data["artist"] = (data["artist"] or "").strip() + data["artist"] = text.extract(data["tags"], "rtist: ", ",")[0] or "" + self.section = data["com"] + del data["com"] return data def get_images(self): + """Collect image-urls and -metadata""" pnum = 1 inum = 1 apiurl = ("https://luscious.net/c/{}/pictures/album/{}/page/{{}}/.json"