[nhentai] use API to get gallery info

This commit is contained in:
Mike Fährmann
2018-03-21 12:37:46 +01:00
parent b2ba2b821d
commit f5c6a2d7f5
3 changed files with 10 additions and 21 deletions

View File

@@ -80,11 +80,11 @@ class MangaparkChapterExtractor(MangaparkExtractor, ChapterExtractor):
(("https://mangapark.me/manga/" (("https://mangapark.me/manga/"
"ad-astra-per-aspera-hata-kenjirou/s5/c1.2"), { "ad-astra-per-aspera-hata-kenjirou/s5/c1.2"), {
"count": 40, "count": 40,
"keyword": "fb5082bb60e19cae0a194b89f69f333888a9325d", "keyword": "f7f7fb1ca8b26a59a47d8ec60c5eaaf69a43a3f6",
}), }),
("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", { ("https://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
"count": 15, "count": 15,
"keyword": "dc9233cdd83d8659300f0a20ec3c493873f71741", "keyword": "8d5d1608d4182495ea43ad665e25b755b6468be2",
}), }),
] ]

View File

@@ -9,8 +9,6 @@
"""Extract images from https://nhentai.net/""" """Extract images from https://nhentai.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text
import json
class NhentaiGalleryExtractor(Extractor): class NhentaiGalleryExtractor(Extractor):
@@ -21,9 +19,9 @@ class NhentaiGalleryExtractor(Extractor):
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}" filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
archive_fmt = "{gallery_id}_{num}" archive_fmt = "{gallery_id}_{num}"
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"] pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
test = [("http://nhentai.net/g/147850/", { test = [("https://nhentai.net/g/147850/", {
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0", "url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
"keyword": "82751294e75fc203b019ffd94d8c1f94a5b86494", "keyword": "2f94976e657f3043a89997e22f4de8e1b22d9175",
})] })]
def __init__(self, match): def __init__(self, match):
@@ -33,8 +31,8 @@ class NhentaiGalleryExtractor(Extractor):
def items(self): def items(self):
ginfo = self.get_gallery_info() ginfo = self.get_gallery_info()
data = self.get_job_metadata(ginfo) data = self.get_job_metadata(ginfo)
urlfmt = "{}galleries/{}/{{}}.{{}}".format( urlfmt = "https://i.nhentai.net/galleries/{}/{{}}.{{}}".format(
ginfo["media_url"], data["media_id"]) data["media_id"])
extdict = {"j": "jpg", "p": "png", "g": "gif"} extdict = {"j": "jpg", "p": "png", "g": "gif"}
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, data yield Message.Directory, data
@@ -47,24 +45,15 @@ class NhentaiGalleryExtractor(Extractor):
def get_gallery_info(self): def get_gallery_info(self):
"""Extract and return gallery-info""" """Extract and return gallery-info"""
page = self.request("https://nhentai.net/g/" + self.gid + "/1/").text url = "https://nhentai.net/api/gallery/" + self.gid
media_url, pos = text.extract( return self.request(url).json()
page, "media_url: '", "'")
json_data, pos = text.extract(
page, "gallery: ", ",\n", pos)
if json_data.startswith("b'"):
json_data = json_data[2:-1].replace(r"\\u", r"\u")
json_dict = json.loads(json_data)
json_dict["media_url"] = media_url
return json_dict
def get_job_metadata(self, ginfo): def get_job_metadata(self, ginfo):
"""Collect metadata for extractor-job""" """Collect metadata for extractor-job"""
title_en = ginfo["title"].get("english", "") title_en = ginfo["title"].get("english", "")
title_ja = ginfo["title"].get("japanese", "") title_ja = ginfo["title"].get("japanese", "")
return { return {
"gallery_id": self.gid, "gallery_id": ginfo["id"],
"upload_date": ginfo["upload_date"], "upload_date": ginfo["upload_date"],
"media_id": ginfo["media_id"], "media_id": ginfo["media_id"],
"scanlator": ginfo["scanlator"], "scanlator": ginfo["scanlator"],

View File

@@ -21,7 +21,7 @@ TRAVIS_SKIP = {
# temporary issues, etc. # temporary issues, etc.
BROKEN = { BROKEN = {
"mangapark", "mangahere", # invalid SSL cert
"puremashiro", # online reader down "puremashiro", # online reader down
} }