[hentai2read] fix and update keywords
Added the "author" keyword and renamed a few other keywords for consistency with the other manga/chapter extractors.
This commit is contained in:
@@ -42,7 +42,7 @@ class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
|||||||
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"]
|
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"]
|
||||||
test = [("http://hentai2read.com/amazon_elixir/1/", {
|
test = [("http://hentai2read.com/amazon_elixir/1/", {
|
||||||
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
|
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
|
||||||
"keyword": "c05d0d0bbe188926b15a43df1f8f65b8ac11c3fd",
|
"keyword": "fc79e4c70d61ae476aea2b63a75324e3d96f4497",
|
||||||
})]
|
})]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -54,13 +54,16 @@ class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
|||||||
|
|
||||||
def get_job_metadata(self, page, images):
|
def get_job_metadata(self, page, images):
|
||||||
title = text.extract(page, "<title>", "</title>")[0]
|
title = text.extract(page, "<title>", "</title>")[0]
|
||||||
match = re.match(r"Reading (?:(.+) dj - )?(.+) Hentai - \d+: ", title)
|
match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
|
||||||
|
r"(\d+): (.+) . Page 1 ", title)
|
||||||
return {
|
return {
|
||||||
"gallery-id": images[0].split("/")[-3],
|
"manga-id": images[0].split("/")[-3],
|
||||||
"chapter": self.chapter,
|
"chapter": self.chapter,
|
||||||
"count": len(images),
|
"count": len(images),
|
||||||
"series": match.group(1) or "",
|
"manga": match.group(1),
|
||||||
"title": match.group(2),
|
"type": match.group(2),
|
||||||
|
"author": match.group(3),
|
||||||
|
"title": match.group(5),
|
||||||
"lang": "en",
|
"lang": "en",
|
||||||
"language": "English",
|
"language": "English",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,8 +16,8 @@ import json
|
|||||||
class HentaicdnChapterExtractor(Extractor):
|
class HentaicdnChapterExtractor(Extractor):
|
||||||
"""Base class for extractors for a single manga chapter"""
|
"""Base class for extractors for a single manga chapter"""
|
||||||
subcategory = "chapter"
|
subcategory = "chapter"
|
||||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
directory_fmt = ["{category}", "{manga-id} {title}"]
|
||||||
filename_fmt = ("{category}_{gallery-id}_{chapter:>02}_"
|
filename_fmt = ("{category}_{manga-id}_{chapter:>02}_"
|
||||||
"{num:>03}.{extension}")
|
"{num:>03}.{extension}")
|
||||||
url = ""
|
url = ""
|
||||||
|
|
||||||
@@ -27,8 +27,7 @@ class HentaicdnChapterExtractor(Extractor):
|
|||||||
data = self.get_job_metadata(page, images)
|
data = self.get_job_metadata(page, images)
|
||||||
yield Message.Version, 1
|
yield Message.Version, 1
|
||||||
yield Message.Directory, data
|
yield Message.Directory, data
|
||||||
for num, part in enumerate(images, 1):
|
for data["num"], part in enumerate(images, 1):
|
||||||
data["num"] = num
|
|
||||||
url = "https://hentaicdn.com/hentai" + part
|
url = "https://hentaicdn.com/hentai" + part
|
||||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||||
|
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
|||||||
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"]
|
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"]
|
||||||
test = [("https://hentaihere.com/m/S13812/1/1/", {
|
test = [("https://hentaihere.com/m/S13812/1/1/", {
|
||||||
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
|
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
|
||||||
"keyword": "e8625ccca8466a5dee089394fc29efea6d6e2950",
|
"keyword": "fd6e515ccf073e3b57d39c5cb472692858bddb88",
|
||||||
})]
|
})]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -55,11 +55,11 @@ class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
|
|||||||
pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
|
pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
|
||||||
match = re.match(pattern, title)
|
match = re.match(pattern, title)
|
||||||
return {
|
return {
|
||||||
"gallery-id": self.gid,
|
"manga-id": self.gid,
|
||||||
"title": match.group(1),
|
"manga": match.group(1),
|
||||||
"type": match.group(2),
|
"type": match.group(2),
|
||||||
"chapter": self.chapter,
|
"chapter": self.chapter,
|
||||||
"chapter-name": match.group(3),
|
"title": match.group(3),
|
||||||
"author": match.group(4),
|
"author": match.group(4),
|
||||||
"count": len(images),
|
"count": len(images),
|
||||||
"lang": "en",
|
"lang": "en",
|
||||||
|
|||||||
@@ -267,7 +267,7 @@ class PixivRankingExtractor(PixivExtractor):
|
|||||||
test = [
|
test = [
|
||||||
(("https://www.pixiv.net/ranking.php"
|
(("https://www.pixiv.net/ranking.php"
|
||||||
"?mode=daily&content=illust&date=20170818"), {
|
"?mode=daily&content=illust&date=20170818"), {
|
||||||
"url": "83a3809e52a58f39f5cf5878fa8fcd9d8df6c760",
|
"url": "7fdffbecfbd420b1d202fa417d79317240be30bc",
|
||||||
}),
|
}),
|
||||||
("https://www.pixiv.net/ranking.php", None),
|
("https://www.pixiv.net/ranking.php", None),
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user