[simplyhentai] fix/improve extraction
This commit is contained in:
@@ -121,7 +121,7 @@ class NijieUserExtractor(NijieExtractor):
|
||||
r"/members(?:_illust)?\.php\?id=(\d+)")
|
||||
test = (
|
||||
("https://nijie.info/members_illust.php?id=44", {
|
||||
"url": "4725cf3cd9cff4715db3c7e04dd7c323b9f2dec5",
|
||||
"url": "66c4ff94c6e77c0765dd88f2d8c663055fda573e",
|
||||
"keyword": "d629c69e3172db1d7e026145e8eb640ac31ac16a",
|
||||
}),
|
||||
("https://nijie.info/members_illust.php?id=43", {
|
||||
@@ -169,7 +169,7 @@ class NijieImageExtractor(NijieExtractor):
|
||||
r"/view(?:_popup)?\.php\?id=(\d+)")
|
||||
test = (
|
||||
("https://nijie.info/view.php?id=70720", {
|
||||
"url": "c1881afd89b79af3d166545de15bab4d1e32c340",
|
||||
"url": "5497f897311397dafa188521258624346a0af2a3",
|
||||
"keyword": "408393d010307c76d52cbd0a4368d6d357805aea",
|
||||
"content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
|
||||
}),
|
||||
|
||||
@@ -23,7 +23,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
||||
(("https://original-work.simply-hentai.com"
|
||||
"/amazon-no-hiyaku-amazon-elixir"), {
|
||||
"url": "258289249990502c3138719cb89e995a60861e49",
|
||||
"keyword": "18ab9defca53dbb2aeb7965193e93e0ea125b76b",
|
||||
"keyword": "eba83ccdbab3022a2280c77aa747f9458196138b",
|
||||
}),
|
||||
("https://www.simply-hentai.com/notfound", {
|
||||
"exception": exception.GalleryDLException,
|
||||
@@ -40,30 +40,26 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
|
||||
self.session.headers["Referer"] = url
|
||||
|
||||
def metadata(self, page):
|
||||
extr = text.extract
|
||||
title , pos = extr(page, '<meta property="og:title" content="', '"')
|
||||
extr = text.extract_from(page)
|
||||
split = text.split_html
|
||||
|
||||
title = extr('<meta property="og:title" content="', '"')
|
||||
if not title:
|
||||
raise exception.NotFoundError("gallery")
|
||||
gid , pos = extr(page, '/Album/', '/', pos)
|
||||
series, pos = extr(page, 'box-title">Series</div>', '</div>', pos)
|
||||
lang , pos = extr(page, 'box-title">Language</div>', '</div>', pos)
|
||||
chars , pos = extr(page, 'box-title">Characters</div>', '</div>', pos)
|
||||
tags , pos = extr(page, 'box-title">Tags</div>', '</div>', pos)
|
||||
artist, pos = extr(page, 'box-title">Artists</div>', '</div>', pos)
|
||||
date , pos = extr(page, 'Uploaded', '</div>', pos)
|
||||
lang = text.remove_html(lang) if lang else None
|
||||
|
||||
return {
|
||||
"gallery_id": text.parse_int(gid),
|
||||
data = {
|
||||
"title" : text.unescape(title),
|
||||
"artist" : text.split_html(artist),
|
||||
"parody" : text.split_html(series),
|
||||
"characters": text.split_html(chars),
|
||||
"tags" : text.split_html(tags),
|
||||
"lang" : util.language_to_code(lang),
|
||||
"language" : lang,
|
||||
"date" : text.remove_html(date),
|
||||
"gallery_id": text.parse_int(extr('/Album/', '/')),
|
||||
"parody" : split(extr('box-title">Series</div>', '</div>')),
|
||||
"language" : text.remove_html(extr(
|
||||
'box-title">Language</div>', '</div>')) or None,
|
||||
"characters": split(extr('box-title">Characters</div>', '</div>')),
|
||||
"tags" : split(extr('box-title">Tags</div>', '</div>')),
|
||||
"artist" : split(extr('box-title">Artists</div>', '</div>')),
|
||||
"date" : text.parse_datetime(text.remove_html(
|
||||
extr('Uploaded', '</div>')), "%d.%m.%Y"),
|
||||
}
|
||||
data["lang"] = util.language_to_code(data["language"])
|
||||
return data
|
||||
|
||||
def images(self, _):
|
||||
url = self.chapter_url + "/all-pages"
|
||||
@@ -102,12 +98,11 @@ class SimplyhentaiImageExtractor(Extractor):
|
||||
self.type = match.group(2)
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.page_url).text
|
||||
url_search = 'data-src="' if self.type == "image" else '<source src="'
|
||||
|
||||
title, pos = text.extract(page, '"og:title" content="', '"')
|
||||
descr, pos = text.extract(page, '"og:description" content="', '"', pos)
|
||||
url , pos = text.extract(page, url_search, '"', pos)
|
||||
extr = text.extract_from(self.request(self.page_url).text)
|
||||
title = extr('"og:title" content="' , '"')
|
||||
descr = extr('"og:description" content="', '"')
|
||||
url = extr('&quot;image&quot;:&quot;' , '&')
|
||||
url = extr("&quot;content&quot;:&quot;", "&") or url
|
||||
|
||||
tags = text.extract(descr, " tagged with ", " online for free ")[0]
|
||||
if tags:
|
||||
@@ -140,13 +135,13 @@ class SimplyhentaiVideoExtractor(Extractor):
|
||||
("https://videos.simply-hentai.com/creamy-pie-episode-02", {
|
||||
"pattern": r"https://www\.googleapis\.com/drive/v3/files"
|
||||
r"/0B1ecQ8ZVLm3JcHZzQzBnVy1ZUmc\?alt=media&key=[\w-]+",
|
||||
"keyword": "29d63987fed33f0a9f4b3786d1d71b03d793250a",
|
||||
"keyword": "706790708b14773efc1e075ddd3b738a375348a5",
|
||||
"count": 1,
|
||||
}),
|
||||
(("https://videos.simply-hentai.com"
|
||||
"/1715-tifa-in-hentai-gang-bang-3d-movie"), {
|
||||
"url": "ad9a36ae06c601b6490e3c401834b4949d947eb0",
|
||||
"keyword": "c561341aa3c6999f615abf1971d28fb2a83da2a7",
|
||||
"keyword": "f9dad94fbde9c95859e631ff4f07297a9567b874",
|
||||
}),
|
||||
)
|
||||
|
||||
@@ -178,8 +173,9 @@ class SimplyhentaiVideoExtractor(Extractor):
|
||||
"title": text.unescape(title),
|
||||
"episode": text.parse_int(episode),
|
||||
"tags": text.split_html(tags)[::2],
|
||||
"date": text.remove_html(date),
|
||||
"type": "video",
|
||||
"date": text.parse_datetime(text.remove_html(
|
||||
date), "%B %d, %Y %H:%M"),
|
||||
})
|
||||
|
||||
yield Message.Version, 1
|
||||
|
||||
@@ -27,7 +27,6 @@ TRAVIS_SKIP = {
|
||||
# temporary issues, etc.
|
||||
BROKEN = {
|
||||
"mangapark",
|
||||
"pixnet",
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user