[hentainexus] improve/simplify code

This commit is contained in:
Mike Fährmann
2020-07-30 00:35:49 +02:00
parent 627d2141d3
commit 72c5578a27

View File

@@ -36,21 +36,17 @@ class HentainexusGalleryExtractor(GalleryExtractor):
rmve = text.remove_html
extr = text.extract_from(page)
data = {
"gallery_id" : text.parse_int(self.gallery_id),
"tags" : extr('"og:description" content="', '"').split(", "),
"thumbnail" : extr('"og:image" content="', '"'),
"title" : extr('<h1 class="title">', '</h1>'),
"artist" : rmve(extr('viewcolumn">Artist</td>' , '</td>')),
"book" : rmve(extr('viewcolumn">Book</td>' , '</td>')),
"circle" : rmve(extr('viewcolumn">Circle</td>' , '</td>')),
"event" : rmve(extr('viewcolumn">Event</td>' , '</td>')),
"language" : rmve(extr('viewcolumn">Language</td>' , '</td>')),
"magazine" : rmve(extr('viewcolumn">Magazine</td>' , '</td>')),
"parody" : rmve(extr('viewcolumn">Parody</td>' , '</td>')),
"publisher" : rmve(extr('viewcolumn">Publisher</td>' , '</td>')),
"description": rmve(extr('viewcolumn">Description</td>', '</td>')),
"gallery_id": text.parse_int(self.gallery_id),
"tags" : extr('"og:description" content="', '"').split(", "),
"thumbnail" : extr('"og:image" content="', '"'),
"title" : extr('<h1 class="title">', '</h1>'),
}
for key in ("Artist", "Book", "Circle", "Event", "Language",
"Magazine", "Parody", "Publisher", "Description"):
data[key.lower()] = rmve(extr(
'viewcolumn">' + key + '</td>', '</td>'))
data["lang"] = util.language_to_code(data["language"])
if 'doujin' in data['tags']:
data['type'] = 'Doujinshi'
elif 'illustration' in data['tags']:
@@ -60,10 +56,10 @@ class HentainexusGalleryExtractor(GalleryExtractor):
data["title_conventional"] = self._join_title(data)
return data
def images(self, page):
def images(self, _):
url = "{}/read/{}".format(self.root, self.gallery_id)
extr = text.extract_from(self.request(url).text)
urls = extr("initReader(", "]") + "]"
page = self.request(url).text
urls = text.extract(page, "initReader(", "]")[0] + "]"
return [(url, None) for url in json.loads(urls)]
@staticmethod
@@ -120,14 +116,13 @@ class HentainexusSearchExtractor(Extractor):
self.params = text.parse_query(match.group(1))
def items(self):
yield Message.Version, 1
params = self.params
path = "/"
data = {"_extractor": HentainexusGalleryExtractor}
while path:
page = self.request(self.root + path, params=params).text
extr = text.extract_from(page)
data = {"_extractor": HentainexusGalleryExtractor}
while True:
gallery_id = extr('<a href="/view/', '"')