[exhentai] extract tag metadata
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2014-2018 Mike Fährmann
|
# Copyright 2014-2019 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -15,6 +15,9 @@ import time
|
|||||||
import random
|
import random
|
||||||
|
|
||||||
|
|
||||||
|
BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org"
|
||||||
|
|
||||||
|
|
||||||
class ExhentaiExtractor(Extractor):
|
class ExhentaiExtractor(Extractor):
|
||||||
"""Base class for exhentai extractors"""
|
"""Base class for exhentai extractors"""
|
||||||
category = "exhentai"
|
category = "exhentai"
|
||||||
@@ -99,10 +102,10 @@ class ExhentaiExtractor(Extractor):
|
|||||||
class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
||||||
"""Extractor for image galleries from exhentai.org"""
|
"""Extractor for image galleries from exhentai.org"""
|
||||||
subcategory = "gallery"
|
subcategory = "gallery"
|
||||||
pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
|
pattern = [BASE_PATTERN + r"/g/(\d+)/([\da-f]{10})"]
|
||||||
test = [
|
test = [
|
||||||
("https://exhentai.org/g/960460/4f0e369d82/", {
|
("https://exhentai.org/g/960460/4f0e369d82/", {
|
||||||
"keyword": "15b755fd3e2c710d7fd7ff112a5cdbf4333201b2",
|
"keyword": "900b8dccd23c41a76e915a8df70ae77c4e0f52c7",
|
||||||
"content": "493d759de534355c9f55f8e365565b62411de146",
|
"content": "493d759de534355c9f55f8e365565b62411de146",
|
||||||
}),
|
}),
|
||||||
("https://exhentai.org/g/960461/4f0e369d82/", {
|
("https://exhentai.org/g/960461/4f0e369d82/", {
|
||||||
@@ -152,7 +155,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
"gallery_id" : self.gid,
|
"gallery_id" : self.gid,
|
||||||
"gallery_token": self.token,
|
"gallery_token": self.token,
|
||||||
}
|
}
|
||||||
text.extract_all(page, (
|
data, pos = text.extract_all(page, (
|
||||||
("title" , '<h1 id="gn">', '</h1>'),
|
("title" , '<h1 id="gn">', '</h1>'),
|
||||||
("title_jp" , '<h1 id="gj">', '</h1>'),
|
("title_jp" , '<h1 id="gj">', '</h1>'),
|
||||||
("date" , '>Posted:</td><td class="gdt2">', '</td>'),
|
("date" , '>Posted:</td><td class="gdt2">', '</td>'),
|
||||||
@@ -160,12 +163,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
("gallery_size", '>File Size:</td><td class="gdt2">', '<'),
|
("gallery_size", '>File Size:</td><td class="gdt2">', '<'),
|
||||||
("count" , '>Length:</td><td class="gdt2">', ' '),
|
("count" , '>Length:</td><td class="gdt2">', ' '),
|
||||||
), values=data)
|
), values=data)
|
||||||
|
|
||||||
data["lang"] = util.language_to_code(data["language"])
|
data["lang"] = util.language_to_code(data["language"])
|
||||||
data["title"] = text.unescape(data["title"])
|
data["title"] = text.unescape(data["title"])
|
||||||
data["title_jp"] = text.unescape(data["title_jp"])
|
data["title_jp"] = text.unescape(data["title_jp"])
|
||||||
data["count"] = text.parse_int(data["count"])
|
data["count"] = text.parse_int(data["count"])
|
||||||
data["gallery_size"] = text.parse_bytes(
|
data["gallery_size"] = text.parse_bytes(
|
||||||
data["gallery_size"].rstrip("Bb"))
|
data["gallery_size"].rstrip("Bb"))
|
||||||
|
data["tags"] = [
|
||||||
|
text.unquote(tag)
|
||||||
|
for tag in text.extract_iter(page, 'hentai.org/tag/', '"', pos)
|
||||||
|
]
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_images(self, page):
|
def get_images(self, page):
|
||||||
@@ -258,12 +266,16 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|||||||
class ExhentaiSearchExtractor(ExhentaiExtractor):
|
class ExhentaiSearchExtractor(ExhentaiExtractor):
|
||||||
"""Extractor for exhentai search results"""
|
"""Extractor for exhentai search results"""
|
||||||
subcategory = "search"
|
subcategory = "search"
|
||||||
pattern = [r"(?:https?://)?(?:g\.e-|e-|ex)hentai\.org/?\?(.*)$"]
|
pattern = [BASE_PATTERN + r"/?\?(.*)$"]
|
||||||
test = [
|
test = [
|
||||||
("https://exhentai.org/?f_search=touhou", None),
|
("https://exhentai.org/?f_search=touhou", None),
|
||||||
("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
|
(("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
|
||||||
"&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
|
"&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
|
||||||
"&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter", None),
|
"&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), {
|
||||||
|
"pattern": ExhentaiGalleryExtractor.pattern[0],
|
||||||
|
"range": "1-30",
|
||||||
|
"count": 30,
|
||||||
|
}),
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
@@ -318,8 +330,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
|
|||||||
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
|
class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
|
||||||
"""Extractor for favorited exhentai galleries"""
|
"""Extractor for favorited exhentai galleries"""
|
||||||
subcategory = "favorite"
|
subcategory = "favorite"
|
||||||
pattern = [r"(?:https?://)?(?:g\.e-|e-|ex)hentai\.org"
|
pattern = [BASE_PATTERN + r"/favorites\.php(?:\?(.*))?"]
|
||||||
r"/favorites\.php(?:\?(.*))?"]
|
|
||||||
test = [
|
test = [
|
||||||
("https://exhentai.org/favorites.php", None),
|
("https://exhentai.org/favorites.php", None),
|
||||||
("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
|
("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
|
||||||
|
|||||||
Reference in New Issue
Block a user