Merge branch 'category'
This commit is contained in:
@@ -17,7 +17,7 @@ class FourchanThreadExtractor(ChanExtractor):
|
||||
pattern = [r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+)"]
|
||||
test = [("https://boards.4chan.org/tg/thread/15396072/", {
|
||||
"url": "39082ad166161966d7ba8e37f2173a824eb540f0",
|
||||
"keyword": "9b610fd3674653728516c34ec65925a024cc0074",
|
||||
"keyword": "38679a7c8054f535cba67cae13eef1ea7dbc8085",
|
||||
"content": "3081ed85a5afaeb3f430f42540e7bb5eec1908cc",
|
||||
})]
|
||||
api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
|
||||
|
||||
@@ -17,7 +17,7 @@ class InfinitychanThreadExtractor(ChanExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"]
|
||||
test = [("https://8ch.net/tg/res/175887.html", {
|
||||
"url": "cb03fdc650ad8e796fdab553fbd5489f468d3f45",
|
||||
"keyword": "d9388d231db6a0ea3e710a6cf46dc53dbdbb2115",
|
||||
"keyword": "c2a7f57422558dddaf3467b9a30018e847eb4fad",
|
||||
"content": "9f51cdfee6942a18011996ca049baeb0a22f931b",
|
||||
})]
|
||||
api_url = "https://8ch.net/{board}/res/{thread}.json"
|
||||
|
||||
@@ -22,7 +22,7 @@ class BatotoChapterExtractor(AsynchronousExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
|
||||
test = [("http://bato.to/reader#459878c8fda07502", {
|
||||
"url": "432d7958506ad913b0a9e42664a89e46a63e9296",
|
||||
"keyword": "7a3e03c40c8b3c7137c4ebe723b1b9c95a303d81",
|
||||
"keyword": "75a3a86d32aecfc21c44865b4043490757f73d77",
|
||||
})]
|
||||
url = "https://bato.to/"
|
||||
reader_url = "https://bato.to/areader"
|
||||
@@ -78,7 +78,6 @@ class BatotoChapterExtractor(AsynchronousExtractor):
|
||||
manga, pos = extr(page, "document.title = '", " - ", pos)
|
||||
match = re.match(r"(Vol.(\d+) )?Ch\.([^:]+)(: (.+))?", cinfo)
|
||||
return {
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
"manga": text.unescape(manga),
|
||||
"volume": match.group(2) or "",
|
||||
|
||||
@@ -55,13 +55,10 @@ class BooruExtractor(Extractor):
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
# Override this method in derived classes
|
||||
return {
|
||||
"category": self.category,
|
||||
}
|
||||
return {}
|
||||
|
||||
def get_file_metadata(self, data):
|
||||
"""Collect metadata for a downloadable file"""
|
||||
data["category"] = self.category
|
||||
return text.nameext_from_url(self.get_file_url(data), data)
|
||||
|
||||
def get_file_url(self, data):
|
||||
@@ -114,10 +111,7 @@ class BooruTagExtractor(BooruExtractor):
|
||||
self.params["tags"] = self.tags
|
||||
|
||||
def get_job_metadata(self):
|
||||
return {
|
||||
"category": self.category,
|
||||
"tags": self.tags,
|
||||
}
|
||||
return {"tags": self.tags}
|
||||
|
||||
|
||||
class BooruPoolExtractor(BooruExtractor):
|
||||
@@ -131,10 +125,7 @@ class BooruPoolExtractor(BooruExtractor):
|
||||
self.params["tags"] = "pool:" + self.pool
|
||||
|
||||
def get_job_metadata(self):
|
||||
return {
|
||||
"category": self.category,
|
||||
"pool": self.pool,
|
||||
}
|
||||
return {"pool": self.pool}
|
||||
|
||||
|
||||
class BooruPostExtractor(BooruExtractor):
|
||||
|
||||
@@ -21,7 +21,6 @@ class ChanExtractor(Extractor):
|
||||
def __init__(self, board, thread):
|
||||
Extractor.__init__(self)
|
||||
self.metadata = {
|
||||
"category": self.category,
|
||||
"board": board,
|
||||
"thread": thread,
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ class ChronosImageExtractor(Extractor):
|
||||
url_base = "http://chronos.to/"
|
||||
test = [("http://chronos.to/bdrmq7rw7v4y", {
|
||||
"url": "7fcb3fe315c94283644d25ef47a644c2dc8da944",
|
||||
"keyword": "9c364ef9bd962fe70eca49ef74c1c424486514f9",
|
||||
"keyword": "04dbc71a1154728d01c931308184050d61c5da55",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})]
|
||||
|
||||
@@ -30,10 +30,6 @@ class ChronosImageExtractor(Extractor):
|
||||
self.token = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = {
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
}
|
||||
params = {
|
||||
"op": "view",
|
||||
"id": self.token,
|
||||
@@ -44,7 +40,7 @@ class ChronosImageExtractor(Extractor):
|
||||
data=params).text
|
||||
url , pos = text.extract(page, '<br><img src="', '"')
|
||||
filename, pos = text.extract(page, ' alt="', '"', pos)
|
||||
text.nameext_from_url(filename, data)
|
||||
data = text.nameext_from_url(filename, {"token": self.token})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, url, data
|
||||
|
||||
@@ -17,6 +17,6 @@ class CoreimgImageExtractor(chronos.ChronosImageExtractor):
|
||||
url_base = "https://coreimg.net/"
|
||||
test = [("http://coreimg.net/ykcl5al8uzvg", {
|
||||
"url": "2b32596a2ea66b7cc784e20f3749f75f20998d78",
|
||||
"keyword": "c81daac4ecc3e44796117cdea5eb6f3b852c2027",
|
||||
"keyword": "8d71e5b820bc7177baee33ca529c91ae4521299f",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})]
|
||||
|
||||
@@ -21,7 +21,7 @@ class DeviantartUserExtractor(AsynchronousExtractor):
|
||||
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com(?:/gallery)?/?$"]
|
||||
test = [("http://shimoda7.deviantart.com/gallery/", {
|
||||
"url": "63bfa8efba199e27181943c9060f6770f91a8441",
|
||||
"keyword": "c0343b41c28c87254a3f0b3241222e94f780701e",
|
||||
"keyword": "741bbea4891a23335bb5d119c4a42aeb54702c50",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -57,10 +57,7 @@ class DeviantartUserExtractor(AsynchronousExtractor):
|
||||
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"category": self.category,
|
||||
"artist": self.artist,
|
||||
}
|
||||
return {"artist": self.artist}
|
||||
|
||||
def get_image_metadata(self, image):
|
||||
"""Collect metadata for an image"""
|
||||
@@ -108,7 +105,7 @@ class DeviantartImageExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?[^\.]+\.deviantart\.com/art/.+-(\d+)"]
|
||||
test = [("http://shimoda7.deviantart.com/art/For-the-sake-of-a-memory-10073852", {
|
||||
"url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e",
|
||||
"keyword": "f2dfde276a39990097935ace092811c56bc0bfec",
|
||||
"keyword": "ccac27b8f740fc943afca9460608e02c6cbcdf96",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -127,7 +124,7 @@ class DeviantartImageExtractor(Extractor):
|
||||
('description', '"og:description" content="', '"'),
|
||||
(None , '<span class="tt-w">', ''),
|
||||
('date' , 'title="', '"'),
|
||||
), values={'category': self.category, "index": self.index})[0]
|
||||
), values={"index": self.index})[0]
|
||||
data["description"] = text.unescape(text.unescape(data["description"]))
|
||||
data["artist"] = text.extract(data["url"], "//", ".")[0]
|
||||
data["date"] = text.extract(data["date"], ", ", " in ", len(data["title"]))[0]
|
||||
|
||||
@@ -22,7 +22,7 @@ class DoujinmodeChapterExtractor(Extractor):
|
||||
r"(?:hentai/|yaoi/|western/)?mangas/([0-9a-f]{36})")]
|
||||
test = [("http://doujinmode.net/mangas/967836c988a716e9efca06998b7838d09eb5", {
|
||||
"url": "be5d48a9fd48f09cfcc5d4e51f24bf1100e75502",
|
||||
"keyword": "710cc9599faf563b0cad836bbc7d85b288fcda3a",
|
||||
"keyword": "fbccd0416f19080dc2e041917aeff721399adf13",
|
||||
"content": "a041114e2a8af54d42a4a46a69cae4ebf2641cb1",
|
||||
})]
|
||||
url_base = "http://doujinmode.net/mangas/"
|
||||
@@ -45,7 +45,6 @@ class DoujinmodeChapterExtractor(Extractor):
|
||||
count, pos = text.extract(page, ' class="manga-count">', '</span>')
|
||||
title, pos = text.extract(page, '<h2>', ' Images List</h2>', pos)
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
"title": text.unescape(title),
|
||||
"count": count,
|
||||
|
||||
@@ -23,11 +23,11 @@ class DynastyscansChapterExtractor(Extractor):
|
||||
test = [
|
||||
("http://dynasty-scans.com/chapters/hitoribocchi_no_oo_seikatsu_ch33", {
|
||||
"url": "63950fa1dfdef58ab842c1b9b854c5c1d650cfa0",
|
||||
"keyword": "7a950a94e76cceb63559de0826cb2d5a1dcaa48a",
|
||||
"keyword": "81bfda5b98b34ac2a7324bd9e2abad3df9cc7673",
|
||||
}),
|
||||
("http://dynasty-scans.com/chapters/new_game_the_spinoff_special_13", {
|
||||
"url": "6b28c733481ac498da341e85a9eb155864491731",
|
||||
"keyword": "56ed59442b69d45ee4042d6586b30a72f55c3e12",
|
||||
"keyword": "93b75d0c0aaeb849c99f2225a4b97f466bc3ace9",
|
||||
}),
|
||||
]
|
||||
url_base = "http://dynasty-scans.com/"
|
||||
@@ -61,7 +61,6 @@ class DynastyscansChapterExtractor(Extractor):
|
||||
info
|
||||
)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": text.unescape(match.group(1)),
|
||||
"chapter": match.group(2) or "",
|
||||
"title": text.unescape(match.group(3) or ""),
|
||||
|
||||
@@ -22,7 +22,7 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
filename_fmt = "{gallery-id}_{num:>04}_{image-token}_{name}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
|
||||
test = [("https://exhentai.org/g/960460/4f0e369d82/", {
|
||||
"keyword": "c1282ffbe5d452c62dec9dbde4ecb7037525cd64",
|
||||
"keyword": "623f8c86c9fe38e964682dd4309b96922655b900",
|
||||
"content": "493d759de534355c9f55f8e365565b62411de146",
|
||||
})]
|
||||
api_url = "https://exhentai.org/api.php"
|
||||
@@ -71,7 +71,6 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category" : self.category,
|
||||
"gallery-id" : self.gid,
|
||||
"gallery-token": self.token,
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ class HbrowseChapterExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"]
|
||||
test = [("http://www.hbrowse.com/10363/c00000", {
|
||||
"url": "634f4800858913f097bc3b62a8fedaf74b5254bd",
|
||||
"keyword": "e6263b71f791000ad4bca58bc4d90f79e42e6be6",
|
||||
"keyword": "c7dc22a10699dee5cf466406fecee6ffa2e6277e",
|
||||
"content": "44578ebbe176c2c27434966aef22945787e2781e",
|
||||
})]
|
||||
url_base = "http://www.hbrowse.com"
|
||||
@@ -68,7 +68,6 @@ class HbrowseChapterExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
'gallery-id': self.gid,
|
||||
"chapter": int(self.chapter[1:]),
|
||||
}
|
||||
|
||||
@@ -54,7 +54,7 @@ class Hentai2readChapterExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"]
|
||||
test = [("http://hentai2read.com/amazon_elixir/1/", {
|
||||
"url": "fb5fc4d7cc194116960eaa648c7e045a6e6f0c11",
|
||||
"keyword": "03435037539d57ca084c457b5ac4d48928487521",
|
||||
"keyword": "c05d0d0bbe188926b15a43df1f8f65b8ac11c3fd",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -78,7 +78,6 @@ class Hentai2readChapterExtractor(Extractor):
|
||||
title = text.extract(page, "<title>", "</title>")[0]
|
||||
match = re.match(r"Reading (?:(.+) dj - )?(.+) Hentai - \d+: ", title)
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-id": images[0].split("/")[-3],
|
||||
"chapter": self.chapter,
|
||||
"count": len(images),
|
||||
|
||||
@@ -20,7 +20,7 @@ class HentaiboxChapterExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hentaibox\.net/[^/]+/(\d+)_\d+_([^/&]+)"]
|
||||
test = [("http://www.hentaibox.net/hentai-manga/16_18_Original_Amazon-No-Hiyaku-Amazon-Elixir-Decensored", {
|
||||
"url": "d1a50a9b289d284f178971e01cf312791888e057",
|
||||
"keyword": "294eda384689d4f1178ec952560d0dedd3e38647",
|
||||
"keyword": "b4b100f800b716e573e072f01b5d604d9b436b70",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -44,7 +44,7 @@ class HentaiboxChapterExtractor(Extractor):
|
||||
("title" , 'content="Read or Download ', ' hentai manga from'),
|
||||
("series" , ' the series ', ' with ' + self.count),
|
||||
("language", ' translated pages to ', '.'),
|
||||
), values={"category": self.category, "count": self.count})[0]
|
||||
), values={"count": self.count})[0]
|
||||
data["lang"] = iso639_1.language_to_code(data["language"])
|
||||
return data
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ class HentaifoundryUserExtractor(Extractor):
|
||||
]
|
||||
test = [("http://www.hentai-foundry.com/pictures/user/Orzy", {
|
||||
"url": "236ac02c8f081fee44ad2c2571bf74615633b91e",
|
||||
"keyword": "f5f1aa78ecbe390fb117a0b599f771cd47df86c6",
|
||||
"keyword": "9f334f635b71c915b026cf20a65eee065237d452",
|
||||
})]
|
||||
url_base = "http://www.hentai-foundry.com/pictures/user/"
|
||||
|
||||
@@ -60,7 +60,6 @@ class HentaifoundryUserExtractor(Extractor):
|
||||
token, pos = text.extract(page, 'hidden" value="', '"')
|
||||
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
|
||||
return {
|
||||
"category": self.category,
|
||||
"artist": self.artist,
|
||||
"count": count,
|
||||
}, token
|
||||
@@ -115,7 +114,7 @@ class HentaifoundryImageExtractor(Extractor):
|
||||
r"([^/]+)/(\d+)/[^/]+")]
|
||||
test = [("http://www.hentai-foundry.com/pictures/user/Orzy/76940/Youmu-Konpaku", {
|
||||
"url": "50c267b2b2983b98b18fd0d2acbec8ce5ba64c77",
|
||||
"keyword": "8c9b7054b78fb4f52982c3f21f3ba2a9fcdd5428",
|
||||
"keyword": "6cee38ac0817783feb6db9944da997bec13d0e19",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -136,7 +135,6 @@ class HentaifoundryImageExtractor(Extractor):
|
||||
title, pos = text.extract(page, 'Pictures</a> » <span>', '<')
|
||||
url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos)
|
||||
data = {
|
||||
"category": self.category,
|
||||
"artist": self.artist,
|
||||
"index": self.index,
|
||||
"title": text.unescape(title),
|
||||
|
||||
@@ -21,7 +21,7 @@ class HitomiGalleryExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"]
|
||||
test = [("https://hitomi.la/galleries/867789.html", {
|
||||
"url": "23fd59894c3db65aec826aa5efb85f96d2384883",
|
||||
"keyword": "80395a06b6ba24842c15121d142830bb467ae68b",
|
||||
"keyword": "03a64d67584afd7b8ad96ecb47acae08ea14d90f",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -61,7 +61,6 @@ class HitomiGalleryExtractor(Extractor):
|
||||
series, pos = text.extract(page, '.html">', '</a>', pos)
|
||||
lang = lang.capitalize()
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
"title": " ".join(title.split()),
|
||||
"artist": string.capwords(artist),
|
||||
|
||||
@@ -20,7 +20,7 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"]
|
||||
test = [("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
|
||||
"url": "d7a4483b6d5ebba81950a349aad58ae034c60eda",
|
||||
"keyword": "9f54ab808d77f2517444411dfbf8686189c20b43",
|
||||
"keyword": "e4a9395dbd06d4af3172a6a61c90601bc47ee18c",
|
||||
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
|
||||
})]
|
||||
url_base = "http://www.imagebam.com"
|
||||
@@ -44,17 +44,12 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
url = self.url_base + "/gallery/" + self.gkey
|
||||
page = self.request(url, encoding="utf-8").text
|
||||
data = {
|
||||
"category": self.category,
|
||||
"gallery-key": self.gkey,
|
||||
}
|
||||
data, _ = text.extract_all(page, (
|
||||
return text.extract_all(page, (
|
||||
(None , "<img src='/img/icons/photos.png'", ""),
|
||||
("title" , "'> ", " <"),
|
||||
("count" , "'>", " images"),
|
||||
("first-url", "<a href='http://www.imagebam.com", "'"),
|
||||
), values=data)
|
||||
return data
|
||||
), values={"gallery-key": self.gkey})[0]
|
||||
|
||||
def get_images(self, url):
|
||||
"""Yield all image-urls and -ids for a gallery"""
|
||||
@@ -71,7 +66,6 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
yield image_url, image_id
|
||||
|
||||
|
||||
|
||||
class ImagebamImageExtractor(Extractor):
|
||||
"""Extractor for single images from imagebam.com"""
|
||||
category = "imagebam"
|
||||
@@ -81,7 +75,7 @@ class ImagebamImageExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/image/([0-9a-f]{15})"]
|
||||
test = [("http://www.imagebam.com/image/94d56c502511890", {
|
||||
"url": "94add9417c685d113a91bcdda4916e9538b5f8a9",
|
||||
"keyword": "046f049533126bb0ee7f81419f59371c6903df9e",
|
||||
"keyword": "fd99b2f45b761d0b639af46740aacd976f5dfcc7",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})]
|
||||
|
||||
@@ -90,10 +84,9 @@ class ImagebamImageExtractor(Extractor):
|
||||
self.token = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = {"category": self.category, "token": self.token}
|
||||
page = self.request("http://www.imagebam.com/image/" + self.token).text
|
||||
url = text.extract(page, 'property="og:image" content="', '"')[0]
|
||||
text.nameext_from_url(url, data)
|
||||
url = text.extract(page, 'property="og:image" content="', '"')[0]
|
||||
data = text.nameext_from_url(url, {"token": self.token})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, url, data
|
||||
|
||||
@@ -22,7 +22,7 @@ class ImagefapGalleryExtractor(Extractor):
|
||||
r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)")]
|
||||
test = [("http://www.imagefap.com/gallery/6318447", {
|
||||
"url": "f63e6876df83a40e1a98dad70e46952dd9edb7a7",
|
||||
"keyword": "eb26d0e62defc1a547b6b854fe0de693055d9f20",
|
||||
"keyword": "715f99ad154c4cf608afc7cd77dd1e896030646a",
|
||||
"content": "38e50699db9518ae68648c45ecdd6be614efc324",
|
||||
})]
|
||||
|
||||
@@ -48,7 +48,7 @@ class ImagefapGalleryExtractor(Extractor):
|
||||
("title" , '<title>Porn pics of ', ' (Page 1)</title>'),
|
||||
("uploader", '>Uploaded by ', '</font>'),
|
||||
("count" , ' 1 of ', ' pics"'),
|
||||
), values={"category": self.category, "gallery-id": self.gid})
|
||||
), values={"gallery-id": self.gid})
|
||||
self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0]
|
||||
data["title"] = text.unescape(data["title"])
|
||||
return data
|
||||
@@ -82,7 +82,7 @@ class ImagefapImageExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)"]
|
||||
test = [("http://www.imagefap.com/photo/1616331218/", {
|
||||
"url": "8a05c0ccdcf84e63c962803bc41d247628c549ea",
|
||||
"keyword": "401ded07ae0b3a8f718e553e506898b34cd92020",
|
||||
"keyword": "c9880c6731b3fdc6d98d25dbff56f4342c11683e",
|
||||
"content": "964b8c62c9d5c2a039a2fccf1b1e10aaf7a18a96",
|
||||
})]
|
||||
|
||||
@@ -101,7 +101,6 @@ class ImagefapImageExtractor(Extractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
parts = info["contentUrl"].rsplit("/", 3)
|
||||
return text.nameext_from_url(parts[3], {
|
||||
"category": self.category,
|
||||
"title": text.unescape(info["name"]),
|
||||
"section": info["section"],
|
||||
"uploader": info["author"],
|
||||
|
||||
@@ -20,7 +20,7 @@ class ImagetwistImageExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagetwist\.com/([a-z0-9]{12})"]
|
||||
test = [("http://imagetwist.com/4e46hv31tu0q/test.jpg", {
|
||||
"url": "6b3fc0bd1105b698d2d5844658ca674d66b1e2e7",
|
||||
"keyword": "d599a540ed233bb7b66e4abec30affbad2e44af1",
|
||||
"keyword": "825d9d1901829da054b6ef9c034229af85e495e2",
|
||||
"content": "96b1fd099b06faad5879fce23a7e4eb8290d8810",
|
||||
})]
|
||||
|
||||
@@ -34,7 +34,6 @@ class ImagetwistImageExtractor(Extractor):
|
||||
filename, pos = text.extract(page, ' alt="', '"', pos)
|
||||
userid , pos = text.extract(url , '/', '/', 29)
|
||||
data = {
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
"user": userid,
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"]
|
||||
test = [("http://imgbox.com/g/JaX5V5HX7g", {
|
||||
"url": "c7c3466dde31d4308833816961104c7d1100368d",
|
||||
"keyword": "23deb783d3afee090f61472b495e797c8f262b93",
|
||||
"keyword": "cebd7f6868cf84ff492341c936cb6dbe5cde4682",
|
||||
"content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc",
|
||||
})]
|
||||
url_base = "http://imgbox.com"
|
||||
@@ -47,7 +47,6 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
|
||||
title = text.extract(page, "<h1>", "</h1>")[0]
|
||||
parts = title.rsplit(" - ", maxsplit=1)
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-key": self.key,
|
||||
"title": text.unescape(parts[0]),
|
||||
"count": parts[1][:-7],
|
||||
@@ -79,7 +78,7 @@ class ImgboxImageExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"]
|
||||
test = [("http://imgbox.com/qHhw7lpG", {
|
||||
"url": "d96990ea12223895287d139695077b70dfa0abe4",
|
||||
"keyword": "c5e87be93fec3122151edf85b6424d1871279590",
|
||||
"keyword": "ff0524dba869a4b3292d7d4f72f5da4024b4f002",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})]
|
||||
|
||||
@@ -91,8 +90,7 @@ class ImgboxImageExtractor(Extractor):
|
||||
page = self.request("http://imgbox.com/" + self.key).text
|
||||
url , pos = text.extract(page, 'src="http://i.', '"')
|
||||
filename, pos = text.extract(page, ' title="', '"', pos)
|
||||
data = {"category": self.category, "image-key": self.key}
|
||||
text.nameext_from_url(filename, data)
|
||||
data = text.nameext_from_url(filename, {"image-key": self.key})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, "http://i." + url, data
|
||||
|
||||
@@ -21,7 +21,7 @@ class ImgcandyImageExtractor(Extractor):
|
||||
r"(?:_(.+))?\.html")]
|
||||
test = [("http://imgcandy.net/img-57d02527efee8_test-テスト.png.html", {
|
||||
"url": "bc3c9207b10dbfe8e65ccef5b9e3194a7427b4fa",
|
||||
"keyword": "381e036374742a091cac7dd7a3eca90ee725afa3",
|
||||
"keyword": "1ed1587ef38a6b26ce28b35857a78417239d197a",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})]
|
||||
|
||||
@@ -30,12 +30,11 @@ class ImgcandyImageExtractor(Extractor):
|
||||
self.token, self.filename = match.groups()
|
||||
|
||||
def items(self):
|
||||
data = {"category": self.category, "token": self.token}
|
||||
params = {"imgContinue": "Continue+to+image+...+"}
|
||||
page = self.request("http://imgcandy.net/img-" + self.token + ".html",
|
||||
method="post", data=params).text
|
||||
url = text.extract(page, "<img class='centred' src='", "'")[0]
|
||||
text.nameext_from_url(self.filename or url, data)
|
||||
data = text.nameext_from_url(self.filename or url, {"token": self.token})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, url, data
|
||||
|
||||
@@ -61,8 +61,6 @@ class ImgchiliImageExtractor(ImgchiliExtractor):
|
||||
parts = name2.split("in the gallery ")
|
||||
name = parts[0] if not parts[0].endswith("...") else name1
|
||||
return text.nameext_from_url(name, {
|
||||
"category": self.category,
|
||||
"subcategory": self.subcategory,
|
||||
"image-id": self.match.group(1),
|
||||
"title": text.unescape(parts[-1]) if len(parts) > 1 else ""
|
||||
})
|
||||
@@ -86,8 +84,6 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
title = text.extract(page, "<h1>", "</h1>")[0]
|
||||
return {
|
||||
"category": self.category,
|
||||
"subcategory": self.subcategory,
|
||||
"title": text.unescape(title),
|
||||
"key": self.match.group(1),
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ class ImgthGalleryExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
|
||||
test = [("http://imgth.com/gallery/37/wallpaper-anime", {
|
||||
"url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
|
||||
"keyword": "1b15726d53bc2c08d845fa60ce538396380688df",
|
||||
"keyword": "3f268fcc18d49ac3799a8f25cc08053e90891955",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -61,4 +61,4 @@ class ImgthGalleryExtractor(Extractor):
|
||||
("date" , 'created on ', ' by <'),
|
||||
(None , 'href="/users/', ''),
|
||||
("user" , '>', '<'),
|
||||
), values={"category": self.category, "gallery-id": self.gid})[0]
|
||||
), values={"gallery-id": self.gid})[0]
|
||||
|
||||
@@ -20,7 +20,7 @@ class ImgtrexImageExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgtrex\.com/([^/]+)"]
|
||||
test = [("http://imgtrex.com/im0ypxq0rke4/test-テスト-&<a>.png", {
|
||||
"url": "c000618bddda42bd599a590b7972c7396d19d8fe",
|
||||
"keyword": "4d766eae04aa5457bca4992290aa28b76239d287",
|
||||
"keyword": "58905795a9cd3f17d5ff024fc4d63645795ba23c",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})]
|
||||
|
||||
@@ -29,11 +29,10 @@ class ImgtrexImageExtractor(Extractor):
|
||||
self.token = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = {"category": self.category, "token": self.token}
|
||||
page = self.request("http://imgtrex.com/" + self.token).text
|
||||
filename, pos = text.extract(page, '<title>ImgTrex: ', '</title>')
|
||||
url , pos = text.extract(page, '<br>\n<img src="', '"', pos)
|
||||
text.nameext_from_url(filename, data)
|
||||
data = text.nameext_from_url(filename, {"token": self.token})
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Url, url, data
|
||||
|
||||
@@ -22,7 +22,7 @@ class ImgurAlbumExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
|
||||
test = [("https://imgur.com/a/TcBmP", {
|
||||
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
|
||||
"keyword": "5c96eee4df5938ed37f1f95f5c4ef64444bddeb4",
|
||||
"keyword": "c76bbf86f8f114cdaadab396c0ea4acf47aa44eb",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -43,16 +43,12 @@ class ImgurAlbumExtractor(Extractor):
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
page = self.request("https://imgur.com/a/" + self.album).text
|
||||
data = {
|
||||
"category": self.category,
|
||||
"album-key": self.album,
|
||||
}
|
||||
text.extract_all(page, (
|
||||
data = text.extract_all(page, (
|
||||
('title', '<meta property="og:title" content="', '"'),
|
||||
('count', '"num_images":"', '"'),
|
||||
('date' , '"datetime":"', ' '),
|
||||
('time' , '', '"'),
|
||||
), values=data)
|
||||
), values={"album-key": self.album})[0]
|
||||
data["title"] = text.unescape(data["title"])
|
||||
return data
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ class ImgytImageExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?img\.yt/img-([a-z0-9]+)\.html"]
|
||||
test = [("http://img.yt/img-57a2050547b97.html", {
|
||||
"url": "6801fac1ff8335bd27a1665ad27ad64cace2cd84",
|
||||
"keyword": "a20aa2215a4a6d5f4605d6370a8d605b525fc4bc",
|
||||
"keyword": "7548cc9915f90f5d7ffbafa079085457ae34562c",
|
||||
"content": "54592f2635674c25677c6872db3709d343cdf92f",
|
||||
})]
|
||||
|
||||
@@ -30,12 +30,12 @@ class ImgytImageExtractor(Extractor):
|
||||
self.token = match.group(1)
|
||||
|
||||
def items(self):
|
||||
data = {"category": self.category, "token": self.token}
|
||||
params = {"imgContinue": "Continue+to+image+...+"}
|
||||
page = self.request("https://img.yt/img-" + self.token + ".html",
|
||||
method="post", data=params).text
|
||||
url , pos = text.extract(page, "<img class='centred' src='", "'")
|
||||
filename, pos = text.extract(page, " alt='", "'", pos)
|
||||
data = {"token": self.token}
|
||||
text.nameext_from_url(filename + splitext(url)[1], data)
|
||||
if url.startswith("http:"):
|
||||
url = "https:" + url[5:]
|
||||
|
||||
@@ -20,7 +20,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
||||
pattern = [r"(?:https?://)?downloads\.khinsider\.com/game-soundtracks/album/(.+)"]
|
||||
test = [("http://downloads.khinsider.com/game-soundtracks/album/horizon-riders-wii-", {
|
||||
"url": "35ff4c8310884664408dc5560fda3b06157f7606",
|
||||
"keyword": "dde50e1f5dbed5ee3f13df4e1bffc58bb9563f22",
|
||||
"keyword": "d91cf3edee6713b536eaf3995743f0be7dc72f68",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -45,7 +45,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
||||
("size" , "Total Filesize: <b>", "</b>"),
|
||||
("date" , "Date added: <b>", "</b>"),
|
||||
("type" , "Album type: <b>", "</b>"),
|
||||
), values={"category": self.category})[0]
|
||||
))[0]
|
||||
|
||||
def get_album_tracks(self, page):
|
||||
"""Collect url and metadata for all tracks of a soundtrack"""
|
||||
|
||||
@@ -53,11 +53,11 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
test = [
|
||||
("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
|
||||
"url": "4136bcd1c6cecbca8cc2bc965d54f33ef0a97cc0",
|
||||
"keyword": "892c3e4df03a575a282a5695add986a49623d746",
|
||||
"keyword": "ab332093a4f2e473a468235bfd624cbe3b19fd7f",
|
||||
}),
|
||||
("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
|
||||
"url": "de074848f6c1245204bb9214c12bcc3ecfd65019",
|
||||
"keyword": "0a98952984941cc2a11892b1cd7b237ffb20adaa",
|
||||
"keyword": "013aad80e578c6ccd2e1fe47cdc27c12a64f6db2",
|
||||
})
|
||||
]
|
||||
|
||||
@@ -81,7 +81,6 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo)
|
||||
chminor = match.group(3)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": manga,
|
||||
"volume": match.group(1) or "",
|
||||
"chapter": match.group(2),
|
||||
|
||||
@@ -21,8 +21,8 @@ class LusciousAlbumExtractor(Extractor):
|
||||
pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/c/([^/]+)/"
|
||||
r"(?:pictures/album|albums)/([^/\d]+(\d+))")]
|
||||
test = [("https://luscious.net/c/incest_manga/albums/amazon-no-hiyaku-amazon-elixir-english-decensored_261127/view/", {
|
||||
"url": "319a70261de12620d123add9b519d15b8515b503",
|
||||
"keyword": "60cc15db2619b8aee47c1527b6326be5a54f5c2f",
|
||||
"url": "12e1fde5ef3c0d41973a85fb27a602eb922c60aa",
|
||||
"keyword": "e10c7c070ad730e305024fb37cc70af6b05378dd",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -50,7 +50,7 @@ class LusciousAlbumExtractor(Extractor):
|
||||
(None , '<p>Language:', ''),
|
||||
("language", '\n ', ' '),
|
||||
("artist" , 'rtist: ', '\n'),
|
||||
), values={"category": self.category, "gallery-id": self.gid})[0]
|
||||
), values={"gallery-id": self.gid})[0]
|
||||
data["lang"] = iso639_1.language_to_code(data["language"])
|
||||
return data
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")]
|
||||
test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", {
|
||||
"url": "68efaeed3bc6abb0a0b6f75a5c649c17979e31f1",
|
||||
"keyword": "f342e3df9fa39eb10cf7ba5ef3300df6ad77f332",
|
||||
"keyword": "d3fe470e934a9f02ed00d4391b1743970eae82fa",
|
||||
})]
|
||||
url_fmt = "http://www.mangahere.co/manga/{}/{}.html"
|
||||
|
||||
@@ -76,7 +76,6 @@ class MangahereChapterExtractor(AsynchronousExtractor):
|
||||
count, pos = text.extract(page, '>', '<', pos-30)
|
||||
manga = re.match(r"(.+) \d+(\.\d+)? - Read .+ Chapter \d+(\.\d+)? Online", manga).group(1)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": text.unescape(manga),
|
||||
# "title": TODO,
|
||||
"volume": self.volume or "",
|
||||
|
||||
@@ -55,7 +55,7 @@ class MangamintChapterExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangamint\.com/([^\?]+-(\d+))"]
|
||||
test = [("http://www.mangamint.com/mushishi-1", {
|
||||
"url": "337f46c4dab50f544e9196ced723ac8f70400dd0",
|
||||
"keyword": "ca4ba6fa84367fd7c345879a17ebaad39b589da5",
|
||||
"keyword": "de9ea839d231cb9f1590a2a93ca9ab2f8743b39d",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -80,7 +80,6 @@ class MangamintChapterExtractor(Extractor):
|
||||
chid , pos = text.extract(page, r'"identifier":"node\/', '"', pos)
|
||||
match = re.match(r"(.+) (\d+)(\.\d+)?$", manga)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": match.group(1),
|
||||
"chapter": match.group(2),
|
||||
"chapter-minor": match.group(3) or "",
|
||||
|
||||
@@ -33,5 +33,5 @@ class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
|
||||
]
|
||||
test = [("http://www.mangapanda.com/red-storm/2", {
|
||||
"url": "4bf4ddf6c50105ec8a37675495ab80c46608275d",
|
||||
"keyword": "dcb8d655e3f461738c821819bbb8d017bd916713",
|
||||
"keyword": "89c712f7ed255ec9c1d8e84dcb5a160b6cb4498c",
|
||||
})]
|
||||
|
||||
@@ -51,11 +51,11 @@ class MangaparkChapterExtractor(Extractor):
|
||||
test = [
|
||||
("http://mangapark.me/manga/ad-astra-per-aspera-hata-kenjirou/s1/c1.2/1", {
|
||||
"url": "25d998a70df1fa559afc189ebd17df300b54dc28",
|
||||
"keyword": "40d60961d7aaf24454d2ab23fbc83f4c55cd4174",
|
||||
"keyword": "aa0dfbd21a5174b1497bce98182324e5120dd4ff",
|
||||
}),
|
||||
("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
|
||||
"url": "8534c8286a18c4db47606f84a4df9f1a42bab291",
|
||||
"keyword": "f96962442cdd5bc957603831c695159d974b7b93",
|
||||
"keyword": "df83f2ccde8dd58d6b906a65ae1ecf3bec801567",
|
||||
})
|
||||
]
|
||||
|
||||
@@ -80,7 +80,6 @@ class MangaparkChapterExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"version": self.version,
|
||||
"volume": self.volume or "",
|
||||
"chapter": self.chapter,
|
||||
|
||||
@@ -50,7 +50,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
]
|
||||
test = [("http://www.mangareader.net/karate-shoukoushi-kohinata-minoru/11", {
|
||||
"url": "84ffaab4c027ef9022695c53163c3aeabd07ca58",
|
||||
"keyword": "0df7db81a44ef642922aab798c303d60e2b6802d",
|
||||
"keyword": "09b4ad57a082eb371dec027ccfc8ed1157c6eac6",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -74,7 +74,6 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
page = self.request(self.url_base + self.url_title).text
|
||||
data = {
|
||||
"category": self.category,
|
||||
"chapter": self.chapter,
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
|
||||
@@ -46,7 +46,7 @@ class MangashareChapterExtractor(AsynchronousExtractor):
|
||||
pattern = [r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)"]
|
||||
test = [("http://read.mangashare.com/Gantz/chapter-331/page001.html", {
|
||||
"url": "2980fb9548e809dea63d104bc514dcc33bdd9ef7",
|
||||
"keyword": "4872a5645ab79cb9ecf363a5bf4cb9062fd61eef",
|
||||
"keyword": "8afc1c2a3e64efa3d2b9ed2359885343f89bdfa9",
|
||||
})]
|
||||
url_fmt = "http://read.mangashare.com/{}/page{:>03}.html"
|
||||
|
||||
@@ -67,7 +67,6 @@ class MangashareChapterExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
}
|
||||
|
||||
@@ -46,7 +46,6 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
|
||||
title, pos = text.extract(page, ' - ', '<', pos)
|
||||
count, pos = text.extract(page, 'Last Page (', ')', pos)
|
||||
data = {
|
||||
"category": self.category,
|
||||
"manga": manga,
|
||||
"chapter": text.unquote(self.chapter),
|
||||
"chapter-id": self.ch_id,
|
||||
|
||||
@@ -21,7 +21,7 @@ class NhentaiGalleryExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
|
||||
test = [("http://nhentai.net/g/147850/", {
|
||||
"url": "199ddd07dded0f69282e09a372710698ea21ab8e",
|
||||
"keyword": "e00678567c8335289ffcbb2e6980b28d332ee6a7",
|
||||
"keyword": "c7e37dfe80ca5eee69210c690a1340ea78a932a4",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -57,7 +57,6 @@ class NhentaiGalleryExtractor(Extractor):
|
||||
title_en = ginfo["title"].get("english", "")
|
||||
title_ja = ginfo["title"].get("japanese", "")
|
||||
return {
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
"upload-date": ginfo["upload_date"],
|
||||
"media-id": ginfo["media_id"],
|
||||
|
||||
@@ -40,10 +40,7 @@ class NijieExtractor(AsynchronousExtractor):
|
||||
|
||||
def get_job_metadata(self):
    """Collect metadata for extractor-job

    Returns a dict holding the artist-id this extractor was created for.
    """
    # 'category' is not set here: Job.update_kwdict() adds 'category' and
    # 'subcategory' to the keyword dicts of Directory and Url messages.
    return {"artist-id": self.artist_id}
|
||||
|
||||
def get_image_ids(self):
|
||||
"""Collect all image-ids for a specific artist"""
|
||||
@@ -83,7 +80,7 @@ class NijieUserExtractor(NijieExtractor):
|
||||
r"members(?:_illust)?\.php\?id=(\d+)")]
|
||||
test = [("https://nijie.info/members_illust.php?id=44", {
|
||||
"url": "585d821df4716b1098660a0be426d01db4b65f2a",
|
||||
"keyword": "30c981b9d7351ec275b9840d8bc2b4ef3da8c4b4",
|
||||
"keyword": "7a2dbf8fc0dfdb2af208ecdb8ec7f3186bdc31ab",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -105,7 +102,7 @@ class NijieImageExtractor(NijieExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/view\.php\?id=(\d+)"]
|
||||
test = [("https://nijie.info/view.php?id=70720", {
|
||||
"url": "a10d4995645b5f260821e32c60a35f73546c2699",
|
||||
"keyword": "1c0b1a2e447d8e1cd4f93c21f71d7fe7de0eeed3",
|
||||
"keyword": "e454c2bad9b636b90d569881bf4fe8438506e0d2",
|
||||
"content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
|
||||
})]
|
||||
|
||||
|
||||
@@ -26,8 +26,6 @@ class PinterestExtractor(Extractor):
|
||||
img = pin["image"]["original"]
|
||||
url = img["url"]
|
||||
data = {
|
||||
"category": self.category,
|
||||
"subcategory": self.subcategory,
|
||||
"pin-id": pin["id"],
|
||||
"note": pin["note"],
|
||||
"width": img["width"],
|
||||
@@ -90,8 +88,6 @@ class PinterestBoardExtractor(PinterestExtractor):
|
||||
def data_from_board(self, board):
|
||||
"""Get metadata from a board-object"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"subcategory": self.subcategory,
|
||||
"user": self.user,
|
||||
"board-id": board["id"],
|
||||
"board": board["name"],
|
||||
|
||||
@@ -92,7 +92,6 @@ class PixivUserExtractor(Extractor):
|
||||
"""Prepare a work-dictionary with additional keywords"""
|
||||
user = work["user"]
|
||||
url = work["image_urls"]["large"]
|
||||
work["category"] = self.category
|
||||
work["artist-id"] = user["id"]
|
||||
work["artist-name"] = user["name"]
|
||||
work["artist-nick"] = user["account"]
|
||||
@@ -130,7 +129,6 @@ class PixivUserExtractor(Extractor):
|
||||
if not user:
|
||||
user = self.api.user(self.artist_id)["response"][0]
|
||||
return {
|
||||
"category": self.category,
|
||||
"artist-id": user["id"],
|
||||
"artist-name": user["name"],
|
||||
"artist-nick": user["account"],
|
||||
|
||||
@@ -27,7 +27,7 @@ class PowermangaChapterExtractor(Extractor):
|
||||
]
|
||||
test = [("https://read.powermanga.org/read/one_piece/en/0/803/page/1", {
|
||||
"url": "e6179c1565068f99180620281f86bdd25be166b4",
|
||||
"keyword": "1c8593087f4a2e3343966a2900fc67be8e6401f1",
|
||||
"keyword": "ab66c38e31f1b716ed360ee8c78fd973d7d8693a",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -63,7 +63,6 @@ class PowermangaChapterExtractor(Extractor):
|
||||
json_data, pos = text.extract(page, 'var pages = ', ';', pos)
|
||||
match = re.match(r"(\w+ (\d+)([^:+]*)(?:: (.*))?|[^:]+)", chapter)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": text.unescape(manga),
|
||||
"chapter": match.group(2) or match.group(1),
|
||||
"chapter-minor": match.group(3) or "",
|
||||
|
||||
@@ -20,7 +20,7 @@ class SankakuTagExtractor(AsynchronousExtractor):
|
||||
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)"]
|
||||
test = [("https://chan.sankakucomplex.com/?tags=bonocho", {
|
||||
"url": "2561ca0d8482ead48f22a7abcd23919cd78344a1",
|
||||
"keyword": "6282e9a2d5223d635d9be7515f59d87d4b9be732",
|
||||
"keyword": "5e3a39fdc6698e63ed0054478ebd4ca632ce643e",
|
||||
})]
|
||||
url = "https://chan.sankakucomplex.com/"
|
||||
|
||||
@@ -42,10 +42,7 @@ class SankakuTagExtractor(AsynchronousExtractor):
|
||||
|
||||
def get_job_metadata(self):
    """Collect metadata for extractor-job

    Returns a dict holding the tags this extractor was created for.
    """
    # 'category' is not set here: Job.update_kwdict() adds 'category' and
    # 'subcategory' to the keyword dicts of Directory and Url messages.
    return {"tags": self.tags}
|
||||
|
||||
def get_images(self):
|
||||
params = {
|
||||
|
||||
@@ -23,7 +23,7 @@ class SeigaImageExtractor(Extractor):
|
||||
(r"(?:https?://)?lohas\.nicoseiga\.jp/"
|
||||
r"(?:priv|o)/[^/]+/\d+/(\d+)")]
|
||||
test = [("http://seiga.nicovideo.jp/seiga/im5977527", {
|
||||
"keyword": "e2ea59186c47beb71484ba35d550cf6511ac185a",
|
||||
"keyword": "fd2628b573d15d1bbdefb219a99b993365b214ed",
|
||||
"content": "d9202292012178374d57fb0126f6124387265297",
|
||||
})]
|
||||
|
||||
@@ -44,10 +44,7 @@ class SeigaImageExtractor(Extractor):
|
||||
|
||||
def get_job_metadata(self):
    """Collect metadata for extractor-job

    Returns a dict holding the image-id this extractor was created for.
    """
    # 'category' is not set here: Job.update_kwdict() adds 'category' and
    # 'subcategory' to the keyword dicts of Directory and Url messages.
    return {"image-id": self.image_id}
|
||||
|
||||
def get_image_url(self, image_id):
|
||||
"""Get url for an image with id 'image_id'"""
|
||||
|
||||
@@ -20,7 +20,7 @@ class SenmangaChapterExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)"]
|
||||
test = [("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", {
|
||||
"url": "32d88382fcad66859d089cd9a61249f375492ec5",
|
||||
"keyword": "9554ccc7bc32c358b2491c255e614ae908d7d593",
|
||||
"keyword": "465905e0b69998656f9d59462a9560319941c58d",
|
||||
"content": "a791dda85ac0d37e3b36d754560cbb65b8dab5b9",
|
||||
})]
|
||||
url_base = "http://raw.senmanga.com"
|
||||
@@ -52,7 +52,6 @@ class SenmangaChapterExtractor(Extractor):
|
||||
manga, pos = text.extract(title, '| Raw | ', ' | Chapter ')
|
||||
chapter, pos = text.extract(title, '', ' | Page ', pos)
|
||||
return {
|
||||
"category": self.category,
|
||||
"manga": text.unescape(manga.replace("-", " ")),
|
||||
"chapter": chapter,
|
||||
"count": count,
|
||||
|
||||
@@ -50,7 +50,7 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
|
||||
]
|
||||
test = [("http://view.thespectrum.net/series/toriko.html?ch=Chapter+343&page=1", {
|
||||
"url": "c0fc7dc594841217cc622a67edd79f06e9900333",
|
||||
"keyword": "bde9c95a2d0feca0574c7248ed06f1684f86b2ac",
|
||||
"keyword": "8499166b62db0c87e7109cc5f9aa837b4815dd9c",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -82,7 +82,6 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": self.category,
|
||||
"chapter": self.chapter or "",
|
||||
"volume": self.volume or "",
|
||||
"identifier": self.identifier.replace("+", " "),
|
||||
|
||||
@@ -21,7 +21,7 @@ class TumblrUserExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com(?:/page/\d+)?/?$"]
|
||||
test = [("http://demo.tumblr.com/", {
|
||||
"url": "d3d2bb185230e537314a0036814050634c730f74",
|
||||
"keyword": "2ab87097ecafce595dd53d8469b2337ec541bcde",
|
||||
"keyword": "8704a9bbb65b6e52dc1ccdf2c2449bd4abe3d389",
|
||||
"content": "31495fdb9f84edbb7f67972746a1521456f649e2",
|
||||
})]
|
||||
|
||||
@@ -47,7 +47,6 @@ class TumblrUserExtractor(Extractor):
|
||||
def get_job_metadata(self, image_data):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = next(image_data)
|
||||
data["category"] = self.category
|
||||
data["user"] = self.user
|
||||
del data["cname"]
|
||||
del data["description"]
|
||||
@@ -97,7 +96,7 @@ class TumblrPostExtractor(TumblrUserExtractor):
|
||||
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/post/(\d+)"]
|
||||
test = [("http://demo.tumblr.com/post/459265350", {
|
||||
"url": "d3d2bb185230e537314a0036814050634c730f74",
|
||||
"keyword": "a6a0d99eddfba835e710a584d59b19df1ea5c1ab",
|
||||
"keyword": "821236db342fb0d1bf8a177ca3108349168e6cd0",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -111,7 +110,7 @@ class TumblrTagExtractor(TumblrUserExtractor):
|
||||
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/tagged/(.+)"]
|
||||
test = [("http://demo.tumblr.com/tagged/Times Square", {
|
||||
"url": "d3d2bb185230e537314a0036814050634c730f74",
|
||||
"keyword": "2ab87097ecafce595dd53d8469b2337ec541bcde",
|
||||
"keyword": "e182759d3a26c9f72ccc8ddc22a382aad598d6dc",
|
||||
})]
|
||||
|
||||
def __init__(self, match):
|
||||
|
||||
@@ -20,7 +20,7 @@ class TurboimagehostImageExtractor(Extractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?turboimagehost\.com/p/((\d+)/[^/]+\.html)"]
|
||||
test = [("http://www.turboimagehost.com/p/29690902/test--.png.html", {
|
||||
"url": "c624dc7784de515342117a2678fee6ecf1032d79",
|
||||
"keyword": "32b27364c3137786ffec8e90b8de453e489abf93",
|
||||
"keyword": "8f8d105bae58fa33f1b06ca04949d38a1515641f",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})]
|
||||
|
||||
@@ -30,15 +30,11 @@ class TurboimagehostImageExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
page = self.request("http://www.turboimagehost.com/p/" + self.part).text
|
||||
data = {
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
}
|
||||
text.extract_all(page, (
|
||||
data = text.extract_all(page, (
|
||||
('width' , 'var imWidth = ', ';'),
|
||||
('height', 'var imHeight = ', ';'),
|
||||
('url' , '<a href="http://www.turboimagehost.com"><img src="', '"'),
|
||||
), values=data)
|
||||
), values={"token": self.token})[0]
|
||||
text.nameext_from_url(data["url"], data)
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
|
||||
@@ -23,7 +23,50 @@ class Job():
|
||||
|
||||
def run(self):
    """Execute or run the job

    Iterates over every message produced by the extractor and forwards it
    to the matching handle_*() method. Keyword dicts of Url and Directory
    messages are passed through update_kwdict() before being handled.

    Raises:
        ValueError: if the extractor announces a message-version other
            than 1.
    """
    for msg in self.extractor:
        if msg[0] == Message.Url:
            self.update_kwdict(msg[2])
            self.handle_url(msg[1], msg[2])

        elif msg[0] == Message.Directory:
            self.update_kwdict(msg[1])
            self.handle_directory(msg[1])

        elif msg[0] == Message.Queue:
            self.handle_queue(msg[1])

        elif msg[0] == Message.Headers:
            self.handle_headers(msg[1])

        elif msg[0] == Message.Cookies:
            self.handle_cookies(msg[1])

        elif msg[0] == Message.Version:
            if msg[1] != 1:
                # Python 3 cannot raise a plain string; wrap the message
                # in a real exception so it reaches the caller intact.
                raise ValueError(
                    "unsupported message-version ({}, {})".format(
                        self.extractor.category, msg[1]
                    )
                )
            # TODO: support for multiple message versions
|
||||
|
||||
def handle_url(self, url, keywords):
    """Handle Message.Url

    Called by run() with the url and the keyword dict of a Url message.
    The base implementation does nothing; subclasses override it.
    """
|
||||
|
||||
def handle_directory(self, keywords):
    """Handle Message.Directory

    Called by run() with the keyword dict of a Directory message.
    Does nothing unless a subclass overrides it.
    """
|
||||
|
||||
def handle_queue(self, url):
    """Handle Message.Queue

    Called by run() with the url of a Queue message.
    Does nothing unless a subclass overrides it.
    """
|
||||
|
||||
def handle_headers(self, headers):
    """Handle Message.Headers

    Called by run() with the payload of a Headers message.
    Does nothing unless a subclass overrides it.
    """
|
||||
|
||||
def handle_cookies(self, cookies):
    """Handle Message.Cookies

    Called by run() with the payload of a Cookies message.
    Does nothing unless a subclass overrides it.
    """
|
||||
|
||||
def update_kwdict(self, kwdict):
    """Add 'category' and 'subcategory' keywords

    Both values are copied from the job's extractor into 'kwdict',
    which is modified in place.
    """
    source = self.extractor
    for key in ("category", "subcategory"):
        kwdict[key] = getattr(source, key)
|
||||
|
||||
class DownloadJob(Job):
|
||||
"""Download images into appropriate directory/filename locations"""
|
||||
@@ -45,44 +88,17 @@ class DownloadJob(Job):
|
||||
)
|
||||
|
||||
def run(self):
    """Process all extractor messages, then run the queued jobs

    Message dispatch is left to Job.run(), which calls the handle_*()
    methods of this class; urls collected by handle_queue() are
    processed afterwards by run_queue().
    """
    Job.run(self)
    self.run_queue()
|
||||
|
||||
def run_queue(self):
    """Run a new DownloadJob for every url stored in the queue

    Nothing happens for an empty (or otherwise falsy) queue. Urls whose
    job raises NoExtractorError are skipped.
    """
    for url in self.queue or ():
        try:
            DownloadJob(url).run()
        except exception.NoExtractorError:
            continue
|
||||
|
||||
def download(self, msg):
|
||||
"""Download the resource specified in 'msg'"""
|
||||
_, url, metadata = msg
|
||||
filename = text.clean_path(self.filename_fmt.format(**metadata))
|
||||
def handle_url(self, url, keywords):
|
||||
"""Download the resource specified in 'url'"""
|
||||
filename = text.clean_path(self.filename_fmt.format(**keywords))
|
||||
path = os.path.join(self.directory, filename)
|
||||
realpath = self.adjust_path(path)
|
||||
if os.path.exists(realpath):
|
||||
@@ -94,10 +110,10 @@ class DownloadJob(Job):
|
||||
tries = dlinstance.download(url, file)
|
||||
self.printer.success(path, tries)
|
||||
|
||||
def set_directory(self, msg):
|
||||
def handle_directory(self, keywords):
|
||||
"""Set and create the target directory for downloads"""
|
||||
segments = [
|
||||
text.clean_path(segment.format(**msg[1]).strip())
|
||||
text.clean_path(segment.format(**keywords).strip())
|
||||
for segment in self.directory_fmt
|
||||
]
|
||||
self.directory = os.path.join(
|
||||
@@ -106,6 +122,19 @@ class DownloadJob(Job):
|
||||
)
|
||||
os.makedirs(self.adjust_path(self.directory), exist_ok=True)
|
||||
|
||||
def handle_queue(self, url):
    """Add url to work-queue

    The queue list is created on first use: when looking up
    'self.queue.append' raises AttributeError, a new list holding
    'url' is stored instead.
    """
    try:
        enqueue = self.queue.append
    except AttributeError:
        self.queue = [url]
    else:
        enqueue(url)
|
||||
|
||||
def handle_headers(self, headers):
    """Pass 'headers' on to the downloader registered for "http:" """
    downloader = self.get_downloader("http:")
    downloader.set_headers(headers)
|
||||
|
||||
def handle_cookies(self, cookies):
    """Pass 'cookies' on to the downloader registered for "http:" """
    downloader = self.get_downloader("http:")
    downloader.set_cookies(cookies)
|
||||
|
||||
def get_downloader(self, url):
|
||||
"""Return, and possibly construct, a downloader suitable for 'url'"""
|
||||
pos = url.find(":")
|
||||
@@ -119,13 +148,6 @@ class DownloadJob(Job):
|
||||
self.downloaders[scheme] = instance
|
||||
return instance
|
||||
|
||||
def enqueue(self, url):
    """Add url to work-queue

    Kept as an alias of handle_queue(), which holds the actual queue
    logic, so both names always behave the same.
    """
    self.handle_queue(url)
|
||||
|
||||
@staticmethod
|
||||
def get_base_directory():
|
||||
"""Return the base-destination-directory for downloads"""
|
||||
@@ -147,10 +169,12 @@ class KeywordJob(Job):
|
||||
for msg in self.extractor:
|
||||
if msg[0] == Message.Url:
|
||||
print("Keywords for filenames:")
|
||||
self.update_kwdict(msg[2])
|
||||
self.print_keywords(msg[2])
|
||||
return
|
||||
elif msg[0] == Message.Directory:
|
||||
print("Keywords for directory names:")
|
||||
self.update_kwdict(msg[1])
|
||||
self.print_keywords(msg[1])
|
||||
|
||||
@staticmethod
|
||||
@@ -165,27 +189,27 @@ class KeywordJob(Job):
|
||||
class UrlJob(Job):
    """Print download urls"""

    # run() is inherited from Job, which dispatches every extractor
    # message to the handle_*() methods below.

    def handle_url(self, url, _):
        """Print 'url'; the keyword dict of the message is ignored"""
        print(url)

    def handle_queue(self, url):
        """Print the download urls found behind a queued 'url'

        A nested UrlJob is run for 'url'; a NoExtractorError raised
        while doing so is ignored.
        """
        try:
            UrlJob(url).run()
        except exception.NoExtractorError:
            pass
|
||||
|
||||
|
||||
class HashJob(DownloadJob):
|
||||
"""Generate SHA1 hashes for extractor results"""
|
||||
|
||||
class HashIO:
    """Minimal file-like interface

    Only write() is provided; written data is not stored but fed into
    the hash object given at construction time.
    """

    def __init__(self, hashobj):
        self.hashobj = hashobj

    def write(self, content):
        """Feed 'content' into the wrapped hash object"""
        digest = self.hashobj
        digest.update(content)
|
||||
|
||||
def __init__(self, url, content=False):
|
||||
@@ -197,25 +221,28 @@ class HashJob(DownloadJob):
|
||||
if content:
|
||||
self.fileobj = self.HashIO(self.hash_content)
|
||||
|
||||
def handle_url(self, url, keywords):
    """Update the URL, keyword and content hashes for one Url message

    Nothing is written to disk here; 'url' and 'keywords' are only fed
    into the hashes via update_url(), update_keyword() and
    update_content().
    """
    self.update_url(url)
    self.update_keyword(keywords)
    self.update_content(url)
|
||||
|
||||
def handle_directory(self, keywords):
    """Update the keyword hash with the keywords of a Directory message

    No directory is created; 'keywords' is only passed to
    update_keyword().
    """
    self.update_keyword(keywords)
|
||||
|
||||
def handle_queue(self, url):
    """Update the URL hash with a queued url

    The url is not added to any work-queue; it is only passed to
    update_url().
    """
    self.update_url(url)
|
||||
|
||||
def update_url(self, url):
    """Feed the encoded form of 'url' into the URL hash"""
    url_hash = self.hash_url
    url_hash.update(url.encode())
|
||||
|
||||
def update_keyword(self, kwdict):
    """Feed 'kwdict' into the keyword hash

    The dict is serialized as JSON with sorted keys, so the insertion
    order of its entries does not influence the resulting hash.
    """
    serialized = json.dumps(kwdict, sort_keys=True)
    self.hash_keyword.update(serialized.encode())
|
||||
|
||||
def update_content(self, url):
    """Feed the resource behind 'url' into the content hash

    Does nothing unless 'self.content' is set. Otherwise the downloader
    returned by get_downloader() writes to 'self.fileobj'.
    """
    if not self.content:
        return
    downloader = self.get_downloader(url)
    downloader.download(url, self.fileobj)
|
||||
|
||||
Reference in New Issue
Block a user