diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index 0debac33..15d19801 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -17,7 +17,7 @@ class FourchanThreadExtractor(ChanExtractor): pattern = [r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+)"] test = [("https://boards.4chan.org/tg/thread/15396072/", { "url": "39082ad166161966d7ba8e37f2173a824eb540f0", - "keyword": "9b610fd3674653728516c34ec65925a024cc0074", + "keyword": "38679a7c8054f535cba67cae13eef1ea7dbc8085", "content": "3081ed85a5afaeb3f430f42540e7bb5eec1908cc", })] api_url = "https://a.4cdn.org/{board}/thread/{thread}.json" diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index ebfa31f2..b4468183 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -17,7 +17,7 @@ class InfinitychanThreadExtractor(ChanExtractor): pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"] test = [("https://8ch.net/tg/res/175887.html", { "url": "cb03fdc650ad8e796fdab553fbd5489f468d3f45", - "keyword": "d9388d231db6a0ea3e710a6cf46dc53dbdbb2115", + "keyword": "c2a7f57422558dddaf3467b9a30018e847eb4fad", "content": "9f51cdfee6942a18011996ca049baeb0a22f931b", })] api_url = "https://8ch.net/{board}/res/{thread}.json" diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 56b92e39..1e5340e5 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -22,7 +22,7 @@ class BatotoChapterExtractor(AsynchronousExtractor): pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"] test = [("http://bato.to/reader#459878c8fda07502", { "url": "432d7958506ad913b0a9e42664a89e46a63e9296", - "keyword": "7a3e03c40c8b3c7137c4ebe723b1b9c95a303d81", + "keyword": "75a3a86d32aecfc21c44865b4043490757f73d77", })] url = "https://bato.to/" reader_url = "https://bato.to/areader" @@ -78,7 +78,6 @@ class BatotoChapterExtractor(AsynchronousExtractor): manga, pos = extr(page, "document.title = '", " - ", pos) match = re.match(r"(Vol.(\d+) )?Ch\.([^:]+)(: (.+))?", cinfo) return { - "category": self.category, "token": self.token, "manga": text.unescape(manga), "volume": match.group(2) or "", diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index c87fb0bd..a8da3faa 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -55,13 +55,10 @@ class BooruExtractor(Extractor): def get_job_metadata(self): """Collect metadata for extractor-job""" # Override this method in derived classes - return { - "category": self.category, - } + return {} def get_file_metadata(self, data): """Collect metadata for a downloadable file""" - data["category"] = self.category return text.nameext_from_url(self.get_file_url(data), data) def get_file_url(self, data): @@ -114,10 +111,7 @@ class BooruTagExtractor(BooruExtractor): self.params["tags"] = self.tags def get_job_metadata(self): - return { - "category": self.category, - "tags": self.tags, - } + return {"tags": self.tags} class BooruPoolExtractor(BooruExtractor): @@ -131,10 +125,7 @@ class BooruPoolExtractor(BooruExtractor): self.params["tags"] = "pool:" + self.pool def get_job_metadata(self): - return { - "category": self.category, - "pool": self.pool, - } + return {"pool": self.pool} class BooruPostExtractor(BooruExtractor): diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index 2f74769f..8e5aed3c 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -21,7 +21,6 @@ class ChanExtractor(Extractor): def __init__(self, board, thread): Extractor.__init__(self) self.metadata = { - "category": self.category, "board": board, "thread": thread, } diff --git a/gallery_dl/extractor/chronos.py b/gallery_dl/extractor/chronos.py index a25b04e5..1b4d846a 100644 --- a/gallery_dl/extractor/chronos.py +++ b/gallery_dl/extractor/chronos.py @@ -21,7 +21,7 @@ class ChronosImageExtractor(Extractor): url_base = "http://chronos.to/" test = [("http://chronos.to/bdrmq7rw7v4y", { "url": "7fcb3fe315c94283644d25ef47a644c2dc8da944", - "keyword": "9c364ef9bd962fe70eca49ef74c1c424486514f9", + "keyword": "04dbc71a1154728d01c931308184050d61c5da55", "content": "0c8768055e4e20e7c7259608b67799171b691140", })] @@ -30,10 +30,6 @@ class ChronosImageExtractor(Extractor): self.token = match.group(1) def items(self): - data = { - "category": self.category, - "token": self.token, - } params = { "op": "view", "id": self.token, @@ -44,7 +40,7 @@ class ChronosImageExtractor(Extractor): data=params).text url , pos = text.extract(page, '
', '', ''), ('date' , 'title="', '"'), - ), values={'category': self.category, "index": self.index})[0] + ), values={"index": self.index})[0] data["description"] = text.unescape(text.unescape(data["description"])) data["artist"] = text.extract(data["url"], "//", ".")[0] data["date"] = text.extract(data["date"], ", ", " in ", len(data["title"]))[0] diff --git a/gallery_dl/extractor/doujinmode.py b/gallery_dl/extractor/doujinmode.py index c368c3e7..c894db6e 100644 --- a/gallery_dl/extractor/doujinmode.py +++ b/gallery_dl/extractor/doujinmode.py @@ -22,7 +22,7 @@ class DoujinmodeChapterExtractor(Extractor): r"(?:hentai/|yaoi/|western/)?mangas/([0-9a-f]{36})")] test = [("http://doujinmode.net/mangas/967836c988a716e9efca06998b7838d09eb5", { "url": "be5d48a9fd48f09cfcc5d4e51f24bf1100e75502", - "keyword": "710cc9599faf563b0cad836bbc7d85b288fcda3a", + "keyword": "fbccd0416f19080dc2e041917aeff721399adf13", "content": "a041114e2a8af54d42a4a46a69cae4ebf2641cb1", })] url_base = "http://doujinmode.net/mangas/" @@ -45,7 +45,6 @@ class DoujinmodeChapterExtractor(Extractor): count, pos = text.extract(page, ' class="manga-count">', '') title, pos = text.extract(page, '

', ' Images List

', pos) return { - "category": self.category, "gallery-id": self.gid, "title": text.unescape(title), "count": count, diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index a5d04ccd..54bff92c 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -23,11 +23,11 @@ class DynastyscansChapterExtractor(Extractor): test = [ ("http://dynasty-scans.com/chapters/hitoribocchi_no_oo_seikatsu_ch33", { "url": "63950fa1dfdef58ab842c1b9b854c5c1d650cfa0", - "keyword": "7a950a94e76cceb63559de0826cb2d5a1dcaa48a", + "keyword": "81bfda5b98b34ac2a7324bd9e2abad3df9cc7673", }), ("http://dynasty-scans.com/chapters/new_game_the_spinoff_special_13", { "url": "6b28c733481ac498da341e85a9eb155864491731", - "keyword": "56ed59442b69d45ee4042d6586b30a72f55c3e12", + "keyword": "93b75d0c0aaeb849c99f2225a4b97f466bc3ace9", }), ] url_base = "http://dynasty-scans.com/" @@ -61,7 +61,6 @@ class DynastyscansChapterExtractor(Extractor): info ) return { - "category": self.category, "manga": text.unescape(match.group(1)), "chapter": match.group(2) or "", "title": text.unescape(match.group(3) or ""), diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index e72a249a..59e9b3b2 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -22,7 +22,7 @@ class ExhentaiGalleryExtractor(Extractor): filename_fmt = "{gallery-id}_{num:>04}_{image-token}_{name}.{extension}" pattern = [r"(?:https?://)?(?:g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"] test = [("https://exhentai.org/g/960460/4f0e369d82/", { - "keyword": "c1282ffbe5d452c62dec9dbde4ecb7037525cd64", + "keyword": "623f8c86c9fe38e964682dd4309b96922655b900", "content": "493d759de534355c9f55f8e365565b62411de146", })] api_url = "https://exhentai.org/api.php" @@ -71,7 +71,6 @@ class ExhentaiGalleryExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category" : self.category, "gallery-id" : self.gid, "gallery-token": self.token, } diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 0e252304..54b553de 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -46,7 +46,7 @@ class HbrowseChapterExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"] test = [("http://www.hbrowse.com/10363/c00000", { "url": "634f4800858913f097bc3b62a8fedaf74b5254bd", - "keyword": "e6263b71f791000ad4bca58bc4d90f79e42e6be6", + "keyword": "c7dc22a10699dee5cf466406fecee6ffa2e6277e", "content": "44578ebbe176c2c27434966aef22945787e2781e", })] url_base = "http://www.hbrowse.com" @@ -68,7 +68,6 @@ class HbrowseChapterExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": self.category, 'gallery-id': self.gid, "chapter": int(self.chapter[1:]), } diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py index 8ca9011e..b0d93dca 100644 --- a/gallery_dl/extractor/hentai2read.py +++ b/gallery_dl/extractor/hentai2read.py @@ -54,7 +54,7 @@ class Hentai2readChapterExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"] test = [("http://hentai2read.com/amazon_elixir/1/", { "url": "fb5fc4d7cc194116960eaa648c7e045a6e6f0c11", - "keyword": "03435037539d57ca084c457b5ac4d48928487521", + "keyword": "c05d0d0bbe188926b15a43df1f8f65b8ac11c3fd", })] def __init__(self, match): @@ -78,7 +78,6 @@ class Hentai2readChapterExtractor(Extractor): title = text.extract(page, "", "")[0] match = re.match(r"Reading (?:(.+) dj - )?(.+) Hentai - \d+: ", title) return { - "category": self.category, "gallery-id": images[0].split("/")[-3], "chapter": self.chapter, "count": len(images), diff --git a/gallery_dl/extractor/hentaibox.py b/gallery_dl/extractor/hentaibox.py index 3577e8c1..886aa955 100644 --- a/gallery_dl/extractor/hentaibox.py +++ b/gallery_dl/extractor/hentaibox.py @@ -20,7 +20,7 @@ class HentaiboxChapterExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?hentaibox\.net/[^/]+/(\d+)_\d+_([^/&]+)"] test = [("http://www.hentaibox.net/hentai-manga/16_18_Original_Amazon-No-Hiyaku-Amazon-Elixir-Decensored", { "url": "d1a50a9b289d284f178971e01cf312791888e057", - "keyword": "294eda384689d4f1178ec952560d0dedd3e38647", + "keyword": "b4b100f800b716e573e072f01b5d604d9b436b70", })] def __init__(self, match): @@ -44,7 +44,7 @@ class HentaiboxChapterExtractor(Extractor): ("title" , 'content="Read or Download ', ' hentai manga from'), ("series" , ' the series ', ' with ' + self.count), ("language", ' translated pages to ', '.'), - ), values={"category": self.category, "count": self.count})[0] + ), values={"count": self.count})[0] data["lang"] = iso639_1.language_to_code(data["language"]) return data diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index c0b56535..56cc3447 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -23,7 +23,7 @@ class HentaifoundryUserExtractor(Extractor): ] test = [("http://www.hentai-foundry.com/pictures/user/Orzy", { "url": "236ac02c8f081fee44ad2c2571bf74615633b91e", - "keyword": "f5f1aa78ecbe390fb117a0b599f771cd47df86c6", + "keyword": "9f334f635b71c915b026cf20a65eee065237d452", })] url_base = "http://www.hentai-foundry.com/pictures/user/" @@ -60,7 +60,6 @@ class HentaifoundryUserExtractor(Extractor): token, pos = text.extract(page, 'hidden" value="', '"') count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos) return { - "category": self.category, "artist": self.artist, "count": count, }, token @@ -115,7 +114,7 @@ class HentaifoundryImageExtractor(Extractor): r"([^/]+)/(\d+)/[^/]+")] test = [("http://www.hentai-foundry.com/pictures/user/Orzy/76940/Youmu-Konpaku", { "url": "50c267b2b2983b98b18fd0d2acbec8ce5ba64c77", - "keyword": "8c9b7054b78fb4f52982c3f21f3ba2a9fcdd5428", + "keyword": "6cee38ac0817783feb6db9944da997bec13d0e19", })] def __init__(self, match): @@ -136,7 +135,6 @@ class HentaifoundryImageExtractor(Extractor): title, pos = text.extract(page, 'Pictures » ', '<') url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos) data = { - "category": self.category, "artist": self.artist, "index": self.index, "title": text.unescape(title), diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 1048aa15..b93b6e02 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -21,7 +21,7 @@ class HitomiGalleryExtractor(Extractor): pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"] test = [("https://hitomi.la/galleries/867789.html", { "url": "23fd59894c3db65aec826aa5efb85f96d2384883", - "keyword": "80395a06b6ba24842c15121d142830bb467ae68b", + "keyword": "03a64d67584afd7b8ad96ecb47acae08ea14d90f", })] def __init__(self, match): @@ -61,7 +61,6 @@ class HitomiGalleryExtractor(Extractor): series, pos = text.extract(page, '.html">', '', pos) lang = lang.capitalize() return { - "category": self.category, "gallery-id": self.gid, "title": " ".join(title.split()), "artist": string.capwords(artist), diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index 6d0248ed..d0fd07c8 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -20,7 +20,7 @@ class ImagebamGalleryExtractor(AsynchronousExtractor): pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"] test = [("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", { "url": "d7a4483b6d5ebba81950a349aad58ae034c60eda", - "keyword": "9f54ab808d77f2517444411dfbf8686189c20b43", + "keyword": "e4a9395dbd06d4af3172a6a61c90601bc47ee18c", "content": "596e6bfa157f2c7169805d50075c2986549973a8", })] url_base = "http://www.imagebam.com" @@ -44,17 +44,12 @@ class ImagebamGalleryExtractor(AsynchronousExtractor): """Collect metadata for extractor-job""" url = self.url_base + "/gallery/" + self.gkey page = self.request(url, encoding="utf-8").text - data = { - "category": self.category, - "gallery-key": self.gkey, - } - data, _ = text.extract_all(page, ( + return text.extract_all(page, ( (None , " ", " <"), ("count" , "'>", " images"), ("first-url", "Porn pics of ', ' (Page 1)'), ("uploader", '>Uploaded by ', ''), ("count" , ' 1 of ', ' pics"'), - ), values={"category": self.category, "gallery-id": self.gid}) + ), values={"gallery-id": self.gid}) self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0] data["title"] = text.unescape(data["title"]) return data @@ -82,7 +82,7 @@ class ImagefapImageExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)"] test = [("http://www.imagefap.com/photo/1616331218/", { "url": "8a05c0ccdcf84e63c962803bc41d247628c549ea", - "keyword": "401ded07ae0b3a8f718e553e506898b34cd92020", + "keyword": "c9880c6731b3fdc6d98d25dbff56f4342c11683e", "content": "964b8c62c9d5c2a039a2fccf1b1e10aaf7a18a96", })] @@ -101,7 +101,6 @@ class ImagefapImageExtractor(Extractor): """Collect metadata for extractor-job""" parts = info["contentUrl"].rsplit("/", 3) return text.nameext_from_url(parts[3], { - "category": self.category, "title": text.unescape(info["name"]), "section": info["section"], "uploader": info["author"], diff --git a/gallery_dl/extractor/imagetwist.py b/gallery_dl/extractor/imagetwist.py index c7f13d07..ffc6e2fb 100644 --- a/gallery_dl/extractor/imagetwist.py +++ b/gallery_dl/extractor/imagetwist.py @@ -20,7 +20,7 @@ class ImagetwistImageExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?imagetwist\.com/([a-z0-9]{12})"] test = [("http://imagetwist.com/4e46hv31tu0q/test.jpg", { "url": "6b3fc0bd1105b698d2d5844658ca674d66b1e2e7", - "keyword": "d599a540ed233bb7b66e4abec30affbad2e44af1", + "keyword": "825d9d1901829da054b6ef9c034229af85e495e2", "content": "96b1fd099b06faad5879fce23a7e4eb8290d8810", })] @@ -34,7 +34,6 @@ class ImagetwistImageExtractor(Extractor): filename, pos = text.extract(page, ' alt="', '"', pos) userid , pos = text.extract(url , '/', '/', 29) data = { - "category": self.category, "token": self.token, "user": userid, } diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py index 9701151c..22676af5 100644 --- a/gallery_dl/extractor/imgbox.py +++ b/gallery_dl/extractor/imgbox.py @@ -21,7 +21,7 @@ class ImgboxGalleryExtractor(AsynchronousExtractor): pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"] test = [("http://imgbox.com/g/JaX5V5HX7g", { "url": "c7c3466dde31d4308833816961104c7d1100368d", - "keyword": "23deb783d3afee090f61472b495e797c8f262b93", + "keyword": "cebd7f6868cf84ff492341c936cb6dbe5cde4682", "content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc", })] url_base = "http://imgbox.com" @@ -47,7 +47,6 @@ class ImgboxGalleryExtractor(AsynchronousExtractor): title = text.extract(page, "

", "

")[0] parts = title.rsplit(" - ", maxsplit=1) return { - "category": self.category, "gallery-key": self.key, "title": text.unescape(parts[0]), "count": parts[1][:-7], @@ -79,7 +78,7 @@ class ImgboxImageExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"] test = [("http://imgbox.com/qHhw7lpG", { "url": "d96990ea12223895287d139695077b70dfa0abe4", - "keyword": "c5e87be93fec3122151edf85b6424d1871279590", + "keyword": "ff0524dba869a4b3292d7d4f72f5da4024b4f002", "content": "0c8768055e4e20e7c7259608b67799171b691140", })] @@ -91,8 +90,7 @@ class ImgboxImageExtractor(Extractor): page = self.request("http://imgbox.com/" + self.key).text url , pos = text.extract(page, 'src="http://i.', '"') filename, pos = text.extract(page, ' title="', '"', pos) - data = {"category": self.category, "image-key": self.key} - text.nameext_from_url(filename, data) + data = text.nameext_from_url(filename, {"image-key": self.key}) yield Message.Version, 1 yield Message.Directory, data yield Message.Url, "http://i." + url, data diff --git a/gallery_dl/extractor/imgcandy.py b/gallery_dl/extractor/imgcandy.py index bc41ccf2..6c193c5a 100644 --- a/gallery_dl/extractor/imgcandy.py +++ b/gallery_dl/extractor/imgcandy.py @@ -21,7 +21,7 @@ class ImgcandyImageExtractor(Extractor): r"(?:_(.+))?\.html")] test = [("http://imgcandy.net/img-57d02527efee8_test-テスト.png.html", { "url": "bc3c9207b10dbfe8e65ccef5b9e3194a7427b4fa", - "keyword": "381e036374742a091cac7dd7a3eca90ee725afa3", + "keyword": "1ed1587ef38a6b26ce28b35857a78417239d197a", "content": "0c8768055e4e20e7c7259608b67799171b691140", })] @@ -30,12 +30,11 @@ class ImgcandyImageExtractor(Extractor): self.token, self.filename = match.groups() def items(self): - data = {"category": self.category, "token": self.token} params = {"imgContinue": "Continue+to+image+...+"} page = self.request("http://imgcandy.net/img-" + self.token + ".html", method="post", data=params).text url = text.extract(page, " 1 else "" }) @@ -86,8 +84,6 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor): def get_job_metadata(self, page): title = text.extract(page, "

", "

")[0] return { - "category": self.category, - "subcategory": self.subcategory, "title": text.unescape(title), "key": self.match.group(1), } diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py index ea0965c9..7b184b15 100644 --- a/gallery_dl/extractor/imgth.py +++ b/gallery_dl/extractor/imgth.py @@ -20,7 +20,7 @@ class ImgthGalleryExtractor(Extractor): pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"] test = [("http://imgth.com/gallery/37/wallpaper-anime", { "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748", - "keyword": "1b15726d53bc2c08d845fa60ce538396380688df", + "keyword": "3f268fcc18d49ac3799a8f25cc08053e90891955", })] def __init__(self, match): @@ -61,4 +61,4 @@ class ImgthGalleryExtractor(Extractor): ("date" , 'created on ', ' by <'), (None , 'href="/users/', ''), ("user" , '>', '<'), - ), values={"category": self.category, "gallery-id": self.gid})[0] + ), values={"gallery-id": self.gid})[0] diff --git a/gallery_dl/extractor/imgtrex.py b/gallery_dl/extractor/imgtrex.py index f94fa30d..da75f0be 100644 --- a/gallery_dl/extractor/imgtrex.py +++ b/gallery_dl/extractor/imgtrex.py @@ -20,7 +20,7 @@ class ImgtrexImageExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?imgtrex\.com/([^/]+)"] test = [("http://imgtrex.com/im0ypxq0rke4/test-テスト-&
.png", { "url": "c000618bddda42bd599a590b7972c7396d19d8fe", - "keyword": "4d766eae04aa5457bca4992290aa28b76239d287", + "keyword": "58905795a9cd3f17d5ff024fc4d63645795ba23c", "content": "0c8768055e4e20e7c7259608b67799171b691140", })] @@ -29,11 +29,10 @@ class ImgtrexImageExtractor(Extractor): self.token = match.group(1) def items(self): - data = {"category": self.category, "token": self.token} page = self.request("http://imgtrex.com/" + self.token).text filename, pos = text.extract(page, 'ImgTrex: ', '') url , pos = text.extract(page, '
\n", "", ""), ("date" , "Date added: ", ""), ("type" , "Album type: ", ""), - ), values={"category": self.category})[0] + ))[0] def get_album_tracks(self, page): """Collect url and metadata for all tracks of a soundtrack""" diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index 8b6e6225..31e17daa 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -53,11 +53,11 @@ class KissmangaChapterExtractor(KissmangaExtractor): test = [ ("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", { "url": "4136bcd1c6cecbca8cc2bc965d54f33ef0a97cc0", - "keyword": "892c3e4df03a575a282a5695add986a49623d746", + "keyword": "ab332093a4f2e473a468235bfd624cbe3b19fd7f", }), ("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", { "url": "de074848f6c1245204bb9214c12bcc3ecfd65019", - "keyword": "0a98952984941cc2a11892b1cd7b237ffb20adaa", + "keyword": "013aad80e578c6ccd2e1fe47cdc27c12a64f6db2", }) ] @@ -81,7 +81,6 @@ class KissmangaChapterExtractor(KissmangaExtractor): r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo) chminor = match.group(3) return { - "category": self.category, "manga": manga, "volume": match.group(1) or "", "chapter": match.group(2), diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 3a3dfa54..31a22c26 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -21,8 +21,8 @@ class LusciousAlbumExtractor(Extractor): pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/c/([^/]+)/" r"(?:pictures/album|albums)/([^/\d]+(\d+))")] test = [("https://luscious.net/c/incest_manga/albums/amazon-no-hiyaku-amazon-elixir-english-decensored_261127/view/", { - "url": "319a70261de12620d123add9b519d15b8515b503", - "keyword": "60cc15db2619b8aee47c1527b6326be5a54f5c2f", + "url": "12e1fde5ef3c0d41973a85fb27a602eb922c60aa", + "keyword": "e10c7c070ad730e305024fb37cc70af6b05378dd", })] def __init__(self, match): @@ -50,7 +50,7 @@ class LusciousAlbumExtractor(Extractor): (None , '

Language:', ''), ("language", '\n ', ' '), ("artist" , 'rtist: ', '\n'), - ), values={"category": self.category, "gallery-id": self.gid})[0] + ), values={"gallery-id": self.gid})[0] data["lang"] = iso639_1.language_to_code(data["language"]) return data diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index 6b0385c7..948d3e2e 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -49,7 +49,7 @@ class MangahereChapterExtractor(AsynchronousExtractor): r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")] test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", { "url": "68efaeed3bc6abb0a0b6f75a5c649c17979e31f1", - "keyword": "f342e3df9fa39eb10cf7ba5ef3300df6ad77f332", + "keyword": "d3fe470e934a9f02ed00d4391b1743970eae82fa", })] url_fmt = "http://www.mangahere.co/manga/{}/{}.html" @@ -76,7 +76,6 @@ class MangahereChapterExtractor(AsynchronousExtractor): count, pos = text.extract(page, '>', '<', pos-30) manga = re.match(r"(.+) \d+(\.\d+)? - Read .+ Chapter \d+(\.\d+)? Online", manga).group(1) return { - "category": self.category, "manga": text.unescape(manga), # "title": TODO, "volume": self.volume or "", diff --git a/gallery_dl/extractor/mangamint.py b/gallery_dl/extractor/mangamint.py index d56292fd..48950034 100644 --- a/gallery_dl/extractor/mangamint.py +++ b/gallery_dl/extractor/mangamint.py @@ -55,7 +55,7 @@ class MangamintChapterExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?mangamint\.com/([^\?]+-(\d+))"] test = [("http://www.mangamint.com/mushishi-1", { "url": "337f46c4dab50f544e9196ced723ac8f70400dd0", - "keyword": "ca4ba6fa84367fd7c345879a17ebaad39b589da5", + "keyword": "de9ea839d231cb9f1590a2a93ca9ab2f8743b39d", })] def __init__(self, match): @@ -80,7 +80,6 @@ class MangamintChapterExtractor(Extractor): chid , pos = text.extract(page, r'"identifier":"node\/', '"', pos) match = re.match(r"(.+) (\d+)(\.\d+)?$", manga) return { - "category": self.category, "manga": match.group(1), "chapter": match.group(2), "chapter-minor": match.group(3) or "", diff --git a/gallery_dl/extractor/mangapanda.py b/gallery_dl/extractor/mangapanda.py index 9e7f9e95..20b5db47 100644 --- a/gallery_dl/extractor/mangapanda.py +++ b/gallery_dl/extractor/mangapanda.py @@ -33,5 +33,5 @@ class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor): ] test = [("http://www.mangapanda.com/red-storm/2", { "url": "4bf4ddf6c50105ec8a37675495ab80c46608275d", - "keyword": "dcb8d655e3f461738c821819bbb8d017bd916713", + "keyword": "89c712f7ed255ec9c1d8e84dcb5a160b6cb4498c", })] diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index 1760e3e8..2c3f9bb4 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -51,11 +51,11 @@ class MangaparkChapterExtractor(Extractor): test = [ ("http://mangapark.me/manga/ad-astra-per-aspera-hata-kenjirou/s1/c1.2/1", { "url": "25d998a70df1fa559afc189ebd17df300b54dc28", - "keyword": "40d60961d7aaf24454d2ab23fbc83f4c55cd4174", + "keyword": "aa0dfbd21a5174b1497bce98182324e5120dd4ff", }), ("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", { "url": "8534c8286a18c4db47606f84a4df9f1a42bab291", - "keyword": "f96962442cdd5bc957603831c695159d974b7b93", + "keyword": "df83f2ccde8dd58d6b906a65ae1ecf3bec801567", }) ] @@ -80,7 +80,6 @@ class MangaparkChapterExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": self.category, "version": self.version, "volume": self.volume or "", "chapter": self.chapter, diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index b7292492..d04dcd1c 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -50,7 +50,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): ] test = [("http://www.mangareader.net/karate-shoukoushi-kohinata-minoru/11", { "url": "84ffaab4c027ef9022695c53163c3aeabd07ca58", - "keyword": "0df7db81a44ef642922aab798c303d60e2b6802d", + "keyword": "09b4ad57a082eb371dec027ccfc8ed1157c6eac6", })] def __init__(self, match): @@ -74,7 +74,6 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): """Collect metadata for extractor-job""" page = self.request(self.url_base + self.url_title).text data = { - "category": self.category, "chapter": self.chapter, "lang": "en", "language": "English", diff --git a/gallery_dl/extractor/mangashare.py b/gallery_dl/extractor/mangashare.py index 6abeb43a..39215d84 100644 --- a/gallery_dl/extractor/mangashare.py +++ b/gallery_dl/extractor/mangashare.py @@ -46,7 +46,7 @@ class MangashareChapterExtractor(AsynchronousExtractor): pattern = [r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)"] test = [("http://read.mangashare.com/Gantz/chapter-331/page001.html", { "url": "2980fb9548e809dea63d104bc514dcc33bdd9ef7", - "keyword": "4872a5645ab79cb9ecf363a5bf4cb9062fd61eef", + "keyword": "8afc1c2a3e64efa3d2b9ed2359885343f89bdfa9", })] url_fmt = "http://read.mangashare.com/{}/page{:>03}.html" @@ -67,7 +67,6 @@ class MangashareChapterExtractor(AsynchronousExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": self.category, "lang": "en", "language": "English", } diff --git a/gallery_dl/extractor/mangastream.py b/gallery_dl/extractor/mangastream.py index 91df9e46..57a1caa7 100644 --- a/gallery_dl/extractor/mangastream.py +++ b/gallery_dl/extractor/mangastream.py @@ -46,7 +46,6 @@ class MangastreamChapterExtractor(AsynchronousExtractor): title, pos = text.extract(page, ' - ', '<', pos) count, pos = text.extract(page, 'Last Page (', ')', pos) data = { - "category": self.category, "manga": manga, "chapter": text.unquote(self.chapter), "chapter-id": self.ch_id, diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py index a18d5f5a..7bd8a15f 100644 --- a/gallery_dl/extractor/nhentai.py +++ b/gallery_dl/extractor/nhentai.py @@ -21,7 +21,7 @@ class NhentaiGalleryExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"] test = [("http://nhentai.net/g/147850/", { "url": "199ddd07dded0f69282e09a372710698ea21ab8e", - "keyword": "e00678567c8335289ffcbb2e6980b28d332ee6a7", + "keyword": "c7e37dfe80ca5eee69210c690a1340ea78a932a4", })] def __init__(self, match): @@ -57,7 +57,6 @@ class NhentaiGalleryExtractor(Extractor): title_en = ginfo["title"].get("english", "") title_ja = ginfo["title"].get("japanese", "") return { - "category": self.category, "gallery-id": self.gid, "upload-date": ginfo["upload_date"], "media-id": ginfo["media_id"], diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index f66634b2..7f2be502 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -40,10 +40,7 @@ class NijieExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" - return { - "category": self.category, - "artist-id": self.artist_id, - } + return {"artist-id": self.artist_id} def get_image_ids(self): """Collect all image-ids for a specific artist""" @@ -83,7 +80,7 @@ class NijieUserExtractor(NijieExtractor): r"members(?:_illust)?\.php\?id=(\d+)")] test = [("https://nijie.info/members_illust.php?id=44", { "url": "585d821df4716b1098660a0be426d01db4b65f2a", - "keyword": "30c981b9d7351ec275b9840d8bc2b4ef3da8c4b4", + "keyword": "7a2dbf8fc0dfdb2af208ecdb8ec7f3186bdc31ab", })] def __init__(self, match): @@ -105,7 +102,7 @@ class NijieImageExtractor(NijieExtractor): pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/view\.php\?id=(\d+)"] test = [("https://nijie.info/view.php?id=70720", { "url": "a10d4995645b5f260821e32c60a35f73546c2699", - "keyword": "1c0b1a2e447d8e1cd4f93c21f71d7fe7de0eeed3", + "keyword": "e454c2bad9b636b90d569881bf4fe8438506e0d2", "content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6", })] diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index cf79631f..43d325e2 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -26,8 +26,6 @@ class PinterestExtractor(Extractor): img = pin["image"]["original"] url = img["url"] data = { - "category": self.category, - "subcategory": self.subcategory, "pin-id": pin["id"], "note": pin["note"], "width": img["width"], @@ -90,8 +88,6 @@ class PinterestBoardExtractor(PinterestExtractor): def data_from_board(self, board): """Get metadata from a board-object""" data = { - "category": self.category, - "subcategory": self.subcategory, "user": self.user, "board-id": board["id"], "board": board["name"], diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index d81c075e..e09a1844 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -92,7 +92,6 @@ class PixivUserExtractor(Extractor): """Prepare a work-dictionary with additional keywords""" user = work["user"] url = work["image_urls"]["large"] - work["category"] = self.category work["artist-id"] = user["id"] work["artist-name"] = user["name"] work["artist-nick"] = user["account"] @@ -130,7 +129,6 @@ class PixivUserExtractor(Extractor): if not user: user = self.api.user(self.artist_id)["response"][0] return { - "category": self.category, "artist-id": user["id"], "artist-name": user["name"], "artist-nick": user["account"], diff --git a/gallery_dl/extractor/powermanga.py b/gallery_dl/extractor/powermanga.py index 4ad84bdc..92148a01 100644 --- a/gallery_dl/extractor/powermanga.py +++ b/gallery_dl/extractor/powermanga.py @@ -27,7 +27,7 @@ class PowermangaChapterExtractor(Extractor): ] test = [("https://read.powermanga.org/read/one_piece/en/0/803/page/1", { "url": "e6179c1565068f99180620281f86bdd25be166b4", - "keyword": "1c8593087f4a2e3343966a2900fc67be8e6401f1", + "keyword": "ab66c38e31f1b716ed360ee8c78fd973d7d8693a", })] def __init__(self, match): @@ -63,7 +63,6 @@ class PowermangaChapterExtractor(Extractor): json_data, pos = text.extract(page, 'var pages = ', ';', pos) match = re.match(r"(\w+ (\d+)([^:+]*)(?:: (.*))?|[^:]+)", chapter) return { - "category": self.category, "manga": text.unescape(manga), "chapter": match.group(2) or match.group(1), "chapter-minor": match.group(3) or "", diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 28c3f5a0..3cfe5523 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -20,7 +20,7 @@ class SankakuTagExtractor(AsynchronousExtractor): pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)"] test = [("https://chan.sankakucomplex.com/?tags=bonocho", { "url": "2561ca0d8482ead48f22a7abcd23919cd78344a1", - "keyword": "6282e9a2d5223d635d9be7515f59d87d4b9be732", + "keyword": "5e3a39fdc6698e63ed0054478ebd4ca632ce643e", })] url = "https://chan.sankakucomplex.com/" @@ -42,10 +42,7 @@ class SankakuTagExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" - return { - "category": self.category, - "tags": self.tags, - } + return {"tags": self.tags} def get_images(self): params = { diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py index 4b7cd393..96f3d4a4 100644 --- a/gallery_dl/extractor/seiga.py +++ b/gallery_dl/extractor/seiga.py @@ -23,7 +23,7 @@ class SeigaImageExtractor(Extractor): (r"(?:https?://)?lohas\.nicoseiga\.jp/" r"(?:priv|o)/[^/]+/\d+/(\d+)")] test = [("http://seiga.nicovideo.jp/seiga/im5977527", { - "keyword": "e2ea59186c47beb71484ba35d550cf6511ac185a", + "keyword": "fd2628b573d15d1bbdefb219a99b993365b214ed", "content": "d9202292012178374d57fb0126f6124387265297", })] @@ -44,10 +44,7 @@ class SeigaImageExtractor(Extractor): def get_job_metadata(self): """Collect metadata for extractor-job""" - return { - "category": self.category, - "image-id": self.image_id, - } + return {"image-id": self.image_id} def get_image_url(self, image_id): """Get url for an image with id 'image_id'""" diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py index 2eb7763c..6db581ee 100644 --- a/gallery_dl/extractor/senmanga.py +++ b/gallery_dl/extractor/senmanga.py @@ -20,7 +20,7 @@ class SenmangaChapterExtractor(Extractor): pattern = [r"(?:https?://)?raw\.senmanga\.com/([^/]+/[^/]+)"] test = [("http://raw.senmanga.com/Bokura-wa-Minna-Kawaisou/37A/1", { "url": "32d88382fcad66859d089cd9a61249f375492ec5", - "keyword": "9554ccc7bc32c358b2491c255e614ae908d7d593", + "keyword": "465905e0b69998656f9d59462a9560319941c58d", "content": "a791dda85ac0d37e3b36d754560cbb65b8dab5b9", })] url_base = "http://raw.senmanga.com" @@ -52,7 +52,6 @@ class SenmangaChapterExtractor(Extractor): manga, pos = text.extract(title, '| Raw | ', ' | Chapter ') chapter, pos = text.extract(title, '', ' | Page ', pos) return { - "category": self.category, "manga": text.unescape(manga.replace("-", " ")), "chapter": chapter, "count": count, diff --git a/gallery_dl/extractor/spectrumnexus.py b/gallery_dl/extractor/spectrumnexus.py index 6c9a6a64..60dec7a1 100644 --- a/gallery_dl/extractor/spectrumnexus.py +++ b/gallery_dl/extractor/spectrumnexus.py @@ -50,7 +50,7 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor): ] test = [("http://view.thespectrum.net/series/toriko.html?ch=Chapter+343&page=1", { "url": "c0fc7dc594841217cc622a67edd79f06e9900333", - "keyword": "bde9c95a2d0feca0574c7248ed06f1684f86b2ac", + "keyword": "8499166b62db0c87e7109cc5f9aa837b4815dd9c", })] def __init__(self, match): @@ -82,7 +82,6 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": self.category, "chapter": self.chapter or "", "volume": self.volume or "", "identifier": self.identifier.replace("+", " "), diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index c784acc2..a4c14529 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -21,7 +21,7 @@ class TumblrUserExtractor(Extractor): pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com(?:/page/\d+)?/?$"] test = [("http://demo.tumblr.com/", { "url": "d3d2bb185230e537314a0036814050634c730f74", - "keyword": "2ab87097ecafce595dd53d8469b2337ec541bcde", + "keyword": "8704a9bbb65b6e52dc1ccdf2c2449bd4abe3d389", "content": "31495fdb9f84edbb7f67972746a1521456f649e2", })] @@ -47,7 +47,6 @@ class TumblrUserExtractor(Extractor): def get_job_metadata(self, image_data): """Collect metadata for extractor-job""" data = next(image_data) - data["category"] = self.category data["user"] = self.user del data["cname"] del data["description"] @@ -97,7 +96,7 @@ class TumblrPostExtractor(TumblrUserExtractor): pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/post/(\d+)"] test = [("http://demo.tumblr.com/post/459265350", { "url": "d3d2bb185230e537314a0036814050634c730f74", - "keyword": "a6a0d99eddfba835e710a584d59b19df1ea5c1ab", + "keyword": "821236db342fb0d1bf8a177ca3108349168e6cd0", })] def __init__(self, match): @@ -111,7 +110,7 @@ class TumblrTagExtractor(TumblrUserExtractor): pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/tagged/(.+)"] test = [("http://demo.tumblr.com/tagged/Times Square", { "url": "d3d2bb185230e537314a0036814050634c730f74", - "keyword": "2ab87097ecafce595dd53d8469b2337ec541bcde", + "keyword": "e182759d3a26c9f72ccc8ddc22a382aad598d6dc", })] def __init__(self, match): diff --git a/gallery_dl/extractor/turboimagehost.py b/gallery_dl/extractor/turboimagehost.py index 20e12f4b..5484cb0b 100644 --- a/gallery_dl/extractor/turboimagehost.py +++ b/gallery_dl/extractor/turboimagehost.py @@ -20,7 +20,7 @@ class TurboimagehostImageExtractor(Extractor): pattern = [r"(?:https?://)?(?:www\.)?turboimagehost\.com/p/((\d+)/[^/]+\.html)"] test = [("http://www.turboimagehost.com/p/29690902/test--.png.html", { "url": "c624dc7784de515342117a2678fee6ecf1032d79", - "keyword": "32b27364c3137786ffec8e90b8de453e489abf93", + "keyword": "8f8d105bae58fa33f1b06ca04949d38a1515641f", "content": "0c8768055e4e20e7c7259608b67799171b691140", })] @@ -30,15 +30,11 @@ class TurboimagehostImageExtractor(Extractor): def items(self): page = self.request("http://www.turboimagehost.com/p/" + self.part).text - data = { - "category": self.category, - "token": self.token, - } - text.extract_all(page, ( + data = text.extract_all(page, ( ('width' , 'var imWidth = ', ';'), ('height', 'var imHeight = ', ';'), ('url' , '