diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index 2e4f308b..8dcfad45 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -70,7 +70,7 @@ class FoolfuukaThreadExtractor(SharedConfigExtractor): directory_fmt = ["{category}", "{board[shortname]}", "{thread_num}{title:? - //}"] filename_fmt = "{media[media]}" - archive_fmt = "{{board[shortname]}}_{num}_{timestamp}" + archive_fmt = "{board[shortname]}_{num}_{timestamp}" root = "" referer = True diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 2246f008..7737a933 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -172,6 +172,8 @@ class ChapterExtractor(Extractor): "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}"] filename_fmt = ( "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}") + archive_fmt = ( + "{manga}_{chapter}{chapter_minor}_{page}") def __init__(self, url): Extractor.__init__(self) diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 1bcf0fb9..c13acfe1 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -16,6 +16,7 @@ class DirectlinkExtractor(Extractor): """Extractor for direct links to images and other media files""" category = "directlink" filename_fmt = "{domain}/{path}" + archive_fmt = "{domain}/{path}" pattern = [r"https?://(?P[^/]+)/(?P[^?&#]+\." r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))" r"(?:\?(?P[^/?#]*))?(?:#(?P.*))?$"] diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py index 29af07c9..6d2cd75f 100644 --- a/gallery_dl/extractor/hentai2read.py +++ b/gallery_dl/extractor/hentai2read.py @@ -58,6 +58,7 @@ class Hentai2readMangaExtractor(MangaExtractor): class Hentai2readChapterExtractor(ChapterExtractor): """Extractor for a single manga chapter from hentai2read.com""" category = "hentai2read" + archive_fmt = "{chapter_id}_{page}" pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"] test = [("http://hentai2read.com/amazon_elixir/1/", { "url": "964b942cf492b3a129d2fe2608abfc475bc99e71", diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index ba74ca80..2fe4daa7 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -120,6 +120,7 @@ class HentaifoundryImageExtractor(Extractor): subcategory = "image" directory_fmt = ["{category}", "{artist}"] filename_fmt = "{category}_{index}_{title}.{extension}" + archive_fmt = "{index}" pattern = [(r"(?:https?://)?(?:www\.|pictures\.)?hentai-foundry\.com/" r"(?:pictures/user/([^/]+)/(\d+)" r"|[^/]/([^/]+)/(\d+))")] diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py index 9937cf8b..d1c5c932 100644 --- a/gallery_dl/extractor/hentaihere.py +++ b/gallery_dl/extractor/hentaihere.py @@ -59,6 +59,7 @@ class HentaihereMangaExtractor(MangaExtractor): class HentaihereChapterExtractor(ChapterExtractor): """Extractor for a single manga chapter from hentaihere.com""" category = "hentaihere" + archive_fmt = "{chapter_id}_{page}" pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"] test = [("https://hentaihere.com/m/S13812/1/1/", { "url": "964b942cf492b3a129d2fe2608abfc475bc99e71", diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index bd52eee4..dd4d6b83 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -16,7 +16,6 @@ import json class ImgurExtractor(Extractor): """Base class for imgur extractors""" category = "imgur" - archive_fmt = "{hash}" def __init__(self, match): Extractor.__init__(self) @@ -53,6 +52,7 @@ class ImgurImageExtractor(ImgurExtractor): """Extractor for individual images from imgur.com""" subcategory = "image" filename_fmt = "{category}_{hash}{title:?_//}.{extension}" + archive_fmt = "{hash}" pattern = [(r"(?:https?://)?(?:m\.|www\.)?imgur\.com/" r"(?:gallery/)?((?!gallery)[^/?&#]{7})/?"), (r"(?:https?://)?i\.imgur\.com/([^/?&#.]{5,7})\.")] @@ -112,6 +112,7 @@ class ImgurAlbumExtractor(ImgurExtractor): subcategory = "album" directory_fmt = ["{category}", "{album[hash]}{album[title]:? - //}"] filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}" + archive_fmt = "{album[hash]}_{hash}" pattern = [r"(?:https?://)?(?:m\.|www\.)?imgur\.com/" r"(?:a|gallery)/([^/?&#]{5})/?$"] test = [ diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py index 61f1a3f0..ddbe56c3 100644 --- a/gallery_dl/extractor/khinsider.py +++ b/gallery_dl/extractor/khinsider.py @@ -18,7 +18,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor): category = "khinsider" subcategory = "soundtrack" directory_fmt = ["{category}", "{album}"] - archive_fmt = "{album}_{name}" + archive_fmt = "{album}_{name}.{extension}" pattern = [r"(?:https?://)?downloads\.khinsider\.com/" r"game-soundtracks/album/([^/?&#]+)"] test = [(("https://downloads.khinsider.com/game-soundtracks/" diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index 69a6aaeb..8c3639e2 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -24,6 +24,7 @@ IV = [ class KissmangaBase(): """Base class for kissmanga extractors""" category = "kissmanga" + archive_fmt = "{chapter_id}_{page}" root = "http://kissmanga.com" def request(self, url): @@ -69,7 +70,7 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor): test = [ ("http://kissmanga.com/Manga/Dropout", { "url": "992befdd64e178fe5af67de53f8b510860d968ca", - "keyword": "1d23ea07296e004b33bee17fe2f5cd5177c58680", + "keyword": "32b09711c28b481845acc32e3bb6054cfc90224d", }), ("http://kissmanga.com/manga/feng-shen-ji", None), ] @@ -87,8 +88,9 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor): for item in text.extract_iter(page, ''): url, _, chapter = item.partition(needle) data = { - "manga": manga, "id": url.rpartition("=")[2], - "chapter_string": chapter, "lang": "en", "language": "English", + "manga": manga, "chapter_string": chapter, + "chapter_id": util.safe_int(url.rpartition("=")[2]), + "lang": "en", "language": "English", } self.parse_chapter_string(data) results.append((self.root + url, data)) @@ -98,25 +100,26 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor): class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor): """Extractor for manga-chapters from kissmanga.com""" pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com" - r"/Manga/[^/?&#]+/[^/?&#]+\?id=\d+"] + r"/Manga/[^/?&#]+/[^/?&#]+\?id=(\d+)"] test = [ ("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", { "url": "4136bcd1c6cecbca8cc2bc965d54f33ef0a97cc0", - "keyword": "68384c1167858fb4aa475c4596f0a685c45fff36", + "keyword": "4a3a9341d453541de0dbfa24cd6b2e3ed39c0182", }), ("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", { "url": "de074848f6c1245204bb9214c12bcc3ecfd65019", - "keyword": "089158338b4cde43b2ff244814effeb13297de33", + "keyword": "ffc11b630da44fe67709ed0473756cf51b90a05c", }), ("http://kissmanga.com/Manga/Monster/Monster-79?id=7608", { "count": 23, - "keyword": "558da596e86ca544eb72cf303f3694bbf0b1f2f5", + "keyword": "92669a75e48a8501f3fbfc22b8fd2d3188239212", }), ("http://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608", None), ] def __init__(self, match): ChapterExtractor.__init__(self, match.group(0)) + self.chapter_id = match.group(1) self.session.headers["Referer"] = self.root def get_metadata(self, page): @@ -125,6 +128,7 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor): data = { "manga": manga.strip(), "chapter_string": cinfo.strip(), + "chapter_id": util.safe_int(self.chapter_id), "lang": "en", "language": "English", } diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index fb626db6..e6f77d8e 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -9,7 +9,7 @@ """Extract comic-issues and entire comics from http://readcomiconline.to/""" from .common import ChapterExtractor, MangaExtractor -from .. import text, cloudflare +from .. import text, util, cloudflare import re @@ -18,7 +18,7 @@ class ReadcomiconlineBase(): category = "readcomiconline" directory_fmt = ["{category}", "{comic}", "{issue:>03}"] filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}" - archive_fmt = "{comic}_{issue}_{page}" + archive_fmt = "{issue_id}_{page}" root = "http://readcomiconline.to" useragent = "Wget/1.19.2 (linux-gnu)" @@ -33,11 +33,11 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor): test = [ ("http://readcomiconline.to/Comic/W-i-t-c-h", { "url": "c5a530538a30b176916e30cbe223a93d83cb2691", - "keyword": "51097f2b65da683160dbea4de128dbec1cbf9357", + "keyword": "3986248e4458fa44a201ec073c3684917f48ee0c", }), ("http://readcomiconline.to/Comic/Bazooka-Jules", { "url": "e517dca61dff489f18ca781084f59a9eeb60a6b6", - "keyword": "7d4877d1215650a768097a8626a2f0c6083119a4", + "keyword": "f5ba5246cd787bb750924d9690cb1549199bd516", }), ] @@ -55,7 +55,8 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor): if issue.startswith('Issue #'): issue = issue[7:] results.append((self.root + url, { - "comic": comic, "issue": issue, "id": url.rpartition("=")[2], + "comic": comic, "issue": issue, + "issue_id": util.safe_int(url.rpartition("=")[2]), "lang": "en", "language": "English", })) return results @@ -65,14 +66,15 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): """Extractor for comic-issues from readcomiconline.to""" subcategory = "issue" pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to" - r"/Comic/[^/?&#]+/[^/?&#]+\?id=\d+"] + r"/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+)"] test = [("http://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", { "url": "a45c77f8fbde66091fe2346d6341f9cf3c6b1bc5", - "keyword": "dee8a8a44659825afe1d69e1d809a48b03e98c68", + "keyword": "c6de1c9c8a307dc4be56783c4ac6f1338ffac6fc", })] def __init__(self, match): ChapterExtractor.__init__(self, match.group(0)) + self.issue_id = match.group(1) self.session.headers["User-Agent"] = self.useragent def get_metadata(self, page): @@ -82,6 +84,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): return { "comic": comic, "issue": match.group(1) or match.group(2), + "issue_id": util.safe_int(self.issue_id), "lang": "en", "language": "English", } diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 9c54cbf9..a3170854 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -545,10 +545,7 @@ class DownloadArchive(): self.cursor = con.cursor() self.cursor.execute("CREATE TABLE IF NOT EXISTS archive " "(entry PRIMARY KEY) WITHOUT ROWID") - self.keygen = ( - extractor.category + - (extractor.archive_fmt or extractor.filename_fmt) - ).format_map + self.keygen = (extractor.category + extractor.archive_fmt).format_map self._key = None def check(self, kwdict):