From c2f0720184151865b83430ad2cd3d436e382e86c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 16 Nov 2015 17:32:26 +0100 Subject: [PATCH] code cleanup to use nameext_from_url --- gallery_dl/extractor/batoto.py | 7 ++----- gallery_dl/extractor/booru.py | 8 +------- gallery_dl/extractor/deviantart.py | 6 ++---- gallery_dl/extractor/exhentai.py | 4 +--- gallery_dl/extractor/hbrowse.py | 4 +--- gallery_dl/extractor/hentaifoundry.py | 8 +++----- gallery_dl/extractor/hitomi.py | 5 +---- gallery_dl/extractor/imagebam.py | 5 +---- gallery_dl/extractor/imgth.py | 6 +----- gallery_dl/extractor/kissmanga.py | 6 +----- gallery_dl/extractor/mangareader.py | 9 ++------- gallery_dl/extractor/mangashare.py | 6 ++---- gallery_dl/extractor/mangastream.py | 5 +---- gallery_dl/extractor/nijie.py | 6 ++---- gallery_dl/extractor/sankaku.py | 11 ++++------- gallery_dl/extractor/spectrumnexus.py | 4 +--- 16 files changed, 26 insertions(+), 74 deletions(-) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 49a98338..0e1fce53 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -44,14 +44,11 @@ class BatotoExtractor(AsynchronousExtractor): page = self.request(self.url, params=params).text data = self.get_job_metadata(page) yield Message.Version, 1 - yield Message.Directory, data + yield Message.Directory, data.copy() for i in range(int(data["count"])): next_url, image_url = self.get_page_urls(page) - filename = text.unquote(text.filename_from_url(image_url)) - name, ext = os.path.splitext(filename) + text.nameext_from_url(image_url, data) data["page"] = i+1 - data["name"] = name - data["extension"] = ext[1:] yield Message.Url, image_url, data.copy() if next_url: params["p"] += 1 diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index d59416b4..200bbf77 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -59,13 +59,7 @@ class BooruExtractor(Extractor): def get_file_metadata(self, data): """Collect metadata for a downloadable file""" data["category"] = self.info["category"] - data["filename"] = text.unquote( - text.filename_from_url(self.get_file_url(data)) - ) - name, ext = os.path.splitext(data["filename"]) - data["name"] = name - data["extension"] = ext[1:] - return data + return text.nameext_from_url(self.get_file_url(data), data) def get_file_url(self, data): """Extract download-url from 'data'""" diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index d090d3e4..d1002f02 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -85,17 +85,15 @@ class DeviantArtExtractor(AsynchronousExtractor): url , pos = text.extract(image, ' data-super-img="', '"', pos) width , pos = text.extract(image, ' data-super-width="', '"', pos) height, pos = text.extract(image, ' data-super-height="', '"', pos) - name, ext = os.path.splitext(text.filename_from_url(url)) - return url, { + data = { "index": index, "title": match.group(1), "artist": match.group(2), "date": match.group(3), "width": width, "height": height, - "name": name, - "extension": ext[1:], } + return url, text.nameext_from_url(url, data) @staticmethod def extract_data(txt, attr, pattern): diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 11a3f85d..2f89a98b 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -58,10 +58,8 @@ class ExhentaiExtractor(Extractor): urlkey = "origurl" for num, image in enumerate(self.get_images(url), 1): image.update(data) - name, ext = os.path.splitext(text.filename_from_url(image["url"])) image["num"] = num - image["name"] = name - image["extension"] = ext[1:] + text.nameext_from_url(image["url"], image) if "/fullimg.php" in image[urlkey]: time.sleep(random.uniform(1, 2)) yield Message.Url, image[urlkey], image diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 53018028..59aa576e 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -36,10 +36,8 @@ class HbrowseExtractor(Extractor): yield Message.Version, 1 yield Message.Directory, data for num, url in enumerate(self.get_image_urls(page), 1): - name, ext = os.path.splitext(text.filename_from_url(url)) - data["name"] = name - data["extension"] = ext[1:] data["num"] = num + text.nameext_from_url(url, data) yield Message.Url, url, data def get_job_metadata(self, page): diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 5a95b4ac..d3fcb362 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -70,14 +70,12 @@ class HentaiFoundryExtractor(Extractor): page = self.request(url).text index = text.extract(url, '/', '/', len(self.url_base) + len(self.artist))[0] title, pos = text.extract(page, 'Pictures » ', '<') - url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos)# - name, ext = os.path.splitext(text.filename_from_url(url)) - return "http://pictures.hentai-foundry.com" + url, { + url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos) + data = { "index": index, "title": text.unescape(title), - "name": name, - "extension": ext[1:], } + return "http://pictures.hentai-foundry.com" + url, text.nameext_from_url(url, data) def set_filters(self, token): """Set site-internal filters to show all images""" diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index d5eab4ce..0b5f2d94 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -37,11 +37,8 @@ class HitomiExtractor(Extractor): yield Message.Version, 1 yield Message.Directory, data for num, url in enumerate(images, 1): - name, ext = os.path.splitext(text.filename_from_url(url)) data["num"] = num - data["name"] = name - data["extension"] = ext[1:] - yield Message.Url, url, data + yield Message.Url, url, text.nameext_from_url(url, data) def get_job_metadata(self, page): """Collect metadata for extractor-job""" diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index ec65430d..c39f74fc 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -37,11 +37,8 @@ class ImagebamExtractor(AsynchronousExtractor): yield Message.Directory, data for image_url, image_id in self.get_images(data["first-url"]): data["id"] = image_id - data["filename"] = text.unquote(text.filename_from_url(image_url)) - name, ext = os.path.splitext(data["filename"]) data["num"] += 1 - data["name"] = name - data["extension"] = ext[1:] + text.nameext_from_url(image_url, data) yield Message.Url, image_url, data.copy() def get_job_metadata(self): diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py index 818fdd19..8622e579 100644 --- a/gallery_dl/extractor/imgth.py +++ b/gallery_dl/extractor/imgth.py @@ -35,12 +35,8 @@ class ImgthExtractor(Extractor): yield Message.Version, 1 yield Message.Directory, data for num, url in enumerate(self.get_images(page), 1): - name, ext = os.path.splitext(text.filename_from_url(url)) data["num"] = num - data["name"] = name - data["extension"] = ext[1:] - yield Message.Url, url, data - + yield Message.Url, url, text.nameext_from_url(url, data) def get_images(self, page): pnum = 0 while True: diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index 5be49a9b..5e1edcbc 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -38,12 +38,8 @@ class KissmangaExtractor(Extractor): yield Message.Version, 1 yield Message.Directory, data for num, url in enumerate(imgs, 1): - filename = text.unquote(text.filename_from_url(url)) - name, ext = os.path.splitext(filename) data["page"] = num - data["name"] = name - data["extension"] = ext[1:] - yield Message.Url, url, data + yield Message.Url, url, text.nameext_from_url(url, data) @staticmethod def get_job_metadata(page): diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index 87dc0a02..372969b3 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -91,12 +91,7 @@ class MangaReaderExtractor(AsynchronousExtractor): width , pos = extr(page, 'Original: ', 'x', pos) height , pos = text.extract(page, '', ' ', pos) - filename = text.filename_from_url(image_url) - name, ext = os.path.splitext(filename) - return { + data = text.nameext_from_url(image_url, { "id": image_id, "file-url": "https:" + image_url, "width": width, "height": height, - "md5": name, - "name": name, - "extension": ext[1:], - } + }) + data["md5"] = data["name"] + return data diff --git a/gallery_dl/extractor/spectrumnexus.py b/gallery_dl/extractor/spectrumnexus.py index d6a0f462..4f85fa25 100644 --- a/gallery_dl/extractor/spectrumnexus.py +++ b/gallery_dl/extractor/spectrumnexus.py @@ -41,10 +41,8 @@ class SpectrumNexusExtractor(AsynchronousExtractor): count = int(data["count"]) for i in range(1, count+1): url = self.get_image_url(page) - name, ext = os.path.splitext(text.filename_from_url(url)) + text.nameext_from_url(url, data) data["page"] = i - data["name"] = name - data["extension"] = ext[1:] yield Message.Url, url, data.copy() if i < count: params["page"] += 1