From 4d56b76aa85a3fdc31f82f112994feea0b2e82ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 21 Nov 2015 04:26:30 +0100 Subject: [PATCH] update all other extractors --- gallery_dl/extractor/batoto.py | 17 +++++------------ gallery_dl/extractor/deviantart.py | 22 ++++++++-------------- gallery_dl/extractor/exhentai.py | 16 +++++----------- gallery_dl/extractor/hbrowse.py | 17 +++++------------ gallery_dl/extractor/hentaifoundry.py | 20 ++++++++------------ gallery_dl/extractor/hitomi.py | 18 ++++++------------ gallery_dl/extractor/imagebam.py | 17 +++++------------ gallery_dl/extractor/imgbox.py | 19 +++++++------------ gallery_dl/extractor/imgchili.py | 15 +++++---------- gallery_dl/extractor/imgth.py | 18 ++++++------------ gallery_dl/extractor/imgur.py | 17 ++++++----------- gallery_dl/extractor/kissmanga.py | 21 +++++++-------------- gallery_dl/extractor/mangapanda.py | 19 +++++++------------ gallery_dl/extractor/mangareader.py | 20 +++++++------------- gallery_dl/extractor/mangashare.py | 20 ++++++-------------- gallery_dl/extractor/mangastream.py | 20 +++++++------------- gallery_dl/extractor/nhentai.py | 17 ++++++----------- gallery_dl/extractor/nijie.py | 18 ++++++------------ gallery_dl/extractor/pixiv.py | 17 +++++------------ gallery_dl/extractor/sankaku.py | 17 +++++------------ gallery_dl/extractor/spectrumnexus.py | 22 +++++++++------------- 21 files changed, 131 insertions(+), 256 deletions(-) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 0e1fce53..ec00eef8 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -10,21 +10,14 @@ from .common import AsynchronousExtractor, Message from .. import text, iso639_1 -import os.path import re -info = { - "category": "batoto", - "extractor": "BatotoExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)", - ], -} - class BatotoExtractor(AsynchronousExtractor): + category = "batoto" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"] url = "https://bato.to/areader" def __init__(self, match): @@ -68,7 +61,7 @@ class BatotoExtractor(AsynchronousExtractor): manga, pos = extr(page, "document.title = '", " - ", pos) match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo) return { - "category": info["category"], + "category": self.category, "token": self.token, "manga": manga, "volume": match.group(2) or "", diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index d1002f02..4553f8c3 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -10,21 +10,15 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path import re -info = { - "category": "deviantart", - "extractor": "DeviantArtExtractor", - "directory": ["{category}", "{artist}"], - "filename": "{category}_{index}_{title}.{extension}", - "pattern": [ - r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*", - ], -} - class DeviantArtExtractor(AsynchronousExtractor): + category = "deviantart" + directory_fmt = ["{category}", "{artist}"] + filename_fmt = "{category}_{index}_{title}.{extension}" + pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"] + def __init__(self, match): AsynchronousExtractor.__init__(self) self.session.cookies["agegate_state"] = "1" @@ -57,14 +51,14 @@ class DeviantArtExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" return { - "category": info["category"], + "category": self.category, "artist": self.artist, } def get_image_metadata(self, image): """Collect metadata for an image""" match = self.extract_data(image, 'title', - '(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in') + r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in') if image.startswith(" ismature"): # adult image url, _ = text.extract(image, 'href="', '"') @@ -76,7 +70,7 @@ class DeviantArtExtractor(AsynchronousExtractor): height, pos = text.extract(page, ' height="', '"', pos) else: # normal image - index = self.extract_data(image, 'href', '[^"]+-(\d+)').group(1) + index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1) url, pos = text.extract(image, ' data-super-full-img="', '"', match.end()) if url: width , pos = text.extract(image, ' data-super-full-width="', '"', pos) diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 875ab1b4..b2d697d9 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -13,18 +13,12 @@ from .. import config, text, iso639_1 import time import random -info = { - "category": "exhentai", - "extractor": "ExhentaiExtractor", - "directory": ["{category}", "{gallery-id}"], - "filename": "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}", - "pattern": [ - r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})", - ], -} - class ExhentaiExtractor(Extractor): + category = "exhentai" + directory_fmt = ["{category}", "{gallery-id}"] + filename_fmt = "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}" + pattern = [r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"] api_url = "http://exhentai.org/api.php" def __init__(self, match): @@ -70,7 +64,7 @@ class ExhentaiExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category" : info["category"], + "category" : self.category, "gallery-id" : self.gid, "gallery-token": self.token, } diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 59aa576e..67da9011 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -10,20 +10,13 @@ from .common import Extractor, Message from .. import text -import os.path - -info = { - "category": "hbrowse", - "extractor": "HbrowseExtractor", - "directory": ["{category}", "{gallery-id} {title}"], - "filename": "{category}_{gallery-id}_{num:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)", - ], -} class HbrowseExtractor(Extractor): + category = "hbrowse" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"] url_base = "http://www.hbrowse.com/thumbnails/" def __init__(self, match): @@ -43,7 +36,7 @@ class HbrowseExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, 'gallery-id': self.gid, 'chapter': int(self.chapter[1:]), } diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index d3fcb362..5c916173 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -12,19 +12,15 @@ from .common import Extractor, Message from .. import text import os.path -info = { - "category": "hentaifoundry", - "extractor": "HentaiFoundryExtractor", - "directory": ["{category}", "{artist}"], - "filename": "{category}_{index}_{title}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)", - r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile", - ], -} - class HentaiFoundryExtractor(Extractor): + category = "hentaifoundry" + directory_fmt = ["{category}", "{artist}"] + filename_fmt = "{category}_{index}_{title}.{extension}" + pattern = [ + r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)", + r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile", + ] url_base = "http://www.hentai-foundry.com/pictures/user/" def __init__(self, match): @@ -60,7 +56,7 @@ class HentaiFoundryExtractor(Extractor): token, pos = text.extract(page, 'hidden" value="', '"') count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos) return { - "category": info["category"], + "category": self.category, "artist": self.artist, "count": count, }, token diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index c7dc49c8..dd341538 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -10,21 +10,15 @@ from .common import Extractor, Message from .. import text, iso639_1 -import os.path import string -info = { - "category": "hitomi", - "extractor": "HitomiExtractor", - "directory": ["{category}", "{gallery-id} {title}"], - "filename": "{category}_{gallery-id}_{num:>03}_{name}.{extension}", - "pattern": [ - r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html", - ], -} - class HitomiExtractor(Extractor): + category = "hitomi" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}" + pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"] + def __init__(self, match): Extractor.__init__(self) self.gid = match.group(1) @@ -62,7 +56,7 @@ class HitomiExtractor(Extractor): series, pos = text.extract(page, '.html">', '', pos) lang = lang.capitalize() return { - "category": info["category"], + "category": self.category, "gallery-id": self.gid, "title": title, "artist": string.capwords(artist), diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index c39f74fc..3bd65dbd 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "imagebam", - "extractor": "ImagebamExtractor", - "directory": ["{category}", "{title} - {gallery-key}"], - "filename": "{num:>03}-{filename}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*", - ], -} class ImagebamExtractor(AsynchronousExtractor): + category = "imagebam" + directory_fmt = ["{category}", "{title} - {gallery-key}"] + filename_fmt = "{num:>03}-{filename}" + pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"] url_base = "http://www.imagebam.com" def __init__(self, match): @@ -47,7 +40,7 @@ class ImagebamExtractor(AsynchronousExtractor): response.encoding = "utf-8" page = response.text data = { - "category": info["category"], + "category": self.category, "gallery-key": self.gkey, } data, _ = text.extract_all(page, ( diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py index b3a01dd7..edd4db58 100644 --- a/gallery_dl/extractor/imgbox.py +++ b/gallery_dl/extractor/imgbox.py @@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message from .. import text import re -info = { - "category": "imgbox", - "extractor": "ImgboxExtractor", - "directory": ["{category}", "{title} - {gallery-key}"], - "filename": "{num:>03}-{name}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)", - ], -} - class ImgboxExtractor(AsynchronousExtractor): + category = "imgbox" + directory_fmt = ["{category}", "{title} - {gallery-key}"] + filename_fmt = "{num:>03}-{name}" + pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)"] url_base = "http://imgbox.com" def __init__(self, match): @@ -44,7 +38,7 @@ class ImgboxExtractor(AsynchronousExtractor): """Collect metadata for extractor-job""" match = re.search(r"

(.+) \(([^ ]+) ([^ ]+) \w+\) - (\d+)", page) return { - "category": info["category"], + "category": self.category, "gallery-key": self.key, "title": match.group(1), "date": match.group(2), @@ -62,7 +56,8 @@ class ImgboxExtractor(AsynchronousExtractor): ), values=data) return data - def get_file_url(self, page): + @staticmethod + def get_file_url(page): """Extract download-url""" base = "http://i.imgbox.com/" path, _ = text.extract(page, base, '"') diff --git a/gallery_dl/extractor/imgchili.py b/gallery_dl/extractor/imgchili.py index d913a0d3..8cd67e06 100644 --- a/gallery_dl/extractor/imgchili.py +++ b/gallery_dl/extractor/imgchili.py @@ -12,18 +12,13 @@ from .common import Extractor, Message from .. import text import re -info = { - "category": "imgchili", - "extractor": "ImgchiliExtractor", - "directory": ["{category}", "{title} - {key}"], - "filename": "{num:>03}-{name}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)", - ], -} - class ImgchiliExtractor(Extractor): + category = "imgchili" + directory_fmt = ["{category}", "{title} - {key}"] + filename_fmt = "{num:>03}-{name}" + pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"] + def __init__(self, match): Extractor.__init__(self) self.match = match diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py index 8622e579..7000b18c 100644 --- a/gallery_dl/extractor/imgth.py +++ b/gallery_dl/extractor/imgth.py @@ -10,20 +10,14 @@ from .common import Extractor, Message from .. import text -import os.path - -info = { - "category": "imgth", - "extractor": "ImgthExtractor", - "directory": ["{category}", "{gallery-id} {title}"], - "filename": "{category}_{gallery-id}_{num:>03}.{extension}", - "pattern": [ - r"(?:https?://)?imgth\.com/gallery/(\d+)", - ], -} class ImgthExtractor(Extractor): + category = "imgth" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}" + pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"] + def __init__(self, match): Extractor.__init__(self) self.gid = match.group(1) @@ -55,7 +49,7 @@ class ImgthExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, "gallery-id": self.gid, } data, _ = text.extract_all(page, ( diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 5515c509..355e201b 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -12,18 +12,13 @@ from .common import Extractor, Message from .. import text import os.path -info = { - "category": "imgur", - "extractor": "ImgurExtractor", - "directory": ["{category}", "{album-key} - {title}"], - "filename": "{category}_{album-key}_{num:>03}_{name}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)", - ], -} - class ImgurExtractor(Extractor): + category = "imgur" + directory_fmt = ["{category}", "{album-key} - {title}"] + filename_fmt = "{category}_{album-key}_{num:>03}_{name}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"] + def __init__(self, match): Extractor.__init__(self) self.album = match.group(1) @@ -43,7 +38,7 @@ class ImgurExtractor(Extractor): """Collect metadata for extractor-job""" page = self.request("https://imgur.com/a/" + self.album).text data = { - "category": info["category"], + "category": self.category, "album-key": self.album, } return text.extract_all(page, ( diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index 5e1edcbc..4b1cad15 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -10,21 +10,15 @@ from .common import Extractor, Message from .. import text, cloudflare -import os.path import re -info = { - "category": "kissmanga", - "extractor": "KissmangaExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"], - "filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+", - ], -} - class KissmangaExtractor(Extractor): + category = "kissmanga" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+"] + def __init__(self, match): Extractor.__init__(self) self.url = match.group(0) @@ -41,8 +35,7 @@ class KissmangaExtractor(Extractor): data["page"] = num yield Message.Url, url, text.nameext_from_url(url, data) - @staticmethod - def get_job_metadata(page): + def get_job_metadata(self, page): """Collect metadata for extractor-job""" manga, pos = text.extract(page, "Read manga\n", "\n") cinfo, pos = text.extract(page, "", "\n", pos) @@ -50,7 +43,7 @@ class KissmangaExtractor(Extractor): r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo) chminor = match.group(3) return { - "category": info["category"], + "category": self.category, "manga": manga, "volume": match.group(1) or "", "chapter": match.group(2), diff --git a/gallery_dl/extractor/mangapanda.py b/gallery_dl/extractor/mangapanda.py index b5db2679..5a09dd59 100644 --- a/gallery_dl/extractor/mangapanda.py +++ b/gallery_dl/extractor/mangapanda.py @@ -10,18 +10,13 @@ from .mangareader import MangaReaderExtractor -info = { - "category": "mangapanda", - "extractor": "MangaPandaExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))", - r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)", - ], -} - class MangaPandaExtractor(MangaReaderExtractor): - category = info["category"] + category = "mangapanda" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [ + r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))", + r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)", + ] url_base = "http://www.mangapanda.com" diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index 372969b3..111f9a10 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -10,22 +10,16 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "mangareader", - "extractor": "MangaReaderExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))", - r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)", - ], -} class MangaReaderExtractor(AsynchronousExtractor): - category = info["category"] + category = "mangareader" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [ + r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))", + r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)", + ] url_base = "http://www.mangareader.net" def __init__(self, match): diff --git a/gallery_dl/extractor/mangashare.py b/gallery_dl/extractor/mangashare.py index 1665a017..67e2007e 100644 --- a/gallery_dl/extractor/mangashare.py +++ b/gallery_dl/extractor/mangashare.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os - -info = { - "category": "mangashare", - "extractor": "MangaShareExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)", - ], -} class MangaShareExtractor(AsynchronousExtractor): + category = "mangashare" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)"] url_fmt = "http://read.mangashare.com/{}/page{:>03}.html" def __init__(self, match): @@ -40,11 +33,10 @@ class MangaShareExtractor(AsynchronousExtractor): text.nameext_from_url(url, data) yield Message.Url, url, data.copy() - @staticmethod - def get_job_metadata(page): + def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, "lang": "en", "language": "English", } diff --git a/gallery_dl/extractor/mangastream.py b/gallery_dl/extractor/mangastream.py index ca6c1175..ae009944 100644 --- a/gallery_dl/extractor/mangastream.py +++ b/gallery_dl/extractor/mangastream.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "mangastream", - "extractor": "MangaStreamExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"], - "filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))", - ], -} class MangaStreamExtractor(AsynchronousExtractor): + category = "mangastream" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))"] url_base = "https://readms.com/r/" def __init__(self, match): @@ -47,7 +40,7 @@ class MangaStreamExtractor(AsynchronousExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, "chapter": self.chapter, "chapter-minor": self.ch_minor, "chapter-id": self.ch_id, @@ -61,7 +54,8 @@ class MangaStreamExtractor(AsynchronousExtractor): ), values=data) return data - def get_page_metadata(self, page): + @staticmethod + def get_page_metadata(page): """Collect next url, image-url and metadata for one manga-page""" nurl, pos = text.extract(page, '
\n03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)", - ], -} - class NhentaiExtractor(Extractor): + category = "nhentai" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"] + def __init__(self, match): Extractor.__init__(self) self.gid = match.group(1) @@ -57,7 +52,7 @@ class NhentaiExtractor(Extractor): title_en = ginfo["title"].get("english", "") title_ja = ginfo["title"].get("japanese", "") return { - "category": info["category"], + "category": self.category, "gallery-id": self.gid, "upload-date": ginfo["upload_date"], "media-id": ginfo["media_id"], diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 8901c427..0249afb1 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message from .. import config, text import re -info = { - "category": "nijie", - "extractor": "NijieExtractor", - "directory": ["{category}", "{artist-id}"], - "filename": "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)", - ], -} - class NijieExtractor(AsynchronousExtractor): + category = "nijie" + directory_fmt = ["{category}", "{artist-id}"] + filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"] popup_url = "https://nijie.info/view_popup.php?id=" def __init__(self, match): @@ -37,7 +31,7 @@ class NijieExtractor(AsynchronousExtractor): self.session.cookies["R18"] = "1" self.session.cookies["nijie_referer"] = "nijie.info" self.session.cookies.update( - config.get(("extractor", info["category"], "cookies")) + config.get(("extractor", self.category, "cookies")) ) def items(self): @@ -52,7 +46,7 @@ class NijieExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" return { - "category": info["category"], + "category": self.category, "artist-id": self.artist_id, } diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 975441ed..783d1c15 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -14,19 +14,12 @@ import re import json import time -info = { - "category": "pixiv", - "extractor": "PixivExtractor", - "directory": ["{category}", "{artist-id}-{artist-nick}"], - "filename": "{category}_{artist-id}_{id}{num}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)", - ], -} - - class PixivExtractor(Extractor): + category = "pixiv" + directory_fmt = ["{category}", "{artist-id}-{artist-nick}"] + filename_fmt = "{category}_{artist-id}_{id}{num}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)"] member_url = "http://www.pixiv.net/member_illust.php" illust_url = "http://www.pixiv.net/member_illust.php?mode=medium" @@ -121,7 +114,7 @@ class PixivExtractor(Extractor): """Collect metadata for extractor-job""" data = self.api.user(self.artist_id)["response"][0] return { - "category": info["category"], + "category": self.category, "artist-id": self.artist_id, "artist-name": data["name"], "artist-nick": data["account"], diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 5d5d5d94..a821dbd6 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "sankaku", - "extractor": "SankakuExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)", - ], -} class SankakuExtractor(AsynchronousExtractor): + category = "sankaku" + directory_fmt = ["{category}", "{tags}"] + filename_fmt = "{category}_{id}_{md5}.{extension}" + pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)"] url = "https://chan.sankakucomplex.com/" def __init__(self, match): @@ -45,7 +38,7 @@ class SankakuExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" return { - "category": info["category"], + "category": self.category, "tags": self.tags, } diff --git a/gallery_dl/extractor/spectrumnexus.py b/gallery_dl/extractor/spectrumnexus.py index 4235a439..c4446e5e 100644 --- a/gallery_dl/extractor/spectrumnexus.py +++ b/gallery_dl/extractor/spectrumnexus.py @@ -10,21 +10,17 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "spectrumnexus", - "extractor": "SpectrumNexusExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+.html)\?ch=Chapter\+(\d+)", - r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)", - ], -} class SpectrumNexusExtractor(AsynchronousExtractor): + category = "spectrumnexus" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [ + r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+.html)\?ch=Chapter\+(\d+)", + r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)", + ] + def __init__(self, match): AsynchronousExtractor.__init__(self) self.url = "http://" + match.group(1) @@ -52,7 +48,7 @@ class SpectrumNexusExtractor(AsynchronousExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, "chapter": self.chapter, } return text.extract_all(page, (