update all other extractors
This commit is contained in:
@@ -10,21 +10,14 @@
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text, iso639_1
|
||||
import os.path
|
||||
import re
|
||||
|
||||
info = {
|
||||
"category": "batoto",
|
||||
"extractor": "BatotoExtractor",
|
||||
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
|
||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)",
|
||||
],
|
||||
}
|
||||
|
||||
class BatotoExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "batoto"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"]
|
||||
url = "https://bato.to/areader"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -68,7 +61,7 @@ class BatotoExtractor(AsynchronousExtractor):
|
||||
manga, pos = extr(page, "document.title = '", " - ", pos)
|
||||
match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo)
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"token": self.token,
|
||||
"manga": manga,
|
||||
"volume": match.group(2) or "",
|
||||
|
||||
@@ -10,21 +10,15 @@
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
import re
|
||||
|
||||
info = {
|
||||
"category": "deviantart",
|
||||
"extractor": "DeviantArtExtractor",
|
||||
"directory": ["{category}", "{artist}"],
|
||||
"filename": "{category}_{index}_{title}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*",
|
||||
],
|
||||
}
|
||||
|
||||
class DeviantArtExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "deviantart"
|
||||
directory_fmt = ["{category}", "{artist}"]
|
||||
filename_fmt = "{category}_{index}_{title}.{extension}"
|
||||
pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"]
|
||||
|
||||
def __init__(self, match):
|
||||
AsynchronousExtractor.__init__(self)
|
||||
self.session.cookies["agegate_state"] = "1"
|
||||
@@ -57,14 +51,14 @@ class DeviantArtExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"artist": self.artist,
|
||||
}
|
||||
|
||||
def get_image_metadata(self, image):
|
||||
"""Collect metadata for an image"""
|
||||
match = self.extract_data(image, 'title',
|
||||
'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in')
|
||||
r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in')
|
||||
if image.startswith(" ismature"):
|
||||
# adult image
|
||||
url, _ = text.extract(image, 'href="', '"')
|
||||
@@ -76,7 +70,7 @@ class DeviantArtExtractor(AsynchronousExtractor):
|
||||
height, pos = text.extract(page, ' height="', '"', pos)
|
||||
else:
|
||||
# normal image
|
||||
index = self.extract_data(image, 'href', '[^"]+-(\d+)').group(1)
|
||||
index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1)
|
||||
url, pos = text.extract(image, ' data-super-full-img="', '"', match.end())
|
||||
if url:
|
||||
width , pos = text.extract(image, ' data-super-full-width="', '"', pos)
|
||||
|
||||
@@ -13,18 +13,12 @@ from .. import config, text, iso639_1
|
||||
import time
|
||||
import random
|
||||
|
||||
info = {
|
||||
"category": "exhentai",
|
||||
"extractor": "ExhentaiExtractor",
|
||||
"directory": ["{category}", "{gallery-id}"],
|
||||
"filename": "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})",
|
||||
],
|
||||
}
|
||||
|
||||
class ExhentaiExtractor(Extractor):
|
||||
|
||||
category = "exhentai"
|
||||
directory_fmt = ["{category}", "{gallery-id}"]
|
||||
filename_fmt = "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}"
|
||||
pattern = [r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
|
||||
api_url = "http://exhentai.org/api.php"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -70,7 +64,7 @@ class ExhentaiExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category" : info["category"],
|
||||
"category" : self.category,
|
||||
"gallery-id" : self.gid,
|
||||
"gallery-token": self.token,
|
||||
}
|
||||
|
||||
@@ -10,20 +10,13 @@
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "hbrowse",
|
||||
"extractor": "HbrowseExtractor",
|
||||
"directory": ["{category}", "{gallery-id} {title}"],
|
||||
"filename": "{category}_{gallery-id}_{num:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)",
|
||||
],
|
||||
}
|
||||
|
||||
class HbrowseExtractor(Extractor):
|
||||
|
||||
category = "hbrowse"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"]
|
||||
url_base = "http://www.hbrowse.com/thumbnails/"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -43,7 +36,7 @@ class HbrowseExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
'gallery-id': self.gid,
|
||||
'chapter': int(self.chapter[1:]),
|
||||
}
|
||||
|
||||
@@ -12,19 +12,15 @@ from .common import Extractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "hentaifoundry",
|
||||
"extractor": "HentaiFoundryExtractor",
|
||||
"directory": ["{category}", "{artist}"],
|
||||
"filename": "{category}_{index}_{title}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)",
|
||||
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile",
|
||||
],
|
||||
}
|
||||
|
||||
class HentaiFoundryExtractor(Extractor):
|
||||
|
||||
category = "hentaifoundry"
|
||||
directory_fmt = ["{category}", "{artist}"]
|
||||
filename_fmt = "{category}_{index}_{title}.{extension}"
|
||||
pattern = [
|
||||
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)",
|
||||
r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile",
|
||||
]
|
||||
url_base = "http://www.hentai-foundry.com/pictures/user/"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -60,7 +56,7 @@ class HentaiFoundryExtractor(Extractor):
|
||||
token, pos = text.extract(page, 'hidden" value="', '"')
|
||||
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"artist": self.artist,
|
||||
"count": count,
|
||||
}, token
|
||||
|
||||
@@ -10,21 +10,15 @@
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, iso639_1
|
||||
import os.path
|
||||
import string
|
||||
|
||||
info = {
|
||||
"category": "hitomi",
|
||||
"extractor": "HitomiExtractor",
|
||||
"directory": ["{category}", "{gallery-id} {title}"],
|
||||
"filename": "{category}_{gallery-id}_{num:>03}_{name}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html",
|
||||
],
|
||||
}
|
||||
|
||||
class HitomiExtractor(Extractor):
|
||||
|
||||
category = "hitomi"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}"
|
||||
pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.gid = match.group(1)
|
||||
@@ -62,7 +56,7 @@ class HitomiExtractor(Extractor):
|
||||
series, pos = text.extract(page, '.html">', '</a>', pos)
|
||||
lang = lang.capitalize()
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
"title": title,
|
||||
"artist": string.capwords(artist),
|
||||
|
||||
@@ -10,20 +10,13 @@
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "imagebam",
|
||||
"extractor": "ImagebamExtractor",
|
||||
"directory": ["{category}", "{title} - {gallery-key}"],
|
||||
"filename": "{num:>03}-{filename}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*",
|
||||
],
|
||||
}
|
||||
|
||||
class ImagebamExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "imagebam"
|
||||
directory_fmt = ["{category}", "{title} - {gallery-key}"]
|
||||
filename_fmt = "{num:>03}-{filename}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"]
|
||||
url_base = "http://www.imagebam.com"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -47,7 +40,7 @@ class ImagebamExtractor(AsynchronousExtractor):
|
||||
response.encoding = "utf-8"
|
||||
page = response.text
|
||||
data = {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"gallery-key": self.gkey,
|
||||
}
|
||||
data, _ = text.extract_all(page, (
|
||||
|
||||
@@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
info = {
|
||||
"category": "imgbox",
|
||||
"extractor": "ImgboxExtractor",
|
||||
"directory": ["{category}", "{title} - {gallery-key}"],
|
||||
"filename": "{num:>03}-{name}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)",
|
||||
],
|
||||
}
|
||||
|
||||
class ImgboxExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "imgbox"
|
||||
directory_fmt = ["{category}", "{title} - {gallery-key}"]
|
||||
filename_fmt = "{num:>03}-{name}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)"]
|
||||
url_base = "http://imgbox.com"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -44,7 +38,7 @@ class ImgboxExtractor(AsynchronousExtractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
match = re.search(r"<h1>(.+) \(([^ ]+) ([^ ]+) \w+\) - (\d+)", page)
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"gallery-key": self.key,
|
||||
"title": match.group(1),
|
||||
"date": match.group(2),
|
||||
@@ -62,7 +56,8 @@ class ImgboxExtractor(AsynchronousExtractor):
|
||||
), values=data)
|
||||
return data
|
||||
|
||||
def get_file_url(self, page):
|
||||
@staticmethod
|
||||
def get_file_url(page):
|
||||
"""Extract download-url"""
|
||||
base = "http://i.imgbox.com/"
|
||||
path, _ = text.extract(page, base, '"')
|
||||
|
||||
@@ -12,18 +12,13 @@ from .common import Extractor, Message
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
info = {
|
||||
"category": "imgchili",
|
||||
"extractor": "ImgchiliExtractor",
|
||||
"directory": ["{category}", "{title} - {key}"],
|
||||
"filename": "{num:>03}-{name}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)",
|
||||
],
|
||||
}
|
||||
|
||||
class ImgchiliExtractor(Extractor):
|
||||
|
||||
category = "imgchili"
|
||||
directory_fmt = ["{category}", "{title} - {key}"]
|
||||
filename_fmt = "{num:>03}-{name}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.match = match
|
||||
|
||||
@@ -10,20 +10,14 @@
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "imgth",
|
||||
"extractor": "ImgthExtractor",
|
||||
"directory": ["{category}", "{gallery-id} {title}"],
|
||||
"filename": "{category}_{gallery-id}_{num:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?imgth\.com/gallery/(\d+)",
|
||||
],
|
||||
}
|
||||
|
||||
class ImgthExtractor(Extractor):
|
||||
|
||||
category = "imgth"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.gid = match.group(1)
|
||||
@@ -55,7 +49,7 @@ class ImgthExtractor(Extractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
}
|
||||
data, _ = text.extract_all(page, (
|
||||
|
||||
@@ -12,18 +12,13 @@ from .common import Extractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "imgur",
|
||||
"extractor": "ImgurExtractor",
|
||||
"directory": ["{category}", "{album-key} - {title}"],
|
||||
"filename": "{category}_{album-key}_{num:>03}_{name}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)",
|
||||
],
|
||||
}
|
||||
|
||||
class ImgurExtractor(Extractor):
|
||||
|
||||
category = "imgur"
|
||||
directory_fmt = ["{category}", "{album-key} - {title}"]
|
||||
filename_fmt = "{category}_{album-key}_{num:>03}_{name}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.album = match.group(1)
|
||||
@@ -43,7 +38,7 @@ class ImgurExtractor(Extractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
page = self.request("https://imgur.com/a/" + self.album).text
|
||||
data = {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"album-key": self.album,
|
||||
}
|
||||
return text.extract_all(page, (
|
||||
|
||||
@@ -10,21 +10,15 @@
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, cloudflare
|
||||
import os.path
|
||||
import re
|
||||
|
||||
info = {
|
||||
"category": "kissmanga",
|
||||
"extractor": "KissmangaExtractor",
|
||||
"directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"],
|
||||
"filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+",
|
||||
],
|
||||
}
|
||||
|
||||
class KissmangaExtractor(Extractor):
|
||||
|
||||
category = "kissmanga"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+"]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.url = match.group(0)
|
||||
@@ -41,8 +35,7 @@ class KissmangaExtractor(Extractor):
|
||||
data["page"] = num
|
||||
yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
|
||||
@staticmethod
|
||||
def get_job_metadata(page):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
manga, pos = text.extract(page, "Read manga\n", "\n")
|
||||
cinfo, pos = text.extract(page, "", "\n", pos)
|
||||
@@ -50,7 +43,7 @@ class KissmangaExtractor(Extractor):
|
||||
r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo)
|
||||
chminor = match.group(3)
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"manga": manga,
|
||||
"volume": match.group(1) or "",
|
||||
"chapter": match.group(2),
|
||||
|
||||
@@ -10,18 +10,13 @@
|
||||
|
||||
from .mangareader import MangaReaderExtractor
|
||||
|
||||
info = {
|
||||
"category": "mangapanda",
|
||||
"extractor": "MangaPandaExtractor",
|
||||
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
|
||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))",
|
||||
r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
|
||||
],
|
||||
}
|
||||
|
||||
class MangaPandaExtractor(MangaReaderExtractor):
|
||||
|
||||
category = info["category"]
|
||||
category = "mangapanda"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||
pattern = [
|
||||
r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))",
|
||||
r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
|
||||
]
|
||||
url_base = "http://www.mangapanda.com"
|
||||
|
||||
@@ -10,22 +10,16 @@
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "mangareader",
|
||||
"extractor": "MangaReaderExtractor",
|
||||
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
|
||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))",
|
||||
r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
|
||||
],
|
||||
}
|
||||
|
||||
class MangaReaderExtractor(AsynchronousExtractor):
|
||||
|
||||
category = info["category"]
|
||||
category = "mangareader"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||
pattern = [
|
||||
r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))",
|
||||
r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)",
|
||||
]
|
||||
url_base = "http://www.mangareader.net"
|
||||
|
||||
def __init__(self, match):
|
||||
|
||||
@@ -10,20 +10,13 @@
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import os
|
||||
|
||||
info = {
|
||||
"category": "mangashare",
|
||||
"extractor": "MangaShareExtractor",
|
||||
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
|
||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)",
|
||||
],
|
||||
}
|
||||
|
||||
class MangaShareExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "mangashare"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)"]
|
||||
url_fmt = "http://read.mangashare.com/{}/page{:>03}.html"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -40,11 +33,10 @@ class MangaShareExtractor(AsynchronousExtractor):
|
||||
text.nameext_from_url(url, data)
|
||||
yield Message.Url, url, data.copy()
|
||||
|
||||
@staticmethod
|
||||
def get_job_metadata(page):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
}
|
||||
|
||||
@@ -10,20 +10,13 @@
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "mangastream",
|
||||
"extractor": "MangaStreamExtractor",
|
||||
"directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"],
|
||||
"filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))",
|
||||
],
|
||||
}
|
||||
|
||||
class MangaStreamExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "mangastream"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))"]
|
||||
url_base = "https://readms.com/r/"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -47,7 +40,7 @@ class MangaStreamExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"chapter": self.chapter,
|
||||
"chapter-minor": self.ch_minor,
|
||||
"chapter-id": self.ch_id,
|
||||
@@ -61,7 +54,8 @@ class MangaStreamExtractor(AsynchronousExtractor):
|
||||
), values=data)
|
||||
return data
|
||||
|
||||
def get_page_metadata(self, page):
|
||||
@staticmethod
|
||||
def get_page_metadata(page):
|
||||
"""Collect next url, image-url and metadata for one manga-page"""
|
||||
nurl, pos = text.extract(page, '<div class="page">\n<a href="', '"')
|
||||
iurl, pos = text.extract(page, '<img id="manga-page" src="', '"', pos)
|
||||
|
||||
@@ -12,18 +12,13 @@ from .common import Extractor, Message
|
||||
from .. import text
|
||||
import json
|
||||
|
||||
info = {
|
||||
"category": "nhentai",
|
||||
"extractor": "NhentaiExtractor",
|
||||
"directory": ["{category}", "{gallery-id} {title}"],
|
||||
"filename": "{category}_{gallery-id}_{num:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)",
|
||||
],
|
||||
}
|
||||
|
||||
class NhentaiExtractor(Extractor):
|
||||
|
||||
category = "nhentai"
|
||||
directory_fmt = ["{category}", "{gallery-id} {title}"]
|
||||
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self)
|
||||
self.gid = match.group(1)
|
||||
@@ -57,7 +52,7 @@ class NhentaiExtractor(Extractor):
|
||||
title_en = ginfo["title"].get("english", "")
|
||||
title_ja = ginfo["title"].get("japanese", "")
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"gallery-id": self.gid,
|
||||
"upload-date": ginfo["upload_date"],
|
||||
"media-id": ginfo["media_id"],
|
||||
|
||||
@@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message
|
||||
from .. import config, text
|
||||
import re
|
||||
|
||||
info = {
|
||||
"category": "nijie",
|
||||
"extractor": "NijieExtractor",
|
||||
"directory": ["{category}", "{artist-id}"],
|
||||
"filename": "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)",
|
||||
],
|
||||
}
|
||||
|
||||
class NijieExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "nijie"
|
||||
directory_fmt = ["{category}", "{artist-id}"]
|
||||
filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"]
|
||||
popup_url = "https://nijie.info/view_popup.php?id="
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -37,7 +31,7 @@ class NijieExtractor(AsynchronousExtractor):
|
||||
self.session.cookies["R18"] = "1"
|
||||
self.session.cookies["nijie_referer"] = "nijie.info"
|
||||
self.session.cookies.update(
|
||||
config.get(("extractor", info["category"], "cookies"))
|
||||
config.get(("extractor", self.category, "cookies"))
|
||||
)
|
||||
|
||||
def items(self):
|
||||
@@ -52,7 +46,7 @@ class NijieExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"artist-id": self.artist_id,
|
||||
}
|
||||
|
||||
|
||||
@@ -14,19 +14,12 @@ import re
|
||||
import json
|
||||
import time
|
||||
|
||||
info = {
|
||||
"category": "pixiv",
|
||||
"extractor": "PixivExtractor",
|
||||
"directory": ["{category}", "{artist-id}-{artist-nick}"],
|
||||
"filename": "{category}_{artist-id}_{id}{num}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
class PixivExtractor(Extractor):
|
||||
|
||||
category = "pixiv"
|
||||
directory_fmt = ["{category}", "{artist-id}-{artist-nick}"]
|
||||
filename_fmt = "{category}_{artist-id}_{id}{num}.{extension}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)"]
|
||||
member_url = "http://www.pixiv.net/member_illust.php"
|
||||
illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"
|
||||
|
||||
@@ -121,7 +114,7 @@ class PixivExtractor(Extractor):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = self.api.user(self.artist_id)["response"][0]
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"artist-id": self.artist_id,
|
||||
"artist-name": data["name"],
|
||||
"artist-nick": data["account"],
|
||||
|
||||
@@ -10,20 +10,13 @@
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "sankaku",
|
||||
"extractor": "SankakuExtractor",
|
||||
"directory": ["{category}", "{tags}"],
|
||||
"filename": "{category}_{id}_{md5}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)",
|
||||
],
|
||||
}
|
||||
|
||||
class SankakuExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "sankaku"
|
||||
directory_fmt = ["{category}", "{tags}"]
|
||||
filename_fmt = "{category}_{id}_{md5}.{extension}"
|
||||
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)"]
|
||||
url = "https://chan.sankakucomplex.com/"
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -45,7 +38,7 @@ class SankakuExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self):
|
||||
"""Collect metadata for extractor-job"""
|
||||
return {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"tags": self.tags,
|
||||
}
|
||||
|
||||
|
||||
@@ -10,21 +10,17 @@
|
||||
|
||||
from .common import AsynchronousExtractor, Message
|
||||
from .. import text
|
||||
import os.path
|
||||
|
||||
info = {
|
||||
"category": "spectrumnexus",
|
||||
"extractor": "SpectrumNexusExtractor",
|
||||
"directory": ["{category}", "{manga}", "c{chapter:>03}"],
|
||||
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
|
||||
"pattern": [
|
||||
r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+.html)\?ch=Chapter\+(\d+)",
|
||||
r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)",
|
||||
],
|
||||
}
|
||||
|
||||
class SpectrumNexusExtractor(AsynchronousExtractor):
|
||||
|
||||
category = "spectrumnexus"
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||
pattern = [
|
||||
r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+.html)\?ch=Chapter\+(\d+)",
|
||||
r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)",
|
||||
]
|
||||
|
||||
def __init__(self, match):
|
||||
AsynchronousExtractor.__init__(self)
|
||||
self.url = "http://" + match.group(1)
|
||||
@@ -52,7 +48,7 @@ class SpectrumNexusExtractor(AsynchronousExtractor):
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
data = {
|
||||
"category": info["category"],
|
||||
"category": self.category,
|
||||
"chapter": self.chapter,
|
||||
}
|
||||
return text.extract_all(page, (
|
||||
|
||||
Reference in New Issue
Block a user