fix/improve some regular expressions
This commit is contained in:
@@ -63,7 +63,7 @@ class BatotoExtractor():
|
||||
def parse_chapter_string(data):
|
||||
"""Parse 'chapter_string' value contained in 'data'"""
|
||||
data["chapter_string"] = text.unescape(data["chapter_string"])
|
||||
pattern = r"(?:Vol.(\d+) )?Ch\.(\d+)([^ :]*)(?::? (.+))"
|
||||
pattern = r"(?:Vol\.(\d+) )?Ch\.(\d+)([^ :]*)(?::? (.+))"
|
||||
match = re.match(pattern, data["chapter_string"])
|
||||
|
||||
volume, chapter, data["chapter_minor"], title = match.groups()
|
||||
@@ -75,7 +75,8 @@ class BatotoExtractor():
|
||||
|
||||
class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
|
||||
"""Extractor for manga from bato.to"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(bato\.to/comic/_/comics/.*-r\d+)"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(bato\.to"
|
||||
r"/comic/_/comics/[^/?&#]*-r\d+)"]
|
||||
test = [("http://bato.to/comic/_/comics/aria-r2007", {
|
||||
"url": "a38585b0339587666d772ee06f2a60abdbf42a97",
|
||||
"keyword": "c33ea7b97e3714530384e2411fae62ae51aae50d",
|
||||
|
||||
@@ -18,7 +18,7 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ["{category}", "{title} - {gallery_key}"]
|
||||
filename_fmt = "{num:>03}-{filename}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"]
|
||||
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+)"]
|
||||
test = [(("http://www.imagebam.com/"
|
||||
"gallery/adz2y0f9574bjpmonaismyrhtjgvey4o"), {
|
||||
"url": "d7a4483b6d5ebba81950a349aad58ae034c60eda",
|
||||
|
||||
@@ -242,7 +242,7 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for single images from imagevenue.com"""
|
||||
category = "imagevenue"
|
||||
pattern = [(r"(?:https?://)?(img\d+\.imagevenue\.com/"
|
||||
r"img\.php\?image=(\d+)_.+)")]
|
||||
r"img\.php\?image=(\d+)_[^&#]+)")]
|
||||
params = None
|
||||
|
||||
def get_info(self, page):
|
||||
|
||||
@@ -18,7 +18,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
|
||||
subcategory = "soundtrack"
|
||||
directory_fmt = ["{category}", "{album}"]
|
||||
pattern = [r"(?:https?://)?downloads\.khinsider\.com/"
|
||||
r"game-soundtracks/album/(.+)"]
|
||||
r"game-soundtracks/album/([^/?&#]+)"]
|
||||
test = [(("http://downloads.khinsider.com/game-soundtracks/"
|
||||
"album/horizon-riders-wii-"), {
|
||||
"pattern": ("https?://\d+\.\d+\.\d+\.\d+/ost/horizon-riders-wii-/"
|
||||
|
||||
@@ -65,7 +65,8 @@ class KissmangaExtractor(Extractor):
|
||||
|
||||
class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
|
||||
"""Extractor for manga from kissmanga.com"""
|
||||
pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com/Manga/[^/]+/?$"]
|
||||
pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com/"
|
||||
r"Manga/[^/?&#]+/?$"]
|
||||
test = [
|
||||
("http://kissmanga.com/Manga/Dropout", {
|
||||
"url": "992befdd64e178fe5af67de53f8b510860d968ca",
|
||||
@@ -98,7 +99,7 @@ class KissmangaChapterExtractor(KissmangaExtractor):
|
||||
"""Extractor for manga-chapters from kissmanga.com"""
|
||||
subcategory = "chapter"
|
||||
pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com/"
|
||||
r"Manga/.+/.+\?id=\d+"]
|
||||
r"Manga/[^/?&#]+/[^/?&#]+\?id=\d+"]
|
||||
test = [
|
||||
("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
|
||||
"url": "4136bcd1c6cecbca8cc2bc965d54f33ef0a97cc0",
|
||||
|
||||
@@ -19,7 +19,7 @@ class MangapandaBase():
|
||||
|
||||
class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
|
||||
"""Extractor for manga from mangapanda.com"""
|
||||
pattern = [r"(?:https?://)?((?:www\.)?mangapanda\.com/[^/]+)/?$"]
|
||||
pattern = [r"(?:https?://)?((?:www\.)?mangapanda\.com/[^/?&#]+)/?$"]
|
||||
test = [("http://www.mangapanda.com/mushishi", {
|
||||
"url": "50a1ba730b85426b904da256c80f68ba6a8a2566",
|
||||
"keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
|
||||
@@ -29,9 +29,9 @@ class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor):
|
||||
class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
|
||||
"""Extractor for manga-chapters from mangapanda.com"""
|
||||
pattern = [
|
||||
(r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))"),
|
||||
(r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"),
|
||||
(r"(?:https?://)?(?:www\.)?mangapanda\.com"
|
||||
r"(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)"),
|
||||
r"(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+)\.html)"),
|
||||
]
|
||||
test = [("http://www.mangapanda.com/red-storm/2", {
|
||||
"url": "4bf4ddf6c50105ec8a37675495ab80c46608275d",
|
||||
|
||||
@@ -34,7 +34,7 @@ class MangareaderBase():
|
||||
|
||||
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
|
||||
"""Extractor for manga from mangareader.net"""
|
||||
pattern = [r"(?:https?://)?((?:www\.)?mangareader\.net/[^/]+)/?$"]
|
||||
pattern = [r"(?:https?://)?((?:www\.)?mangareader\.net/[^/?&#]+)/?$"]
|
||||
reverse = False
|
||||
test = [("http://www.mangareader.net/mushishi", {
|
||||
"url": "249042420b67a07b32e7f6be4c7410b6d810b808",
|
||||
@@ -63,9 +63,9 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
|
||||
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{title:?: //}"]
|
||||
filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}"
|
||||
pattern = [
|
||||
(r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))"),
|
||||
(r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/"
|
||||
r"chapter-(\d+).html)"),
|
||||
(r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"),
|
||||
(r"(?:https?://)?(?:www\.)?mangareader\.net"
|
||||
r"(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+)\.html)"),
|
||||
]
|
||||
test = [(("http://www.mangareader.net/"
|
||||
"karate-shoukoushi-kohinata-minoru/11"), {
|
||||
|
||||
@@ -25,7 +25,8 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,
|
||||
kissmanga.KissmangaMangaExtractor):
|
||||
"""Extractor for comics from readcomiconline.to"""
|
||||
subcategory = "comic"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?readcomiconline\.to/Comic/[^/]+/?$"]
|
||||
pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to/"
|
||||
r"Comic/[^/?&#]+/?$"]
|
||||
test = [
|
||||
("http://readcomiconline.to/Comic/W-i-t-c-h", {
|
||||
"url": "c5a530538a30b176916e30cbe223a93d83cb2691",
|
||||
@@ -57,8 +58,8 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineExtractor,
|
||||
kissmanga.KissmangaChapterExtractor):
|
||||
"""Extractor for comic-issues from readcomiconline.to"""
|
||||
subcategory = "issue"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?readcomiconline\.to/"
|
||||
r"Comic/.+/.+\?id=\d+"]
|
||||
pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to/"
|
||||
r"Comic/[^/?&#]+/[^/?&#]+\?id=\d+"]
|
||||
test = [("http://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
|
||||
"url": "a45c77f8fbde66091fe2346d6341f9cf3c6b1bc5",
|
||||
"keyword": "dee8a8a44659825afe1d69e1d809a48b03e98c68",
|
||||
|
||||
Reference in New Issue
Block a user