[batoto] extend chapter-string regex (closes #60)

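Non-numeric chapter indices (e.g. "Extra") exist after all, so the chapter-string pattern now accepts word characters in the chapter index and makes the title part optional.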
This commit is contained in:
Mike Fährmann
2018-01-05 12:53:50 +01:00
parent 1219ebb7f5
commit a794fffc6d

View File

@@ -63,24 +63,32 @@ class BatotoExtractor():
def parse_chapter_string(data): def parse_chapter_string(data):
"""Parse 'chapter_string' value contained in 'data'""" """Parse 'chapter_string' value contained in 'data'"""
data["chapter_string"] = text.unescape(data["chapter_string"]) data["chapter_string"] = text.unescape(data["chapter_string"])
pattern = r"(?:Vol\.(\d+) )?Ch\.(\d+)([^ :]*)(?::? (.+))" pattern = r"(?:Vol\.(\d+) )?Ch\.([\d\w]+)([^ :]*)(?::? (.+))?"
match = re.match(pattern, data["chapter_string"]) match = re.match(pattern, data["chapter_string"])
volume, chapter, data["chapter_minor"], title = match.groups() volume, chapter, data["chapter_minor"], title = match.groups()
data["volume"] = util.safe_int(volume) data["volume"] = util.safe_int(volume)
data["chapter"] = util.safe_int(chapter) data["chapter"] = util.safe_int(chapter, chapter)
data["title"] = title if title != "Read Online" else "" data["title"] = title if title and title != "Read Online" else ""
return data return data
class BatotoMangaExtractor(BatotoExtractor, MangaExtractor): class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
"""Extractor for manga from bato.to""" """Extractor for manga from bato.to"""
pattern = [r"(?:https?://)?(?:www\.)?(bato\.to" pattern = [r"(?:https?://)?(?:www\.)?(bato\.to"
r"/comic/_/comics/[^/?&#]*-r\d+)"] r"/comic/_(?:/comics)?/[^/?&#]*-r\d+)"]
test = [("http://bato.to/comic/_/comics/aria-r2007", { test = [
"url": "a38585b0339587666d772ee06f2a60abdbf42a97", ("http://bato.to/comic/_/comics/aria-r2007", {
"keyword": "c33ea7b97e3714530384e2411fae62ae51aae50d", "url": "a38585b0339587666d772ee06f2a60abdbf42a97",
})] "keyword": "c33ea7b97e3714530384e2411fae62ae51aae50d",
}),
# non-numeric chapter ("Extra")
("https://bato.to/comic/_/comics/cosplay-deka-r2563", {
"count": ">= 37",
}),
# short URL
("https://bato.to/comic/_/aria-r2007", None),
]
def chapters(self, page): def chapters(self, page):
pos = 0 pos = 0