diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 640df8ac..49a98338 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -9,7 +9,7 @@ """Extract manga pages from http://bato.to/""" from .common import AsynchronousExtractor, Message -from .. import text +from .. import text, iso639_1 import os.path import re @@ -19,58 +19,77 @@ info = { "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", "pattern": [ - r"(?:https?://)?(?:www\.)?bato\.to/read/_/(\d+).*", + r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)", ], } class BatotoExtractor(AsynchronousExtractor): - url_base = "http://bato.to/read/_/" + url = "https://bato.to/areader" def __init__(self, match): AsynchronousExtractor.__init__(self) - self.chapter_id = match.group(1) + self.token = match.group(1) + self.session.headers.update({ + "X-Requested-With": "XMLHttpRequest", + "Referer": "https://bato.to/reader", + }) def items(self): - yield Message.Version, 1 - url = self.url_base + self.chapter_id - while url: - url, data = self.get_page_metadata(url) - yield Message.Directory, data - yield Message.Url, data["image-url"], data - - def get_page_metadata(self, page_url): - """Collect next url and metadata for one manga-page""" - page = self.request(page_url).text - _ , pos = text.extract(page, 'selected="selected"', '') - title, pos = text.extract(page, ': ', '<', pos) - _ , pos = text.extract(page, 'selected="selected"', '', pos) - trans, pos = text.extract(page, '>', '<', pos) - _ , pos = text.extract(page, '
(.+) - (?:vol (\d+) )?" - r"ch (\d+)[^ ]+ Page (\d+) | Batoto!", - page - ) - tmatch = re.match( - r"(.+) - ([^ ]+)", - trans - ) - filename = text.unquote(text.filename_from_url(image)) - name, ext = os.path.splitext(filename) - return url, { - "category": info["category"], - "chapter-id": self.chapter_id, - "manga": text.unescape(mmatch.group(1)), - "volume": mmatch.group(2) or "", - "chapter": mmatch.group(3), - "page": mmatch.group(4), - "group": tmatch.group(1), - "language": tmatch.group(2), - "title": text.unescape(title), - "image-url": image, - "name": name, - "extension": ext[1:], + params = { + "id": self.token, + "p": 1, + "supress_webtoon": "t", } + page = self.request(self.url, params=params).text + data = self.get_job_metadata(page) + yield Message.Version, 1 + yield Message.Directory, data + for i in range(int(data["count"])): + next_url, image_url = self.get_page_urls(page) + filename = text.unquote(text.filename_from_url(image_url)) + name, ext = os.path.splitext(filename) + data["page"] = i+1 + data["name"] = name + data["extension"] = ext[1:] + yield Message.Url, image_url, data.copy() + if next_url: + params["p"] += 1 + page = self.request(self.url, params=params).text + + def get_job_metadata(self, page): + """Collect metadata for extractor-job""" + extr = text.extract + _ , pos = extr(page, '', ' - ', pos) + lang , pos = extr(page, '', '', pos) + _ , pos = extr(page, '