diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 640df8ac..49a98338 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -9,7 +9,7 @@
"""Extract manga pages from http://bato.to/"""
from .common import AsynchronousExtractor, Message
-from .. import text
+from .. import text, iso639_1
import os.path
import re
@@ -19,58 +19,77 @@ info = {
"directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"],
"filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}",
"pattern": [
- r"(?:https?://)?(?:www\.)?bato\.to/read/_/(\d+).*",
+ r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)",
],
}
class BatotoExtractor(AsynchronousExtractor):
- url_base = "http://bato.to/read/_/"
+ url = "https://bato.to/areader"
def __init__(self, match):
AsynchronousExtractor.__init__(self)
- self.chapter_id = match.group(1)
+ self.token = match.group(1)
+ self.session.headers.update({
+ "X-Requested-With": "XMLHttpRequest",
+ "Referer": "https://bato.to/reader",
+ })
def items(self):
- yield Message.Version, 1
- url = self.url_base + self.chapter_id
- while url:
- url, data = self.get_page_metadata(url)
- yield Message.Directory, data
- yield Message.Url, data["image-url"], data
-
- def get_page_metadata(self, page_url):
- """Collect next url and metadata for one manga-page"""
- page = self.request(page_url).text
- _ , pos = text.extract(page, 'selected="selected"', '')
- title, pos = text.extract(page, ': ', '<', pos)
- _ , pos = text.extract(page, 'selected="selected"', '', pos)
- trans, pos = text.extract(page, '>', '<', pos)
-        _    , pos = text.extract(page, '<div id="full_image"', '', pos)
-        image, pos = text.extract(page, '<img src="', '"', pos)
-        url  , pos = text.extract(page, '<a href="', '"', pos)
-        mmatch = re.search(
-            r"<title>(.+) - (?:vol (\d+) )?"
-            r"ch (\d+)[^ ]+ Page (\d+) | Batoto!</title>",
- page
- )
- tmatch = re.match(
- r"(.+) - ([^ ]+)",
- trans
- )
- filename = text.unquote(text.filename_from_url(image))
- name, ext = os.path.splitext(filename)
- return url, {
- "category": info["category"],
- "chapter-id": self.chapter_id,
- "manga": text.unescape(mmatch.group(1)),
- "volume": mmatch.group(2) or "",
- "chapter": mmatch.group(3),
- "page": mmatch.group(4),
- "group": tmatch.group(1),
- "language": tmatch.group(2),
- "title": text.unescape(title),
- "image-url": image,
- "name": name,
- "extension": ext[1:],
+ params = {
+ "id": self.token,
+ "p": 1,
+ "supress_webtoon": "t",
}
+ page = self.request(self.url, params=params).text
+ data = self.get_job_metadata(page)
+ yield Message.Version, 1
+ yield Message.Directory, data
+ for i in range(int(data["count"])):
+ next_url, image_url = self.get_page_urls(page)
+ filename = text.unquote(text.filename_from_url(image_url))
+ name, ext = os.path.splitext(filename)
+ data["page"] = i+1
+ data["name"] = name
+ data["extension"] = ext[1:]
+ yield Message.Url, image_url, data.copy()
+ if next_url:
+ params["p"] += 1
+ page = self.request(self.url, params=params).text
+
+ def get_job_metadata(self, page):
+ """Collect metadata for extractor-job"""
+ extr = text.extract
+ _ , pos = extr(page, '