From 74c225f94e967dbdc0cc919a541c82d28dffcb16 Mon Sep 17 00:00:00 2001 From: bug-assassin <7788433+bug-assassin@users.noreply.github.com> Date: Tue, 26 Dec 2023 22:33:33 -0500 Subject: [PATCH 1/9] [bato] add support --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/bato.py | 113 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + test/results/bato.py | 65 ++++++++++++++++++ 5 files changed, 186 insertions(+) create mode 100644 gallery_dl/extractor/bato.py create mode 100644 test/results/bato.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8e4c59a1..6040cd47 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -97,6 +97,12 @@ Consider all listed sites to potentially be NSFW. Albums, Artwork Listings, Challenges, Followed Users, individual Images, Likes, Search Results, User Profiles + + Bato + https://bato.to + Chapters, Manga + + BBC https://bbc.co.uk/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 695b8b2a..99de2169 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -24,6 +24,7 @@ modules = [ "architizer", "artstation", "aryion", + "bato", "bbc", "behance", "blogger", diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py new file mode 100644 index 00000000..c34b74fc --- /dev/null +++ b/gallery_dl/extractor/bato.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://bato.to and aliases (v3x only)""" + +from .common import ChapterExtractor, MangaExtractor +from .. import text, exception +import re + +BASE_PATTERN = r"(?:https?://)?(?:bato\.to|dto\.to|batotoo\.com|wto\.to)" +MANGA_PATTERN = r"/title/\d+(?:-[0-9a-z]+)*/?" +CHAPTER_PATTERN = r"/\d+(?:-vol_\d+)?-ch_\d+\.?\d*/?" + +class BatoBase(): + """Base class for bato v3x extractors""" + category = "bato" + root = "https://bato.to" + +class BatoChapterExtractor(BatoBase, ChapterExtractor): + """Extractor for manga chapters from bato.to""" + pattern = BASE_PATTERN + "(" + MANGA_PATTERN + CHAPTER_PATTERN + ")" + # There are three possible patterns for a chapter + example = "https://bato.to/title/12345-manga-name-with-spaces/54212-ch_1.5" + example1 = "https://bato.to/title/12345-manga-name-with-spaces/54212-vol1-ch_1.5" + example2 = "https://bato.to/title/12345/54212" + # v2x, not supported + example3 = "https://bato.to/chapter/54212" + + def __init__(self, match): + self.path = match.group(1) + ChapterExtractor.__init__(self, match, self.root + self.path) + + def metadata(self, page): + info, _ = text.extract(page, '', r' - Read Free Manga Online at Bato.To') + info = info.encode('latin-1').decode('utf-8').replace("\n", "") + + match = re.match( + r"(.+) - " + r"(?:Volume *(\d+) )?" + r"Chapter *([\d\.]+)", info) + manga, volume, chapter = match.groups() if match else ("", "", info) + chapter, sep, minor = chapter.partition(".") + title_container = text.extr(page, f'") + title = text.extr(title_container, "", "") + + return { + "manga" : text.unescape(manga), + "title" : text.unescape(title), + "author" : "", + "volume" : text.parse_int(volume), + "chapter" : text.parse_int(chapter), + "chapter_minor": sep + minor, + } + + def images(self, page): + images_container = text.extr(page, 'pageOpts', ':[0,0]}"') + images_container = text.unescape(images_container) + + return [(url, None) for url in text.extract_iter(images_container, r'\"', r'\"')] + + +class BatoMangaExtractor(BatoBase, MangaExtractor): + """Extractor for manga from bato.to""" + reverse = False + chapterclass = BatoChapterExtractor + pattern = BASE_PATTERN + "(" + MANGA_PATTERN + "$" + ")" + # There are two possible patterns for a manga + example = "https://bato.to/title/12345-manga-name-with-spaces/" + example2 = "https://bato.to/title/12345/" + # v2x, not supported + example3 = "https://bato.to/series/12345/manga-name-with-space" + + def chapters(self, page): + data = {} + num_chapters, _ = text.extract(page, ">Chapters<", "") + num_chapters, _ = text.extract(num_chapters, r"", r"") + num_chapters = text.parse_int(num_chapters) + if num_chapters == 0: + raise exception.NotFoundError("chapter") + + manga, _ = text.extract(page, '', r' - Read Free Manga Online at Bato.To') + manga = manga.encode('latin-1').decode('utf-8').replace("\n", "") + data["manga"] = manga + + results = [] + for chapter_num in range(num_chapters): + chapter, _ = text.extract(page, f'
") + chapter += r"" # Add this back in so we can match the date + url, pos = text.extract(chapter, '') + title, _ = text.extract(title, r"", r"") + if title is None or title == "" or title == "": + title, _ = text.extract(chapter, ">", "", pos) + + date, _ = text.extract(chapter, "") + date, _ = text.extract(date, 'time="', '"') + + data["date"] = date + data["title"] = title + data["chapter"] = text.parse_int(chapter_major) + data["chapter_minor"] = sep + chapter_minor + + if url.startswith("/"): + url = self.root + url + results.append((url, data.copy())) + return results \ No newline at end of file diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 4839660d..e3738b8b 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -32,6 +32,7 @@ CATEGORY_MAP = { "atfbooru" : "ATFBooru", "b4k" : "arch.b4k.co", "baraag" : "baraag", + "bato" : "Bato", "bbc" : "BBC", "comicvine" : "Comic Vine", "coomerparty" : "Coomer", diff --git a/test/results/bato.py b/test/results/bato.py new file mode 100644 index 00000000..18479f9a --- /dev/null +++ b/test/results/bato.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import bato +from gallery_dl import exception + +__tests__ = ( +{ + "#url" : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8", + "#category": ("", "bato", "chapter"), + "#class" : bato.BatoChapterExtractor, + "#count" : 66, + + "manga" : "I Shall Master this Family! [Official]", + "title" : "Observing", + "chapter" : 8, +}, +{ + "#url" : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5", + "#comment" : "volume (vol) in url", + "#category": ("", "bato", "chapter"), + "#class" : bato.BatoChapterExtractor, + "#count" : 7, + + "manga" : "86--EIGHTY-SIX (Official)", + "title" : "The Spearhead Squadron's Power", + "volume" : 1, + "chapter" : 5, +}, +{ + "#url" : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official", + "#category": ("", "bato", "manga"), + "#class" : bato.BatoMangaExtractor, + "#count" : ">= 21", + + "manga" : "Futsutsuka na Akujo de wa Gozaimasu ga - Suuguu Chouso Torikae Den (Official)", +}, +{ + "#url" : "https://bato.to/title/104929-86-eighty-six-official", + "#comment" : "Manga with number in name", + "#category": ("", "bato", "manga"), + "#class" : bato.BatoMangaExtractor, + "#count" : ">= 18", + + "manga" : "86--EIGHTY-SIX (Official)", +}, +{ + "#url" : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan", + "#comment" : "Non-English translation (Indonesian)", + "#category": ("", "bato", "manga"), + "#class" : bato.BatoMangaExtractor, + "#count" : ">= 29", + + "manga" : "The Grand Duke’s Fox Princess ⎝⎝MGCHAN⎠⎠", +}, +{ + "#url" : "https://bato.to/title/134270-removed", + "#category": ("", "bato", "manga"), + "#class" : bato.BatoMangaExtractor, + "#exception": exception.NotFoundError +} +) From 663b8d789a183d6465a45530eb511511b2d3faf7 Mon Sep 17 00:00:00 2001 From: bug-assassin <7788433+bug-assassin@users.noreply.github.com> Date: Tue, 26 Dec 2023 23:41:37 -0500 Subject: [PATCH 2/9] Fix linting --- gallery_dl/extractor/bato.py | 42 +++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py index c34b74fc..320f6999 100644 --- a/gallery_dl/extractor/bato.py +++ b/gallery_dl/extractor/bato.py @@ -14,27 +14,32 @@ BASE_PATTERN = r"(?:https?://)?(?:bato\.to|dto\.to|batotoo\.com|wto\.to)" MANGA_PATTERN = r"/title/\d+(?:-[0-9a-z]+)*/?" CHAPTER_PATTERN = r"/\d+(?:-vol_\d+)?-ch_\d+\.?\d*/?" + class BatoBase(): """Base class for bato v3x extractors""" category = "bato" root = "https://bato.to" + class BatoChapterExtractor(BatoBase, ChapterExtractor): """Extractor for manga chapters from bato.to""" pattern = BASE_PATTERN + "(" + MANGA_PATTERN + CHAPTER_PATTERN + ")" # There are three possible patterns for a chapter example = "https://bato.to/title/12345-manga-name-with-spaces/54212-ch_1.5" - example1 = "https://bato.to/title/12345-manga-name-with-spaces/54212-vol1-ch_1.5" - example2 = "https://bato.to/title/12345/54212" + example2 = \ + "https://bato.to/title/12345-manga-name-with-spaces/54212-vol1-ch_1.5" + example3 = "https://bato.to/title/12345/54212" # v2x, not supported - example3 = "https://bato.to/chapter/54212" + example4 = "https://bato.to/chapter/54212" def __init__(self, match): self.path = match.group(1) ChapterExtractor.__init__(self, match, self.root + self.path) def metadata(self, page): - info, _ = text.extract(page, '', r' - Read Free Manga Online at Bato.To') + info, _ = text.extract( + page, "", r" - Read Free Manga Online at Bato.To" + ) info = info.encode('latin-1').decode('utf-8').replace("\n", "") match = re.match( @@ -58,8 +63,10 @@ class BatoChapterExtractor(BatoBase, ChapterExtractor): def images(self, page): images_container = text.extr(page, 'pageOpts', ':[0,0]}"') images_container = text.unescape(images_container) - - return [(url, None) for url in text.extract_iter(images_container, r'\"', r'\"')] + return [ + (url, None) + for url in text.extract_iter(images_container, r"\"", r"\"") + ] class BatoMangaExtractor(BatoBase, MangaExtractor): @@ -80,28 +87,33 @@ class BatoMangaExtractor(BatoBase, MangaExtractor): num_chapters = text.parse_int(num_chapters) if num_chapters == 0: raise exception.NotFoundError("chapter") - - manga, _ = text.extract(page, '', r' - Read Free Manga Online at Bato.To') + + manga, _ = text.extract( + page, "", r" - Read Free Manga Online at Bato.To" + ) manga = manga.encode('latin-1').decode('utf-8').replace("\n", "") data["manga"] = manga - + results = [] for chapter_num in range(num_chapters): - chapter, _ = text.extract(page, f'
") - chapter += r"" # Add this back in so we can match the date + chapter, _ = text.extract( + page, f'
" + ) + chapter += r"" # so we can match the date url, pos = text.extract(chapter, '') + title, _ = text.extract( + chapter, f'" + ) title, _ = text.extract(title, r"", r"") if title is None or title == "" or title == "": title, _ = text.extract(chapter, ">", "", pos) date, _ = text.extract(chapter, "") date, _ = text.extract(date, 'time="', '"') - + data["date"] = date data["title"] = title data["chapter"] = text.parse_int(chapter_major) @@ -110,4 +122,4 @@ class BatoMangaExtractor(BatoBase, MangaExtractor): if url.startswith("/"): url = self.root + url results.append((url, data.copy())) - return results \ No newline at end of file + return results From 9c1ce28f688b1173508b347a8d975bb7ae6b0743 Mon Sep 17 00:00:00 2001 From: bug-assassin <7788433+bug-assassin@users.noreply.github.com> Date: Tue, 26 Dec 2023 23:44:27 -0500 Subject: [PATCH 3/9] [bato] Added mangatoto alias --- gallery_dl/extractor/bato.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py index 320f6999..b82416d5 100644 --- a/gallery_dl/extractor/bato.py +++ b/gallery_dl/extractor/bato.py @@ -10,7 +10,8 @@ from .common import ChapterExtractor, MangaExtractor from .. import text, exception import re -BASE_PATTERN = r"(?:https?://)?(?:bato\.to|dto\.to|batotoo\.com|wto\.to)" +BASE_PATTERN = r"(?:https?://)?" \ + r"(?:bato\.to|dto\.to|batotoo\.com|wto\.to|mangatoto\.com)" MANGA_PATTERN = r"/title/\d+(?:-[0-9a-z]+)*/?" CHAPTER_PATTERN = r"/\d+(?:-vol_\d+)?-ch_\d+\.?\d*/?" From 06ff1d3a3cfc0d9b1d1e84b8faf66e74f3d3aadc Mon Sep 17 00:00:00 2001 From: bug-assassin <7788433+bug-assassin@users.noreply.github.com> Date: Tue, 26 Dec 2023 23:47:30 -0500 Subject: [PATCH 4/9] Replace text.extract with extr --- gallery_dl/extractor/bato.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py index b82416d5..c885f27b 100644 --- a/gallery_dl/extractor/bato.py +++ b/gallery_dl/extractor/bato.py @@ -38,7 +38,7 @@ class BatoChapterExtractor(BatoBase, ChapterExtractor): ChapterExtractor.__init__(self, match, self.root + self.path) def metadata(self, page): - info, _ = text.extract( + info = text.extr( page, "", r" - Read Free Manga Online at Bato.To" ) info = info.encode('latin-1').decode('utf-8').replace("\n", "") @@ -83,13 +83,13 @@ class BatoMangaExtractor(BatoBase, MangaExtractor): def chapters(self, page): data = {} - num_chapters, _ = text.extract(page, ">Chapters<", "
") - num_chapters, _ = text.extract(num_chapters, r"", r"") + num_chapters = text.extr(page, ">Chapters<", "
") + num_chapters = text.extr(num_chapters, r"", r"") num_chapters = text.parse_int(num_chapters) if num_chapters == 0: raise exception.NotFoundError("chapter") - manga, _ = text.extract( + manga = text.extr( page, "", r" - Read Free Manga Online at Bato.To" ) manga = manga.encode('latin-1').decode('utf-8').replace("\n", "") @@ -97,7 +97,7 @@ class BatoMangaExtractor(BatoBase, MangaExtractor): results = [] for chapter_num in range(num_chapters): - chapter, _ = text.extract( + chapter = text.extr( page, f'
" ) chapter += r"" # so we can match the date @@ -105,15 +105,15 @@ class BatoMangaExtractor(BatoBase, MangaExtractor): chapter_no = re.search(r"-ch_([\d\.]+)", url).group(1) chapter_major, sep, chapter_minor = chapter_no.partition(".") - title, _ = text.extract( + title = text.extr( chapter, f'" ) - title, _ = text.extract(title, r"", r"") + title = text.extr(title, r"", r"") if title is None or title == "" or title == "": title, _ = text.extract(chapter, ">", "", pos) - date, _ = text.extract(chapter, "") - date, _ = text.extract(date, 'time="', '"') + date = text.extr(chapter, "") + date = text.extr(date, 'time="', '"') data["date"] = date data["title"] = title From 2c3f171d653b91e2536a9829866a932f66f4f32c Mon Sep 17 00:00:00 2001 From: bug-assassin <7788433+bug-assassin@users.noreply.github.com> Date: Tue, 26 Dec 2023 23:52:06 -0500 Subject: [PATCH 5/9] Fix python 3.5 linting issue --- gallery_dl/extractor/bato.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py index c885f27b..87d6c3c6 100644 --- a/gallery_dl/extractor/bato.py +++ b/gallery_dl/extractor/bato.py @@ -49,8 +49,8 @@ class BatoChapterExtractor(BatoBase, ChapterExtractor): r"Chapter *([\d\.]+)", info) manga, volume, chapter = match.groups() if match else ("", "", info) chapter, sep, minor = chapter.partition(".") - title_container = text.extr(page, f'") - title = text.extr(title_container, "", "") + title_section = text.extr(page, '") + title = text.extr(title_section, "", "") return { "manga" : text.unescape(manga), From 35530255847a30fb0eb70da6bb1937ffbd33ef81 Mon Sep 17 00:00:00 2001 From: bug-assassin <7788433+bug-assassin@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:07:41 -0500 Subject: [PATCH 6/9] Removed f-strings --- gallery_dl/extractor/bato.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py index 87d6c3c6..082c5e0a 100644 --- a/gallery_dl/extractor/bato.py +++ b/gallery_dl/extractor/bato.py @@ -98,7 +98,9 @@ class BatoMangaExtractor(BatoBase, MangaExtractor): results = [] for chapter_num in range(num_chapters): chapter = text.extr( - page, f'
" + page, + '
" ) chapter += r"" # so we can match the date url, pos = text.extract(chapter, '" + chapter, + '" ) title = text.extr(title, r"", r"") if title is None or title == "" or title == "": From f6ce870885a1df8dfed788c0c9c2cadee1c21f8f Mon Sep 17 00:00:00 2001 From: bug-assassin <7788433+bug-assassin@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:25:15 -0500 Subject: [PATCH 7/9] Better variable names --- gallery_dl/extractor/bato.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py index 082c5e0a..d29a58bf 100644 --- a/gallery_dl/extractor/bato.py +++ b/gallery_dl/extractor/bato.py @@ -97,32 +97,33 @@ class BatoMangaExtractor(BatoBase, MangaExtractor): results = [] for chapter_num in range(num_chapters): - chapter = text.extr( + chapter_info = text.extr( page, '
" ) - chapter += r"" # so we can match the date - url, pos = text.extract(chapter, '" # so we can match the date + url, pos = text.extract(chapter_info, '" ) title = text.extr(title, r"", r"") if title is None or title == "" or title == "": - title, _ = text.extract(chapter, ">", "", pos) + title, _ = text.extract(chapter_info, ">", "", pos) - date = text.extr(chapter, "") + date = text.extr(chapter_info, "") date = text.extr(date, 'time="', '"') data["date"] = date data["title"] = title - data["chapter"] = text.parse_int(chapter_major) - data["chapter_minor"] = sep + chapter_minor + data["chapter"] = text.parse_int(chapt_major) + data["chapter_minor"] = sep + chapt_minor if url.startswith("/"): url = self.root + url From 3aa24c3744474a4fe06ebdec946a895c4f9d538c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 6 Jan 2024 00:51:52 +0100 Subject: [PATCH 8/9] [bato] simplify and update --- docs/supportedsites.md | 2 +- gallery_dl/extractor/bato.py | 141 ++++++++++++++++------------------- test/results/bato.py | 2 +- 3 files changed, 66 insertions(+), 79 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6040cd47..c1acadd2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -99,7 +99,7 @@ Consider all listed sites to potentially be NSFW. Bato - https://bato.to + https://bato.to/ Chapters, Manga diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py index d29a58bf..83404a75 100644 --- a/gallery_dl/extractor/bato.py +++ b/gallery_dl/extractor/bato.py @@ -4,61 +4,63 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://bato.to and aliases (v3x only)""" +"""Extractors for https://bato.to/""" -from .common import ChapterExtractor, MangaExtractor +from .common import Extractor, ChapterExtractor, MangaExtractor from .. import text, exception import re -BASE_PATTERN = r"(?:https?://)?" \ - r"(?:bato\.to|dto\.to|batotoo\.com|wto\.to|mangatoto\.com)" -MANGA_PATTERN = r"/title/\d+(?:-[0-9a-z]+)*/?" -CHAPTER_PATTERN = r"/\d+(?:-vol_\d+)?-ch_\d+\.?\d*/?" +BASE_PATTERN = (r"(?:https?://)?" + r"(?:(?:ba|d|w)to\.to|\.to|(?:batotoo|mangatoto)\.com)") class BatoBase(): - """Base class for bato v3x extractors""" + """Base class for bato extractors""" category = "bato" root = "https://bato.to" + def request(self, url, **kwargs): + kwargs["encoding"] = "utf-8" + return Extractor.request(self, url, **kwargs) + class BatoChapterExtractor(BatoBase, ChapterExtractor): - """Extractor for manga chapters from bato.to""" - pattern = BASE_PATTERN + "(" + MANGA_PATTERN + CHAPTER_PATTERN + ")" - # There are three possible patterns for a chapter - example = "https://bato.to/title/12345-manga-name-with-spaces/54212-ch_1.5" - example2 = \ - "https://bato.to/title/12345-manga-name-with-spaces/54212-vol1-ch_1.5" - example3 = "https://bato.to/title/12345/54212" - # v2x, not supported - example4 = "https://bato.to/chapter/54212" + """Extractor for bato.to manga chapters""" + pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)" + example = "https://bato.to/title/12345-MANGA/54321" def __init__(self, match): - self.path = match.group(1) - ChapterExtractor.__init__(self, match, self.root + self.path) + self.root = text.root_from_url(match.group(0)) + self.chapter_id = match.group(1) + url = "{}/title/0/{}".format(self.root, self.chapter_id) + ChapterExtractor.__init__(self, match, url) def metadata(self, page): - info = text.extr( - page, "", r" - Read Free Manga Online at Bato.To" - ) - info = info.encode('latin-1').decode('utf-8').replace("\n", "") + extr = text.extract_from(page) + manga, info, _ = extr("", "<").rsplit(" - ", 3) + manga_id = extr("/title/", "/") match = re.match( - r"(.+) - " - r"(?:Volume *(\d+) )?" - r"Chapter *([\d\.]+)", info) - manga, volume, chapter = match.groups() if match else ("", "", info) - chapter, sep, minor = chapter.partition(".") - title_section = text.extr(page, '<a href="' + self.path + '"', "</a>") - title = text.extr(title_section, "<!-- -->", "</span>") + r"(?:Volume\s+(\d+) )?" + r"\w+\s+(\d+)(.*)", info) + if match: + volume, chapter, minor = match.groups() + title = text.remove_html(extr( + "selected>", "</option")).partition(" : ")[2] + else: + volume = chapter = 0 + minor = "" + title = info return { "manga" : text.unescape(manga), + "manga_id" : text.parse_int(manga_id), "title" : text.unescape(title), - "author" : "", "volume" : text.parse_int(volume), "chapter" : text.parse_int(chapter), - "chapter_minor": sep + minor, + "chapter_minor": minor, + "chapter_id" : text.parse_int(self.chapter_id), + "date" : text.parse_timestamp(extr(' time="', '"')[:-3]), } def images(self, page): @@ -71,61 +73,46 @@ class BatoChapterExtractor(BatoBase, ChapterExtractor): class BatoMangaExtractor(BatoBase, MangaExtractor): - """Extractor for manga from bato.to""" + """Extractor for bato.to manga""" reverse = False chapterclass = BatoChapterExtractor - pattern = BASE_PATTERN + "(" + MANGA_PATTERN + "$" + ")" - # There are two possible patterns for a manga - example = "https://bato.to/title/12345-manga-name-with-spaces/" - example2 = "https://bato.to/title/12345/" - # v2x, not supported - example3 = "https://bato.to/series/12345/manga-name-with-space" + pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$" + example = "https://bato.to/title/12345-MANGA/" + + def __init__(self, match): + self.root = text.root_from_url(match.group(0)) + self.manga_id = match.group(1) + url = "{}/title/{}".format(self.root, self.manga_id) + MangaExtractor.__init__(self, match, url) def chapters(self, page): - data = {} - num_chapters = text.extr(page, ">Chapters<", "</div>") - num_chapters = text.extr(num_chapters, r"<!-- -->", r"<!-- -->") - num_chapters = text.parse_int(num_chapters) - if num_chapters == 0: - raise exception.NotFoundError("chapter") + extr = text.extract_from(page) - manga = text.extr( - page, "<title>", r" - Read Free Manga Online at Bato.To" - ) - manga = manga.encode('latin-1').decode('utf-8').replace("\n", "") - data["manga"] = manga + warning = extr(' class="alert alert-warning">', "
<") + if warning: + raise exception.StopExtraction("'%s'", text.remove_html(warning)) + data = { + "manga_id": text.parse_int(self.manga_id), + "manga" : text.unescape(extr( + "", "<").rpartition(" - ")[0]), + } + + extr('<div data-hk="0-0-0-0"', "") results = [] - for chapter_num in range(num_chapters): - chapter_info = text.extr( - page, - '<div data-hk="0-0-{}-0"'.format(chapter_num), - r"</time><!--/-->" - ) - chapter_info += r"</time><!--/-->" # so we can match the date - url, pos = text.extract(chapter_info, '<a href="', '"') + while True: + href = extr('<a href="/title/', '"') + if not href: + break - chapter = re.search(r"-ch_([\d\.]+)", url) - if chapter: - chapt_major, sep, chapt_minor = chapter.group(1).partition(".") - title = text.extr( - chapter_info, - '<span data-hk="0-0-{}-1"'.format(chapter_num), - "</span>" - ) - title = text.extr(title, r"<!--#-->", r"<!--/-->") - if title is None or title == "" or title == "<!--/-->": - title, _ = text.extract(chapter_info, ">", "</a>", pos) + chapter = href.rpartition("-ch_")[2] + chapter, sep, minor = chapter.partition(".") - date = text.extr(chapter_info, "<time", "</time>") - date = text.extr(date, 'time="', '"') + data["chapter"] = text.parse_int(chapter) + data["chapter_minor"] = sep + minor + data["date"] = text.parse_datetime( + extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ") - data["date"] = date - data["title"] = title - data["chapter"] = text.parse_int(chapt_major) - data["chapter_minor"] = sep + chapt_minor - - if url.startswith("/"): - url = self.root + url + url = "{}/title/{}".format(self.root, href) results.append((url, data.copy())) return results diff --git a/test/results/bato.py b/test/results/bato.py index 18479f9a..672362f5 100644 --- a/test/results/bato.py +++ b/test/results/bato.py @@ -60,6 +60,6 @@ __tests__ = ( "#url" : "https://bato.to/title/134270-removed", "#category": ("", "bato", "manga"), "#class" : bato.BatoMangaExtractor, - "#exception": exception.NotFoundError + "#exception": exception.StopExtraction, } ) From b11c352d66b6f23a9cb03047d4b19f7092bb4b4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de> Date: Sat, 6 Jan 2024 01:49:34 +0100 Subject: [PATCH 9/9] [bato] rename to 'batoto' to use the same category name as the previous bato.to site --- docs/supportedsites.md | 2 +- gallery_dl/extractor/__init__.py | 2 +- gallery_dl/extractor/{bato.py => batoto.py} | 12 +++++----- scripts/supportedsites.py | 2 +- test/results/{bato.py => batoto.py} | 26 ++++++++++----------- 5 files changed, 22 insertions(+), 22 deletions(-) rename gallery_dl/extractor/{bato.py => batoto.py} (93%) rename test/results/{bato.py => batoto.py} (73%) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c1acadd2..9dc174a8 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -98,7 +98,7 @@ Consider all listed sites to potentially be NSFW. <td></td> </tr> <tr> - <td>Bato</td> + <td>BATO.TO</td> <td>https://bato.to/</td> <td>Chapters, Manga</td> <td></td> diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 99de2169..4ab9db4d 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -24,7 +24,7 @@ modules = [ "architizer", "artstation", "aryion", - "bato", + "batoto", "bbc", "behance", "blogger", diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/batoto.py similarity index 93% rename from gallery_dl/extractor/bato.py rename to gallery_dl/extractor/batoto.py index 83404a75..cd6302e6 100644 --- a/gallery_dl/extractor/bato.py +++ b/gallery_dl/extractor/batoto.py @@ -14,9 +14,9 @@ BASE_PATTERN = (r"(?:https?://)?" r"(?:(?:ba|d|w)to\.to|\.to|(?:batotoo|mangatoto)\.com)") -class BatoBase(): - """Base class for bato extractors""" - category = "bato" +class BatotoBase(): + """Base class for batoto extractors""" + category = "batoto" root = "https://bato.to" def request(self, url, **kwargs): @@ -24,7 +24,7 @@ class BatoBase(): return Extractor.request(self, url, **kwargs) -class BatoChapterExtractor(BatoBase, ChapterExtractor): +class BatotoChapterExtractor(BatotoBase, ChapterExtractor): """Extractor for bato.to manga chapters""" pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)" example = "https://bato.to/title/12345-MANGA/54321" @@ -72,10 +72,10 @@ class BatoChapterExtractor(BatoBase, ChapterExtractor): ] -class BatoMangaExtractor(BatoBase, MangaExtractor): +class BatotoMangaExtractor(BatotoBase, MangaExtractor): """Extractor for bato.to manga""" reverse = False - chapterclass = BatoChapterExtractor + chapterclass = BatotoChapterExtractor pattern = BASE_PATTERN + r"/(?:title|series)/(\d+)[^/?#]*/?$" example = "https://bato.to/title/12345-MANGA/" diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index e3738b8b..ea6c2597 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -32,7 +32,7 @@ CATEGORY_MAP = { "atfbooru" : "ATFBooru", "b4k" : "arch.b4k.co", "baraag" : "baraag", - "bato" : "Bato", + "batoto" : "BATO.TO", "bbc" : "BBC", "comicvine" : "Comic Vine", "coomerparty" : "Coomer", diff --git a/test/results/bato.py b/test/results/batoto.py similarity index 73% rename from test/results/bato.py rename to test/results/batoto.py index 672362f5..f3853247 100644 --- a/test/results/bato.py +++ b/test/results/batoto.py @@ -4,14 +4,14 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -from gallery_dl.extractor import bato +from gallery_dl.extractor import batoto from gallery_dl import exception __tests__ = ( { "#url" : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8", - "#category": ("", "bato", "chapter"), - "#class" : bato.BatoChapterExtractor, + "#category": ("", "batoto", "chapter"), + "#class" : batoto.BatotoChapterExtractor, "#count" : 66, "manga" : "I Shall Master this Family! [Official]", @@ -21,8 +21,8 @@ __tests__ = ( { "#url" : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5", "#comment" : "volume (vol) in url", - "#category": ("", "bato", "chapter"), - "#class" : bato.BatoChapterExtractor, + "#category": ("", "batoto", "chapter"), + "#class" : batoto.BatotoChapterExtractor, "#count" : 7, "manga" : "86--EIGHTY-SIX (Official)", @@ -32,8 +32,8 @@ __tests__ = ( }, { "#url" : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official", - "#category": ("", "bato", "manga"), - "#class" : bato.BatoMangaExtractor, + "#category": ("", "batoto", "manga"), + "#class" : batoto.BatotoMangaExtractor, "#count" : ">= 21", "manga" : "Futsutsuka na Akujo de wa Gozaimasu ga - Suuguu Chouso Torikae Den (Official)", @@ -41,8 +41,8 @@ __tests__ = ( { "#url" : "https://bato.to/title/104929-86-eighty-six-official", "#comment" : "Manga with number in name", - "#category": ("", "bato", "manga"), - "#class" : bato.BatoMangaExtractor, + "#category": ("", "batoto", "manga"), + "#class" : batoto.BatotoMangaExtractor, "#count" : ">= 18", "manga" : "86--EIGHTY-SIX (Official)", @@ -50,16 +50,16 @@ __tests__ = ( { "#url" : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan", "#comment" : "Non-English translation (Indonesian)", - "#category": ("", "bato", "manga"), - "#class" : bato.BatoMangaExtractor, + "#category": ("", "batoto", "manga"), + "#class" : batoto.BatotoMangaExtractor, "#count" : ">= 29", "manga" : "The Grand Duke’s Fox Princess ⎝⎝MGCHAN⎠⎠", }, { "#url" : "https://bato.to/title/134270-removed", - "#category": ("", "bato", "manga"), - "#class" : bato.BatoMangaExtractor, + "#category": ("", "batoto", "manga"), + "#class" : batoto.BatotoMangaExtractor, "#exception": exception.StopExtraction, } )