[mangadex] general improvements
- support >100 chapter entries per manga
- custom archive ID format
- detect non-existing chapters
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
# Changelog
|
||||
|
||||
## Unreleased
|
||||
- Added support for:
|
||||
- `mangadex` - https://mangadex.org/
|
||||
|
||||
## 1.3.0 - 2018-03-02
|
||||
- Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76))
|
||||
|
||||
@@ -47,7 +47,7 @@ Luscious https://luscious.net/ Albums
|
||||
Manga Fox http://fanfox.net/ Chapters
|
||||
Manga Here http://www.mangahere.co/ Chapters, Manga
|
||||
Manga Stream https://mangastream.com/ Chapters
|
||||
Mangadex https://mangadex.org/ Chapters, Manga
|
||||
MangaDex https://mangadex.org/ Chapters, Manga
|
||||
Mangapanda https://www.mangapanda.com/ Chapters, Manga
|
||||
MangaPark https://mangapark.me/ Chapters, Manga
|
||||
Mangareader https://www.mangareader.net/ Chapters, Manga
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"""Extract manga-chapters and entire manga from https://mangadex.org/"""
|
||||
|
||||
from .common import ChapterExtractor, MangaExtractor
|
||||
from .. import text, util
|
||||
from .. import text, util, exception
|
||||
from urllib.parse import urljoin
|
||||
import json
|
||||
import re
|
||||
@@ -23,6 +23,7 @@ class MangadexExtractor():
|
||||
|
||||
class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from mangadex.org"""
|
||||
archive_fmt = "{chapter_id}_{page}"
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
|
||||
test = [
|
||||
("https://mangadex.org/chapter/122094", {
|
||||
@@ -34,6 +35,10 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
|
||||
"count": 64,
|
||||
"keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
|
||||
}),
|
||||
# NotFoundError
|
||||
("https://mangadex.org/chapter/1", {
|
||||
"exception": exception.NotFoundError,
|
||||
})
|
||||
]
|
||||
|
||||
def __init__(self, match):
|
||||
@@ -42,6 +47,9 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
|
||||
ChapterExtractor.__init__(self, url)
|
||||
|
||||
def get_metadata(self, page):
|
||||
if "title='Warning'" in page and " does not exist." in page:
|
||||
raise exception.NotFoundError("chapter")
|
||||
|
||||
info , pos = text.extract(page, '="og:title" content="', '"')
|
||||
manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
|
||||
_ , pos = text.extract(page, ' id="jump_group"', '', pos)
|
||||
@@ -88,14 +96,14 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(mangadex\.(?:org|com)/manga/\d+)"]
|
||||
test = [
|
||||
("https://mangadex.org/manga/2946/souten-no-koumori", {
|
||||
"url": "9e77934759828458d0424473922e41f348719472",
|
||||
"count": ">= 1",
|
||||
"keywords": {
|
||||
"manga": "Souten no Koumori",
|
||||
"manga_id": 2946,
|
||||
"title": "Oneshot",
|
||||
"volume": int,
|
||||
"chapter": int,
|
||||
"chapter_minor": str,
|
||||
"volume": 0,
|
||||
"chapter": 0,
|
||||
"chapter_minor": "",
|
||||
"chapter_id": int,
|
||||
"group": str,
|
||||
"contributor": str,
|
||||
@@ -106,43 +114,53 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
|
||||
},
|
||||
}),
|
||||
]
|
||||
scheme = "https"
|
||||
per_page = 100
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
extr = text.extract
|
||||
num = 1
|
||||
|
||||
manga = text.unescape(extr(
|
||||
page, '"og:title" content="', '"')[0].rpartition(" (")[0])
|
||||
manga_id = util.safe_int(extr(
|
||||
page, '/images/manga/', '.')[0])
|
||||
|
||||
for info in text.extract_iter(page, "<tr id=", "</tr>"):
|
||||
chid , pos = extr(info, 'data-chapter-id="', '"')
|
||||
chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
|
||||
volume , pos = extr(info, 'data-volume-num="', '"', pos)
|
||||
title , pos = extr(info, 'data-chapter-name="', '"', pos)
|
||||
language, pos = extr(info, " title='", "'", pos)
|
||||
group , pos = extr(info, "<td>", "</td>", pos)
|
||||
user , pos = extr(info, "<td>", "</td>", pos)
|
||||
views , pos = extr(info, ">", "<", pos)
|
||||
date , pos = extr(info, ' datetime="', '"', pos)
|
||||
while True:
|
||||
before = len(results)
|
||||
|
||||
chapter, sep, minor = chapter.partition(".")
|
||||
for info in text.extract_iter(page, "<tr id=", "</tr>"):
|
||||
chid , pos = extr(info, 'data-chapter-id="', '"')
|
||||
chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
|
||||
volume , pos = extr(info, 'data-volume-num="', '"', pos)
|
||||
title , pos = extr(info, 'data-chapter-name="', '"', pos)
|
||||
language, pos = extr(info, " title='", "'", pos)
|
||||
group , pos = extr(info, "<td>", "</td>", pos)
|
||||
user , pos = extr(info, "<td>", "</td>", pos)
|
||||
views , pos = extr(info, ">", "<", pos)
|
||||
date , pos = extr(info, ' datetime="', '"', pos)
|
||||
|
||||
results.append((self.root + "/chapter/" + chid, {
|
||||
"manga": manga,
|
||||
"manga_id": util.safe_int(manga_id),
|
||||
"title": text.unescape(title),
|
||||
"volume": util.safe_int(volume),
|
||||
"chapter": util.safe_int(chapter),
|
||||
"chapter_minor": sep + minor,
|
||||
"chapter_id": util.safe_int(chid),
|
||||
"group": text.unescape(text.remove_html(group)),
|
||||
"contributor": text.remove_html(user),
|
||||
"views": util.safe_int(views),
|
||||
"date": date,
|
||||
"lang": util.language_to_code(language),
|
||||
"language": language,
|
||||
}))
|
||||
chapter, sep, minor = chapter.partition(".")
|
||||
|
||||
return results
|
||||
results.append((self.root + "/chapter/" + chid, {
|
||||
"manga": manga,
|
||||
"manga_id": util.safe_int(manga_id),
|
||||
"title": text.unescape(title),
|
||||
"volume": util.safe_int(volume),
|
||||
"chapter": util.safe_int(chapter),
|
||||
"chapter_minor": sep + minor,
|
||||
"chapter_id": util.safe_int(chid),
|
||||
"group": text.unescape(text.remove_html(group)),
|
||||
"contributor": text.remove_html(user),
|
||||
"views": util.safe_int(views),
|
||||
"date": date,
|
||||
"lang": util.language_to_code(language),
|
||||
"language": language,
|
||||
}))
|
||||
|
||||
if len(results) - before != self.per_page:
|
||||
return results
|
||||
|
||||
num += 1
|
||||
page = self.request("{}/_/{}/".format(self.url, num)).text
|
||||
|
||||
@@ -37,6 +37,7 @@ CATEGORY_MAP = {
|
||||
"kisscomic" : "KissComic",
|
||||
"kissmanga" : "KissManga",
|
||||
"loveisover" : "Love is Over Archive",
|
||||
"mangadex" : "MangaDex",
|
||||
"mangafox" : "Manga Fox",
|
||||
"mangahere" : "Manga Here",
|
||||
"mangapark" : "MangaPark",
|
||||
|
||||
Reference in New Issue
Block a user