remove '&' from URL patterns

'/?&#' -> '/?#' and '?&#' -> '?#'

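According to RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt), URLs are
"organized hierarchically" by using "the slash ("/"), question
mark ("?"), and number sign ("#") characters to delimit components".
The ampersand ("&") is not one of these component delimiters, so it
does not need to be excluded from path-segment character classes.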
This commit is contained in:
Mike Fährmann
2020-10-22 23:12:59 +02:00
parent 1686dc1757
commit 968d3e8465
74 changed files with 158 additions and 158 deletions

View File

@@ -32,7 +32,7 @@ class MangakakalotBase():
class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
"""Extractor for manga-chapters from mangakakalot.com"""
pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com"
r"(/chapter/\w+/chapter_[^/?&#]+)")
r"(/chapter/\w+/chapter_[^/?#]+)")
test = (
("https://mangakakalot.com/chapter/rx922077/chapter_6", {
"pattern": r"https://s\d+\.\w+\.com/mangakakalot/r\d+/rx922077/"