remove '&' from URL patterns
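'/?&#' -> '/?#' and '?&#' -> '?#'. According to https://www.ietf.org/rfc/rfc3986.txt, URLs are "organized hierarchically" by using "the slash ("/"), question mark ("?"), and number sign ("#") characters to delimit components"; '&' is not one of these delimiters, so it is dropped from the character classes that terminate a URL pattern.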
This commit is contained in:
@@ -84,7 +84,7 @@ class LivedoorExtractor(Extractor):
|
||||
class LivedoorBlogExtractor(LivedoorExtractor):
|
||||
"""Extractor for a user's blog on blog.livedoor.jp"""
|
||||
subcategory = "blog"
|
||||
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?&#])"
|
||||
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?#])"
|
||||
test = (
|
||||
("http://blog.livedoor.jp/zatsu_ke/", {
|
||||
"range": "1-50",
|
||||
|
||||
Reference in New Issue
Block a user