remove '&' from URL patterns
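Replace '/?&#' with '/?#' and '?&#' with '?#' in the character classes of URL patterns. According to RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt), URLs are "organized hierarchically" by using "the slash ("/"), question mark ("?"), and number sign ("#") characters to delimit components". The ampersand ("&") is not one of these component delimiters, so it does not need to be excluded when matching a path segment.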
This commit is contained in:
@@ -151,7 +151,7 @@ class BehanceUserExtractor(BehanceExtractor):
|
||||
"""Extractor for a user's galleries from www.behance.net"""
|
||||
subcategory = "user"
|
||||
categorytransfer = True
|
||||
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$"
|
||||
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
|
||||
test = ("https://www.behance.net/alexstrohl", {
|
||||
"count": ">= 8",
|
||||
"pattern": BehanceGalleryExtractor.pattern,
|
||||
|
||||
Reference in New Issue
Block a user