remove '&' from URL patterns

'/?&#' -> '/?#' and '?&#' -> '?#'

According to https://www.ietf.org/rfc/rfc3986.txt, URLs are
"organized hierarchically" by using "the slash ("/"), question
mark ("?"), and number sign ("#") characters to delimit components"
This commit is contained in:
Mike Fährmann
2020-10-22 23:12:59 +02:00
parent 1686dc1757
commit 968d3e8465
74 changed files with 158 additions and 158 deletions

View File

@@ -295,7 +295,7 @@ class TumblrPostExtractor(TumblrExtractor):
class TumblrTagExtractor(TumblrExtractor):
"""Extractor for images from a tumblr-user by tag"""
subcategory = "tag"
pattern = BASE_PATTERN + r"/tagged/([^/?&#]+)"
pattern = BASE_PATTERN + r"/tagged/([^/?#]+)"
test = ("http://demo.tumblr.com/tagged/Times%20Square", {
"pattern": (r"https://\d+\.media\.tumblr\.com/tumblr_[^/_]+_1280.jpg"),
"count": 1,