remove '&' from URL patterns

'/?&#' -> '/?#' and '?&#' -> '?#'

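According to RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt), URLs are
"organized hierarchically" by using "the slash ("/"), question
mark ("?"), and number sign ("#") characters to delimit components".
The ampersand ("&") is not one of these component delimiters, so it
does not need to be excluded from the patterns' character classes.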
This commit is contained in:
Mike Fährmann
2020-10-22 23:12:59 +02:00
parent 1686dc1757
commit 968d3e8465
74 changed files with 158 additions and 158 deletions

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2019 Mike Fährmann
# Copyright 2017-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,7 +17,7 @@ class DirectlinkExtractor(Extractor):
category = "directlink"
filename_fmt = "{domain}/{path}/{filename}.{extension}"
archive_fmt = filename_fmt
pattern = (r"(?i)https?://(?P<domain>[^/?&#]+)/(?P<path>[^?&#]+\."
pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\."
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$")
test = (