allow '/' and '?' in URL queries
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2017-2021 Mike Fährmann
|
||||
# Copyright 2017-2022 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@@ -19,7 +19,7 @@ class DirectlinkExtractor(Extractor):
|
||||
archive_fmt = filename_fmt
|
||||
pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\."
|
||||
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
|
||||
r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$")
|
||||
r"(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$")
|
||||
test = (
|
||||
(("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
|
||||
"url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
|
||||
@@ -31,9 +31,9 @@ class DirectlinkExtractor(Extractor):
|
||||
"keyword": "29dad729c40fb09349f83edafa498dba1297464a",
|
||||
}),
|
||||
# more complex example
|
||||
("https://example.org/path/to/file.webm?que=1&ry=2#fragment", {
|
||||
"url": "114b8f1415cc224b0f26488ccd4c2e7ce9136622",
|
||||
"keyword": "06014abd503e3b2b58aa286f9bdcefdd2ae336c0",
|
||||
("https://example.org/path/to/file.webm?que=1?&ry=2/#fragment", {
|
||||
"url": "6fb1061390f8aada3db01cb24b51797c7ee42b31",
|
||||
"keyword": "3d7abc31d45ba324e59bc599c3b4862452d5f29c",
|
||||
}),
|
||||
# percent-encoded characters
|
||||
("https://example.org/%27%3C%23/%23%3E%27.jpg?key=%3C%26%3E", {
|
||||
|
||||
@@ -27,9 +27,9 @@ class GenericExtractor(Extractor):
|
||||
pattern += r"""
|
||||
(?P<scheme>https?://)? # optional http(s) scheme
|
||||
(?P<domain>[-\w\.]+) # required domain
|
||||
(?P<path>/[^?&#]*)? # optional path
|
||||
(?:\?(?P<query>[^/?#]*))? # optional query
|
||||
(?:\#(?P<fragment>.*))?$ # optional fragment
|
||||
(?P<path>/[^?#]*)? # optional path
|
||||
(?:\?(?P<query>[^#]*))? # optional query
|
||||
(?:\#(?P<fragment>.*))? # optional fragment
|
||||
"""
|
||||
|
||||
def __init__(self, match):
|
||||
|
||||
@@ -210,7 +210,7 @@ class UnsplashCollectionExtractor(UnsplashExtractor):
|
||||
class UnsplashSearchExtractor(UnsplashExtractor):
|
||||
"""Extractor for unsplash search results"""
|
||||
subcategory = "search"
|
||||
pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?"
|
||||
pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?"
|
||||
test = ("https://unsplash.com/s/photos/hair-style", {
|
||||
"pattern": r"https://images\.unsplash\.com/((flagged/)?photo-\d+-\w+"
|
||||
r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
|
||||
|
||||
@@ -52,7 +52,7 @@ class WallhavenSearchExtractor(WallhavenExtractor):
|
||||
subcategory = "search"
|
||||
directory_fmt = ("{category}", "{search[q]}")
|
||||
archive_fmt = "s_{search[q]}_{id}"
|
||||
pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^/?#]+))?"
|
||||
pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?"
|
||||
test = (
|
||||
("https://wallhaven.cc/search?q=touhou"),
|
||||
(("https://wallhaven.cc/search?q=id%3A87"
|
||||
|
||||
@@ -6,4 +6,4 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
__version__ = "1.23.2"
|
||||
__version__ = "1.23.3-dev"
|
||||
|
||||
Reference in New Issue
Block a user