def parse_query_list(qs):
    """Parse a query string into name-value pairs

    Combine values of duplicate names into lists

    A name that occurs once maps to its decoded value as a plain string.
    A name that occurs several times maps to a list of all its decoded
    values in order of appearance. Fields without a '=' separator are
    skipped. Falsy or non-string input yields an empty dict.
    """
    # Explicit type guard instead of a blanket 'except Exception: pass':
    # invalid input is still ignored, but genuine bugs are no longer hidden.
    if not qs or not isinstance(qs, str):
        return {}

    result = {}
    for name_value in qs.split("&"):
        name, eq, value = name_value.partition("=")
        if not eq:
            # bare field without a value, e.g. 'bar' in 'foo=1&bar'
            continue

        # '+' encodes a space in query strings; percent-decode afterwards
        name = unquote(name.replace("+", " "))
        value = unquote(value.replace("+", " "))

        if name not in result:
            result[name] = value
            continue

        rvalue = result[name]
        if isinstance(rvalue, list):
            rvalue.append(value)
        else:
            # second occurrence: promote the single value to a list
            result[name] = [rvalue, value]
    return result
self.assertEqual(f(value), {}) + def test_parse_timestamp(self, f=text.parse_timestamp): null = util.datetime_utcfromtimestamp(0) value = util.datetime_utcfromtimestamp(1555816235)