From 8cfeed78b1e646e8295e6de562195dd6a86e981e Mon Sep 17 00:00:00 2001
From: thatfuckingbird <67429906+thatfuckingbird@users.noreply.github.com>
Date: Sat, 21 Jan 2023 22:32:42 +0100
Subject: [PATCH] [generic] fix regex for non-src image URLs

---
 gallery_dl/extractor/generic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 10c72951..9292da3d 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -150,7 +150,7 @@ class GenericExtractor(Extractor):
         https://en.wikipedia.org/wiki/List_of_file_formats
 
         Compared to the "pattern" class variable, here we must exclude also
-        other special characters (space, ", ', >), since we are looking for
+        other special characters (space, ", ', <, >), since we are looking for
         urls in html tags.
         """
 
@@ -158,7 +158,7 @@ class GenericExtractor(Extractor):
             (?:[^?&#"'>\s]+)                    # anything until dot+extension
             \.(?:jpe?g|jpe|png|gif
                  |web[mp]|mp4|mkv|og[gmv]|opus) # dot + image/video extensions
-            (?:[^"'>\s]*)?                      # optional query and fragment
+            (?:[^"'<>\s]*)?                      # optional query and fragment
             """
 
         imageurls_src = re.findall(imageurl_pattern_src, page)