From 968d3e8465d70bf589b87ff79182ee9cae3ce4fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 22 Oct 2020 23:12:59 +0200 Subject: [PATCH] remove '&' from URL patterns '/?&#' -> '/?#' and '?&#' -> '?#' According to https://www.ietf.org/rfc/rfc3986.txt, URLs are "organized hierarchically" by using "the slash ("/"), question mark ("?"), and number sign ("#") characters to delimit components" --- gallery_dl/extractor/35photo.py | 4 ++-- gallery_dl/extractor/4chan.py | 2 +- gallery_dl/extractor/500px.py | 4 ++-- gallery_dl/extractor/8kun.py | 2 +- gallery_dl/extractor/8muses.py | 2 +- gallery_dl/extractor/artstation.py | 10 +++++----- gallery_dl/extractor/aryion.py | 2 +- gallery_dl/extractor/behance.py | 2 +- gallery_dl/extractor/blogger.py | 4 ++-- gallery_dl/extractor/deviantart.py | 10 +++++----- gallery_dl/extractor/directlink.py | 4 ++-- gallery_dl/extractor/dynastyscans.py | 2 +- gallery_dl/extractor/fallenangels.py | 2 +- gallery_dl/extractor/foolslide.py | 4 ++-- gallery_dl/extractor/furaffinity.py | 10 +++++----- gallery_dl/extractor/fuskator.py | 2 +- gallery_dl/extractor/gfycat.py | 4 ++-- gallery_dl/extractor/hentai2read.py | 4 ++-- gallery_dl/extractor/hentaicafe.py | 4 ++-- gallery_dl/extractor/hentaifoundry.py | 18 +++++++++--------- gallery_dl/extractor/hentaihand.py | 2 +- gallery_dl/extractor/hiperdex.py | 6 +++--- gallery_dl/extractor/hitomi.py | 2 +- gallery_dl/extractor/imagefap.py | 2 +- gallery_dl/extractor/imagehosts.py | 8 ++++---- gallery_dl/extractor/imgbb.py | 6 +++--- gallery_dl/extractor/imgur.py | 10 +++++----- gallery_dl/extractor/inkbunny.py | 2 +- gallery_dl/extractor/instagram.py | 12 ++++++------ gallery_dl/extractor/issuu.py | 4 ++-- gallery_dl/extractor/khinsider.py | 2 +- gallery_dl/extractor/komikcast.py | 4 ++-- gallery_dl/extractor/lineblog.py | 4 ++-- gallery_dl/extractor/livedoor.py | 2 +- gallery_dl/extractor/luscious.py | 2 +- gallery_dl/extractor/mangafox.py | 2 +- 
gallery_dl/extractor/mangahere.py | 2 +- gallery_dl/extractor/mangakakalot.py | 2 +- gallery_dl/extractor/mangapanda.py | 4 ++-- gallery_dl/extractor/mangapark.py | 4 ++-- gallery_dl/extractor/mangareader.py | 4 ++-- gallery_dl/extractor/mastodon.py | 4 ++-- gallery_dl/extractor/myportfolio.py | 4 ++-- gallery_dl/extractor/newgrounds.py | 2 +- gallery_dl/extractor/ngomik.py | 2 +- gallery_dl/extractor/nozomi.py | 2 +- gallery_dl/extractor/oauth.py | 2 +- gallery_dl/extractor/paheal.py | 2 +- gallery_dl/extractor/patreon.py | 6 +++--- gallery_dl/extractor/photobucket.py | 6 +++--- gallery_dl/extractor/piczel.py | 4 ++-- gallery_dl/extractor/pixiv.py | 8 ++++---- gallery_dl/extractor/pixnet.py | 2 +- gallery_dl/extractor/plurk.py | 2 +- gallery_dl/extractor/pornhub.py | 2 +- gallery_dl/extractor/reactor.py | 18 +++++++++--------- gallery_dl/extractor/readcomiconline.py | 4 ++-- gallery_dl/extractor/reddit.py | 8 ++++---- gallery_dl/extractor/redgifs.py | 4 ++-- gallery_dl/extractor/sankakucomplex.py | 2 +- gallery_dl/extractor/sexcom.py | 4 ++-- gallery_dl/extractor/simplyhentai.py | 6 +++--- gallery_dl/extractor/slickpic.py | 2 +- gallery_dl/extractor/slideshare.py | 2 +- gallery_dl/extractor/smugmug.py | 4 ++-- gallery_dl/extractor/speakerdeck.py | 2 +- gallery_dl/extractor/subscribestar.py | 2 +- gallery_dl/extractor/tumblr.py | 2 +- gallery_dl/extractor/twitter.py | 8 ++++---- gallery_dl/extractor/vanillarock.py | 4 ++-- gallery_dl/extractor/webtoons.py | 4 ++-- gallery_dl/extractor/xhamster.py | 4 ++-- gallery_dl/extractor/xvideos.py | 4 ++-- gallery_dl/extractor/yuki.py | 2 +- 74 files changed, 158 insertions(+), 158 deletions(-) diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py index e33aa2dd..edb9d468 100644 --- a/gallery_dl/extractor/35photo.py +++ b/gallery_dl/extractor/35photo.py @@ -101,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor): """Extractor for all images of a user on 35photo.pro""" subcategory = "user" 
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro" - r"/(?!photo_|genre_|tags/|rating/)([^/?&#]+)") + r"/(?!photo_|genre_|tags/|rating/)([^/?#]+)") test = ( ("https://35photo.pro/liya", { "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg", @@ -142,7 +142,7 @@ class _35photoTagExtractor(_35photoExtractor): subcategory = "tag" directory_fmt = ("{category}", "Tags", "{search_tag}") archive_fmt = "t{search_tag}_{id}_{num}" - pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?&#]+)" + pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?#]+)" test = ("https://35photo.pro/tags/landscape/", { "range": "1-25", "count": 25, diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index 980dc20c..bed30b16 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -65,7 +65,7 @@ class _4chanBoardExtractor(Extractor): """Extractor for 4chan boards""" category = "4chan" subcategory = "board" - pattern = r"(?:https?://)?boards\.4chan(?:nel)?\.org/([^/?&#]+)/\d*$" + pattern = r"(?:https?://)?boards\.4chan(?:nel)?\.org/([^/?#]+)/\d*$" test = ("https://boards.4channel.org/po/", { "pattern": _4chanThreadExtractor.pattern, "count": ">= 100", diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index fd973c3c..624b14d1 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -90,7 +90,7 @@ class _500pxExtractor(Extractor): class _500pxUserExtractor(_500pxExtractor): """Extractor for photos from a user's photostream on 500px.com""" subcategory = "user" - pattern = BASE_PATTERN + r"/(?!photo/)(?:p/)?([^/?&#]+)/?(?:$|\?|#)" + pattern = BASE_PATTERN + r"/(?!photo/)(?:p/)?([^/?#]+)/?(?:$|[?#])" test = ( ("https://500px.com/p/light_expression_photography", { "pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2", @@ -132,7 +132,7 @@ class _500pxGalleryExtractor(_500pxExtractor): subcategory = "gallery" directory_fmt = ("{category}", "{user[username]}", 
"{gallery[name]}") pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?" - r"([^/?&#]+)/galleries/([^/?&#]+)") + r"([^/?#]+)/galleries/([^/?#]+)") test = ( ("https://500px.com/p/fashvamp/galleries/lera", { "url": "002dc81dee5b4a655f0e31ad8349e8903b296df6", diff --git a/gallery_dl/extractor/8kun.py b/gallery_dl/extractor/8kun.py index 71629204..47fe6725 100644 --- a/gallery_dl/extractor/8kun.py +++ b/gallery_dl/extractor/8kun.py @@ -64,7 +64,7 @@ class _8kunBoardExtractor(Extractor): """Extractor for 8kun boards""" category = "8kun" subcategory = "board" - pattern = r"(?:https?://)?8kun\.top/([^/?&#]+)/(?:index|\d+)\.html" + pattern = r"(?:https?://)?8kun\.top/([^/?#]+)/(?:index|\d+)\.html" test = ( ("https://8kun.top/v/index.html", { "pattern": _8kunThreadExtractor.pattern, diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index b2487353..3eb55655 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -22,7 +22,7 @@ class _8musesAlbumExtractor(Extractor): archive_fmt = "{hash}" root = "https://comics.8muses.com" pattern = (r"(?:https?://)?(?:comics\.|www\.)?8muses\.com" - r"(/comics/album/[^?&#]+)(\?[^#]+)?") + r"(/comics/album/[^?#]+)(\?[^#]+)?") test = ( ("https://comics.8muses.com/comics/album/Fakku-Comics/mogg/Liar", { "url": "6286ac33087c236c5a7e51f8a9d4e4d5548212d4", diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 985ad484..6914f240 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -131,7 +131,7 @@ class ArtstationUserExtractor(ArtstationExtractor): """Extractor for all projects of an artstation user""" subcategory = "user" pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com" - r"/(?!artwork|projects|search)([^/?&#]+)(?:/albums/all)?" + r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?" 
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$") test = ( ("https://www.artstation.com/gaerikim/", { @@ -156,7 +156,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor): "{album[id]} - {album[title]}") archive_fmt = "a_{album[id]}_{asset[id]}" pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com" - r"/(?!artwork|projects|search)([^/?&#]+)" + r"/(?!artwork|projects|search)([^/?#]+)" r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)") test = ( ("https://www.artstation.com/huimeiye/albums/770899", { @@ -199,7 +199,7 @@ class ArtstationLikesExtractor(ArtstationExtractor): directory_fmt = ("{category}", "{userinfo[username]}", "Likes") archive_fmt = "f_{userinfo[id]}_{asset[id]}" pattern = (r"(?:https?://)?(?:www\.)?artstation\.com" - r"/(?!artwork|projects|search)([^/?&#]+)/likes/?") + r"/(?!artwork|projects|search)([^/?#]+)/likes/?") test = ( ("https://www.artstation.com/mikf/likes", { "pattern": r"https://\w+\.artstation\.com/p/assets" @@ -225,7 +225,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor): "{challenge[id]} - {challenge[title]}") archive_fmt = "c_{challenge[id]}_{asset_id}" pattern = (r"(?:https?://)?(?:www\.)?artstation\.com" - r"/contests/[^/?&#]+/challenges/(\d+)" + r"/contests/[^/?#]+/challenges/(\d+)" r"/?(?:\?sorting=([a-z]+))?") test = ( ("https://www.artstation.com/contests/thu-2017/challenges/20"), @@ -386,7 +386,7 @@ class ArtstationFollowingExtractor(ArtstationExtractor): """Extractor for a user's followed users""" subcategory = "following" pattern = (r"(?:https?://)?(?:www\.)?artstation\.com" - r"/(?!artwork|projects|search)([^/?&#]+)/following") + r"/(?!artwork|projects|search)([^/?#]+)/following") test = ("https://www.artstation.com/gaerikim/following", { "pattern": ArtstationUserExtractor.pattern, "count": ">= 50", diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 374a9fcb..6a90b762 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -153,7 +153,7 @@ class 
AryionGalleryExtractor(AryionExtractor): """Extractor for a user's gallery on eka's portal""" subcategory = "gallery" categorytransfer = True - pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?&#]+)" + pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?#]+)" test = ( ("https://aryion.com/g4/gallery/jameshoward", { "options": (("recursive", False),), diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index be498bc3..a817174c 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -151,7 +151,7 @@ class BehanceUserExtractor(BehanceExtractor): """Extractor for a user's galleries from www.behance.net""" subcategory = "user" categorytransfer = True - pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$" + pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$" test = ("https://www.behance.net/alexstrohl", { "count": ">= 8", "pattern": BehanceGalleryExtractor.pattern, diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index ad95fe88..60170dc4 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -92,7 +92,7 @@ class BloggerExtractor(Extractor): class BloggerPostExtractor(BloggerExtractor): """Extractor for a single blog post""" subcategory = "post" - pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?&#]+\.html)" + pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?#]+\.html)" test = ( ("https://julianbphotography.blogspot.com/2010/12/moon-rise.html", { "url": "9928429fb62f712eb4de80f53625eccecc614aae", @@ -171,7 +171,7 @@ class BloggerBlogExtractor(BloggerExtractor): class BloggerSearchExtractor(BloggerExtractor): """Extractor for search resuls and labels""" subcategory = "search" - pattern = BASE_PATTERN + r"/search(?:/?\?q=([^/?&#]+)|/label/([^/?&#]+))" + pattern = BASE_PATTERN + r"/search(?:/?\?q=([^/?#]+)|/label/([^/?#]+))" test = ( ("https://julianbphotography.blogspot.com/search?q=400mm", { "count": "< 10" 
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 9cceaeec..e40ec51f 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -460,7 +460,7 @@ class DeviantartFolderExtractor(DeviantartExtractor): subcategory = "folder" directory_fmt = ("{category}", "{username}", "{folder[title]}") archive_fmt = "F_{folder[uuid]}_{index}.{extension}" - pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)" + pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?#]+)" test = ( # user ("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", { @@ -601,7 +601,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor): directory_fmt = ("{category}", "{username}", "Favourites", "{collection[title]}") archive_fmt = "C_{collection[uuid]}_{index}.{extension}" - pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)" + pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?#]+)" test = ( (("https://www.deviantart.com/pencilshadings" "/favourites/70595441/3D-Favorites"), { @@ -671,8 +671,8 @@ class DeviantartPopularExtractor(DeviantartExtractor): archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}" pattern = (r"(?:https?://)?www\.deviantart\.com/(?:" r"search(?:/deviations)?" 
- r"|(?:deviations/?)?\?order=(popular-[^/?&#]+)" - r"|((?:[\w-]+/)*)(popular-[^/?&#]+)" + r"|(?:deviations/?)?\?order=(popular-[^/?#]+)" + r"|((?:[\w-]+/)*)(popular-[^/?#]+)" r")/?(?:\?([^#]*))?") test = ( ("https://www.deviantart.com/?order=popular-all-time", { @@ -730,7 +730,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor): """Extractor for single deviations""" subcategory = "deviation" archive_fmt = "{index}.{extension}" - pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)" + pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)" test = ( (("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), { "options": (("original", 0),), diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 1d17658d..a6346bf1 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -17,7 +17,7 @@ class DirectlinkExtractor(Extractor): category = "directlink" filename_fmt = "{domain}/{path}/{filename}.{extension}" archive_fmt = filename_fmt - pattern = (r"(?i)https?://(?P<domain>[^/?&#]+)/(?P<path>[^?&#]+\." + pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\." 
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))" r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$") test = ( diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index 9cc6738b..7d26c476 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -44,7 +44,7 @@ class DynastyscansBase(): class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): """Extractor for manga-chapters from dynasty-scans.com""" - pattern = BASE_PATTERN + r"(/chapters/[^/?&#]+)" + pattern = BASE_PATTERN + r"(/chapters/[^/?#]+)" test = ( (("http://dynasty-scans.com/chapters/" "hitoribocchi_no_oo_seikatsu_ch33"), { diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py index 44863a99..cdd5e74e 100644 --- a/gallery_dl/extractor/fallenangels.py +++ b/gallery_dl/extractor/fallenangels.py @@ -17,7 +17,7 @@ class FallenangelsChapterExtractor(ChapterExtractor): """Extractor for manga-chapters from fascans.com""" category = "fallenangels" pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com" - r"/manga/([^/]+)/(\d+)(\.[^/?&#]+)?") + r"/manga/([^/]+)/(\d+)(\.[^/?#]+)?") test = ( ("https://manga.fascans.com/manga/chronos-ruler/20/1", { "url": "4604a7914566cc2da0ff789aa178e2d1c8c241e3", diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index bf925b69..42456177 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -45,7 +45,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor): """Base class for chapter extractors for FoOlSlide based sites""" directory_fmt = ("{category}", "{manga}", "{chapter_string}") archive_fmt = "{id}" - pattern_fmt = r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)" + pattern_fmt = r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)" decode = "default" def items(self): @@ -86,7 +86,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor): class 
FoolslideMangaExtractor(FoolslideBase, 
MangaExtractor): """Base class for manga extractors for FoOlSlide based sites""" - pattern_fmt = r"(/series/[^/?&#]+)" + pattern_fmt = r"(/series/[^/?#]+)" def chapters(self, page): extr = text.extract_from(page) diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 2a5ef6e7..752cd627 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -177,7 +177,7 @@ class FuraffinityExtractor(Extractor): class FuraffinityGalleryExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's gallery""" subcategory = "gallery" - pattern = BASE_PATTERN + r"/gallery/([^/?&#]+)" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)" test = ("https://www.furaffinity.net/gallery/mirlinthloth/", { "pattern": r"https://d\d?.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+", "range": "45-50", @@ -189,7 +189,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's scraps""" subcategory = "scraps" directory_fmt = ("{category}", "{user!l}", "Scraps") - pattern = BASE_PATTERN + r"/scraps/([^/?&#]+)" + pattern = BASE_PATTERN + r"/scraps/([^/?#]+)" test = ("https://www.furaffinity.net/scraps/mirlinthloth/", { "pattern": r"https://d\d?.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.", "count": ">= 3", @@ -200,7 +200,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's favorites""" subcategory = "favorite" directory_fmt = ("{category}", "{user!l}", "Favorites") - pattern = BASE_PATTERN + r"/favorites/([^/?&#]+)" + pattern = BASE_PATTERN + r"/favorites/([^/?#]+)" test = ("https://www.furaffinity.net/favorites/mirlinthloth/", { "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", "range": "45-50", @@ -278,7 +278,7 @@ class FuraffinityUserExtractor(FuraffinityExtractor): """Extractor for furaffinity user profiles""" subcategory = "user" cookiedomain = None - pattern = BASE_PATTERN + r"/user/([^/?&#]+)" + pattern = BASE_PATTERN 
+ r"/user/([^/?#]+)" test = ( ("https://www.furaffinity.net/user/mirlinthloth/", { "pattern": r"/gallery/mirlinthloth/$", @@ -302,7 +302,7 @@ class FuraffinityUserExtractor(FuraffinityExtractor): class FuraffinityFollowingExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's watched users""" subcategory = "following" - pattern = BASE_PATTERN + "/watchlist/by/([^/?&#]+)" + pattern = BASE_PATTERN + "/watchlist/by/([^/?#]+)" test = ("https://www.furaffinity.net/watchlist/by/mirlinthloth/", { "pattern": FuraffinityUserExtractor.pattern, "range": "176-225", diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py index eba1c393..df55061f 100644 --- a/gallery_dl/extractor/fuskator.py +++ b/gallery_dl/extractor/fuskator.py @@ -17,7 +17,7 @@ class FuskatorGalleryExtractor(GalleryExtractor): """Extractor for image galleries on fuskator.com""" category = "fuskator" root = "https://fuskator.com" - pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?&#]+)" + pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?#]+)" test = ( ("https://fuskator.com/thumbs/d0GnIzXrSKU/", { "pattern": r"https://i\d+.fuskator.com/large/d0GnIzXrSKU/.+\.jpg", diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index ba2fe5de..493c1d22 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -56,7 +56,7 @@ class GfycatUserExtractor(GfycatExtractor): """Extractor for gfycat user profiles""" subcategory = "user" directory_fmt = ("{category}", "{userName}") - pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)" + pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)" test = ("https://gfycat.com/@gretta", { "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4", "count": ">= 100", @@ -70,7 +70,7 @@ class GfycatSearchExtractor(GfycatExtractor): """Extractor for gfycat search results""" subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") - pattern = 
r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)" + pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?#]+)" test = ("https://gfycat.com/gifs/search/funny+animals", { "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4", "archive": False, diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py index 354acbfd..53be67b1 100644 --- a/gallery_dl/extractor/hentai2read.py +++ b/gallery_dl/extractor/hentai2read.py @@ -23,7 +23,7 @@ class Hentai2readBase(): class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor): """Extractor for a single manga chapter from hentai2read.com""" archive_fmt = "{chapter_id}_{page}" - pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?&#]+/(\d+))" + pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/(\d+))" test = ("https://hentai2read.com/amazon_elixir/1/", { "url": "964b942cf492b3a129d2fe2608abfc475bc99e71", "keyword": "ff84b8f751f0e4ee37717efc4332ff1db71951d9", @@ -63,7 +63,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor): class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor): """Extractor for hmanga from hentai2read.com""" chapterclass = Hentai2readChapterExtractor - pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?&#]+)/?$" + pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+)/?$" test = ( ("https://hentai2read.com/amazon_elixir/", { "url": "273073752d418ec887d7f7211e42b832e8c403ba", diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py index 833135e9..e12670a5 100644 --- a/gallery_dl/extractor/hentaicafe.py +++ b/gallery_dl/extractor/hentaicafe.py @@ -20,7 +20,7 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor): category = "hentaicafe" directory_fmt = ("{category}", "{manga}") pattern = (r"(?:https?://)?(?:www\.)?hentai\.cafe" - r"(/manga/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)") + r"(/manga/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)") test = 
("https://hentai.cafe/manga/read/saitom-box/en/0/1/", { "url": "8c6a8c56875ba3ed7ab0a74a64f9960077767fc2", "keyword": "6913608267d883c82b887303b9ced13821188329", @@ -45,7 +45,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): """Extractor for manga from hentai.cafe""" category = "hentaicafe" pattern = (r"(?:https?://)?" + r"(?:www\.)?hentai\.cafe" - r"(/hc\.fyi/\d+|(?:/manga/series)?/[^/?&#]+)/?$") + r"(/hc\.fyi/\d+|(?:/manga/series)?/[^/?#]+)/?$") test = ( # single chapter ("https://hentai.cafe/hazuki-yuuto-summer-blues/", { diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 36dc77ed..0be528dc 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -156,7 +156,7 @@ class HentaifoundryExtractor(Extractor): class HentaifoundryUserExtractor(HentaifoundryExtractor): """Extractor for a hentaifoundry user profile""" subcategory = "user" - pattern = BASE_PATTERN + r"/user/([^/?&#]+)/profile" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile" test = ("https://www.hentai-foundry.com/user/Tenpura/profile",) def items(self): @@ -177,7 +177,7 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor): class HentaifoundryPicturesExtractor(HentaifoundryExtractor): """Extractor for all pictures of a hentaifoundry user""" subcategory = "pictures" - pattern = BASE_PATTERN + r"/pictures/user/([^/?&#]+)(?:/page/(\d+))?/?$" + pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$" test = ( ("https://www.hentai-foundry.com/pictures/user/Tenpura", { "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28", @@ -194,7 +194,7 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor): """Extractor for scraps of a hentaifoundry user""" subcategory = "scraps" directory_fmt = ("{category}", "{user}", "Scraps") - pattern = BASE_PATTERN + r"/pictures/user/([^/?&#]+)/scraps" + pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps" test = ( 
("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", { "url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7", @@ -214,7 +214,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor): subcategory = "favorite" directory_fmt = ("{category}", "{user}", "Favorites") archive_fmt = "f_{user}_{index}" - pattern = BASE_PATTERN + r"/user/([^/?&#]+)/faves/pictures" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures" test = ( ("https://www.hentai-foundry.com/user/Tenpura/faves/pictures", { "url": "56f9ae2e89fe855e9fe1da9b81e5ec6212b0320b", @@ -236,7 +236,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor): archive_fmt = "r_{index}" pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)" test = ("http://www.hentai-foundry.com/pictures/recent/2018-09-20", { - "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?&#]+/\d+/", + "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/", "range": "20-30", }) @@ -255,7 +255,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor): archive_fmt = "p_{index}" pattern = BASE_PATTERN + r"/pictures/popular()" test = ("http://www.hentai-foundry.com/pictures/popular", { - "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?&#]+/\d+/", + "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/", "range": "20-30", }) @@ -268,7 +268,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor): """Extractor for a single image from hentaifoundry.com""" subcategory = "image" pattern = (r"(?:https?://)?(?:www\.|pictures\.)?hentai-foundry\.com" - r"/(?:pictures/user|[^/?&#])/([^/?&#]+)/(\d+)") + r"/(?:pictures/user|[^/?#])/([^/?#]+)/(\d+)") test = ( (("https://www.hentai-foundry.com" "/pictures/user/Tenpura/407501/shimakaze"), { @@ -313,7 +313,7 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor): """Extractor for stories of a hentaifoundry user""" subcategory = "stories" archive_fmt = "s_{index}" - pattern = BASE_PATTERN + 
r"/stories/user/([^/?&#]+)(?:/page/(\d+))?/?$" + pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$" test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", { "count": ">= 35", "keyword": { @@ -349,7 +349,7 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor): """Extractor for a hentaifoundry story""" subcategory = "story" archive_fmt = "s_{index}" - pattern = BASE_PATTERN + r"/stories/user/([^/?&#]+)/(\d+)" + pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)" test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35" "/26416/Overwatch-High-Chapter-Voting-Location"), { "url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8", diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py index 7635bf1c..44859259 100644 --- a/gallery_dl/extractor/hentaihand.py +++ b/gallery_dl/extractor/hentaihand.py @@ -74,7 +74,7 @@ class HentaihandTagExtractor(Extractor): root = "https://hentaihand.com" pattern = (r"(?i)(?:https?://)?(?:www\.)?hentaihand\.com" r"/\w+/(parody|character|tag|artist|group|language" - r"|category|relationship)/([^/?&#]+)") + r"|category|relationship)/([^/?#]+)") test = ( ("https://hentaihand.com/en/artist/himuro", { "pattern": HentaihandGalleryExtractor.pattern, diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index 1c53723d..93ef6f12 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -64,7 +64,7 @@ class HiperdexBase(): class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): """Extractor for manga chapters from hiperdex.com""" - pattern = BASE_PATTERN + r"(/manga/([^/?&#]+)/([^/?&#]+))" + pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))" test = ( ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { "pattern": r"https://hiperdex.(com|net|info)/wp-content/uploads" @@ -105,7 +105,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): class HiperdexMangaExtractor(HiperdexBase, 
MangaExtractor): """Extractor for manga from hiperdex.com""" chapterclass = HiperdexChapterExtractor - pattern = BASE_PATTERN + r"(/manga/([^/?&#]+))/?$" + pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$" test = ( ("https://hiperdex.com/manga/youre-not-that-special/", { "count": 51, @@ -157,7 +157,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): categorytransfer = False chapterclass = HiperdexMangaExtractor reverse = False - pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?&#]+))" + pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?#]+))" test = ( ("https://hiperdex.com/manga-artist/beck-ho-an/"), ("https://hiperdex.net/manga-artist/beck-ho-an/"), diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index d80ce69f..994e1b7e 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -158,7 +158,7 @@ class HitomiTagExtractor(Extractor): subcategory = "tag" pattern = (r"(?:https?://)?hitomi\.la/" r"(tag|artist|group|series|type|character)/" - r"([^/?&#]+)\.html") + r"([^/?#]+)\.html") test = ( ("https://hitomi.la/tag/screenshots-japanese.html", { "pattern": HitomiGalleryExtractor.pattern, diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index bf0ac63a..8785f65d 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -151,7 +151,7 @@ class ImagefapUserExtractor(ImagefapExtractor): subcategory = "user" categorytransfer = True pattern = (BASE_PATTERN + - r"/(?:profile(?:\.php\?user=|/)([^/?&#]+)" + r"/(?:profile(?:\.php\?user=|/)([^/?#]+)" r"|usergallery\.php\?userid=(\d+))") test = ( ("https://www.imagefap.com/profile/LucyRae/galleries", { diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index 4015bfdb..ad5a5085 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -176,7 +176,7 @@ class ImagetwistImageExtractor(ImagehostImageExtractor): class 
ImgspiceImageExtractor(ImagehostImageExtractor): """Extractor for single images from imgspice.com""" category = "imgspice" - pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?&#]+))" + pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?#]+))" test = ("https://imgspice.com/nwfwtpyog50y/test.png.html", { "url": "b8c30a8f51ee1012959a4cfd46197fabf14de984", "keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0", @@ -198,7 +198,7 @@ class PixhostImageExtractor(ImagehostImageExtractor): """Extractor for single images from pixhost.to""" category = "pixhost" pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)" - r"/show/\d+/(\d+)_[^/?&#]+)") + r"/show/\d+/(\d+)_[^/?#]+)") test = ("http://pixhost.to/show/190/130327671_test-.png", { "url": "4e5470dcf6513944773044d40d883221bbc46cff", "keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0", @@ -218,7 +218,7 @@ class PostimgImageExtractor(ImagehostImageExtractor): """Extractor for single images from postimages.org""" category = "postimg" pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)" - r"/(?:image/)?([^/?&#]+)/?)") + r"/(?:image/)?([^/?#]+)/?)") test = ("https://postimg.cc/Wtn2b3hC", { "url": "0794cfda9b8951a8ac3aa692472484200254ab86", "keyword": "2d05808d04e4e83e33200db83521af06e3147a84", @@ -237,7 +237,7 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor): """Extractor for single images from www.turboimagehost.com""" category = "turboimagehost" pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com" - r"/p/(\d+)/[^/?&#]+\.html)") + r"/p/(\d+)/[^/?#]+\.html)") test = ("https://www.turboimagehost.com/p/39078423/test--.png.html", { "url": "b94de43612318771ced924cb5085976f13b3b90e", "keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca", diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 2a69fb1a..5dcca62c 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -115,7 +115,7 @@ class ImgbbAlbumExtractor(ImgbbExtractor): 
"""Extractor for albums on imgbb.com""" subcategory = "album" directory_fmt = ("{category}", "{user}", "{album_name} {album_id}") - pattern = r"(?:https?://)?ibb\.co/album/([^/?&#]+)/?(?:\?([^#]+))?" + pattern = r"(?:https?://)?ibb\.co/album/([^/?#]+)/?(?:\?([^#]+))?" test = ( ("https://ibb.co/album/i5PggF", { "range": "1-80", @@ -173,7 +173,7 @@ class ImgbbUserExtractor(ImgbbExtractor): pattern = r"(?:https?://)?([^.]+)\.imgbb\.com/?(?:\?([^#]+))?$" test = ("https://folkie.imgbb.com", { "range": "1-80", - "pattern": r"https?://i\.ibb\.co/\w+/[^/?&#]+", + "pattern": r"https?://i\.ibb\.co/\w+/[^/?#]+", }) def __init__(self, match): @@ -197,7 +197,7 @@ class ImgbbUserExtractor(ImgbbExtractor): class ImgbbImageExtractor(ImgbbExtractor): subcategory = "image" - pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?&#]+)" + pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?#]+)" test = ("https://ibb.co/fUqh5b", { "pattern": r"https://i\.ibb\.co/g3kvx80/Arundel-Ireeman-5\.jpg", "content": "c5a0965178a8b357acd8aa39660092918c63795e", diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 4391e648..ae4e606e 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -259,7 +259,7 @@ class ImgurGalleryExtractor(ImgurExtractor): class ImgurUserExtractor(ImgurExtractor): """Extractor for all images posted by a user""" subcategory = "user" - pattern = BASE_PATTERN + r"/user/([^/?&#]+)(?:/posts|/submitted)?/?$" + pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/posts|/submitted)?/?$" test = ( ("https://imgur.com/user/Miguenzo", { "range": "1-100", @@ -277,7 +277,7 @@ class ImgurUserExtractor(ImgurExtractor): class ImgurFavoriteExtractor(ImgurExtractor): """Extractor for a user's favorites""" subcategory = "favorite" - pattern = BASE_PATTERN + r"/user/([^/?&#]+)/favorites" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites" test = ("https://imgur.com/user/Miguenzo/favorites", { "range": "1-100", "count": 100, @@ -291,7 +291,7 @@ class 
ImgurFavoriteExtractor(ImgurExtractor): class ImgurSubredditExtractor(ImgurExtractor): """Extractor for a subreddits's imgur links""" subcategory = "subreddit" - pattern = BASE_PATTERN + r"/r/([^/?&#]+)" + pattern = BASE_PATTERN + r"/r/([^/?#]+)" test = ("https://imgur.com/r/pics", { "range": "1-100", "count": 100, @@ -305,7 +305,7 @@ class ImgurSubredditExtractor(ImgurExtractor): class ImgurTagExtractor(ImgurExtractor): """Extractor for imgur tag searches""" subcategory = "tag" - pattern = BASE_PATTERN + r"/t/([^/?&#]+)$" + pattern = BASE_PATTERN + r"/t/([^/?#]+)$" test = ("https://imgur.com/t/animals", { "range": "1-100", "count": 100, @@ -319,7 +319,7 @@ class ImgurTagExtractor(ImgurExtractor): class ImgurSearchExtractor(ImgurExtractor): """Extractor for imgur search results""" subcategory = "search" - pattern = BASE_PATTERN + r"/search(?:/[^?&#]+)?/?\?q=([^&#]+)" + pattern = BASE_PATTERN + r"/search(?:/[^?#]+)?/?\?q=([^&#]+)" test = ("https://imgur.com/search?q=cute+cat", { "range": "1-100", "count": 100, diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py index ff8318cd..6051db07 100644 --- a/gallery_dl/extractor/inkbunny.py +++ b/gallery_dl/extractor/inkbunny.py @@ -60,7 +60,7 @@ class InkbunnyExtractor(Extractor): class InkbunnyUserExtractor(InkbunnyExtractor): """Extractor for inkbunny user profiles""" subcategory = "user" - pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?([^/?&#]+)" + pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?([^/?#]+)" test = ( ("https://inkbunny.net/soina", { "pattern": r"https://[\w.]+\.metapix\.net/files/full" diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 639f2724..11946266 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -343,7 +343,7 @@ class InstagramImageExtractor(InstagramExtractor): """Extractor for PostPage""" subcategory = "image" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" - 
r"/(?:p|tv|reel)/([^/?&#]+)") + r"/(?:p|tv|reel)/([^/?#]+)") test = ( # GraphImage ("https://www.instagram.com/p/BqvsDleB3lV/", { @@ -458,7 +458,7 @@ class InstagramStoriesExtractor(InstagramExtractor): """Extractor for StoriesPage""" subcategory = "stories" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" - r"/stories/([^/?&#]+)(?:/(\d+))?") + r"/stories/([^/?#]+)(?:/(\d+))?") test = ( ("https://www.instagram.com/stories/instagram/"), ("https://www.instagram.com/stories/highlights/18042509488170095/"), @@ -478,7 +478,7 @@ class InstagramSavedExtractor(InstagramExtractor): subcategory = "saved" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)" - r"([^/?&#]+)/saved") + r"([^/?#]+)/saved") test = ("https://www.instagram.com/instagram/saved/",) def __init__(self, match): @@ -504,7 +504,7 @@ class InstagramUserExtractor(InstagramExtractor): subcategory = "user" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" r"/(?!(?:p|explore|directory|accounts|stories|tv|reel)/)" - r"([^/?&#]+)/?(?:$|[?#])") + r"([^/?#]+)/?(?:$|[?#])") test = ( ("https://www.instagram.com/instagram/", { "range": "1-16", @@ -550,7 +550,7 @@ class InstagramChannelExtractor(InstagramExtractor): subcategory = "channel" pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)" - r"([^/?&#]+)/channel") + r"([^/?#]+)/channel") test = ("https://www.instagram.com/instagram/channel/", { "range": "1-16", "count": ">= 16", @@ -579,7 +579,7 @@ class InstagramTagExtractor(InstagramExtractor): subcategory = "tag" directory_fmt = ("{category}", "{subcategory}", "{tag}") pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" - r"/explore/tags/([^/?&#]+)") + r"/explore/tags/([^/?#]+)") test = ("https://www.instagram.com/explore/tags/instagram/", { "range": "1-16", "count": ">= 16", diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py index b34b288b..6266e5ff 100644 --- 
a/gallery_dl/extractor/issuu.py +++ b/gallery_dl/extractor/issuu.py @@ -26,7 +26,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): "{document[originalPublishDate]} {document[title]}") filename_fmt = "{num:>03}.{extension}" archive_fmt = "{document[id]}_{num}" - pattern = r"(?:https?://)?issuu\.com(/[^/?&#]+/docs/[^/?&#]+)" + pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)" test = ("https://issuu.com/issuu/docs/motions-1-2019/", { "pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg", "count" : 36, @@ -80,7 +80,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): class IssuuUserExtractor(IssuuBase, Extractor): """Extractor for all publications of a user/publisher""" subcategory = "user" - pattern = r"(?:https?://)?issuu\.com/([^/?&#]+)/?$" + pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$" test = ("https://issuu.com/issuu", { "pattern": IssuuPublicationExtractor.pattern, "count" : "> 25", diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py index 2550af24..6ddf0e84 100644 --- a/gallery_dl/extractor/khinsider.py +++ b/gallery_dl/extractor/khinsider.py @@ -19,7 +19,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor): directory_fmt = ("{category}", "{album[name]}") archive_fmt = "{filename}.{extension}" pattern = (r"(?:https?://)?downloads\.khinsider\.com" - r"/game-soundtracks/album/([^/?&#]+)") + r"/game-soundtracks/album/([^/?#]+)") root = "https://downloads.khinsider.com" test = (("https://downloads.khinsider.com" "/game-soundtracks/album/horizon-riders-wii"), { diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py index 6e7f139e..b54afb74 100644 --- a/gallery_dl/extractor/komikcast.py +++ b/gallery_dl/extractor/komikcast.py @@ -45,7 +45,7 @@ class KomikcastBase(): class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): """Extractor for manga-chapters from komikcast.com""" - pattern = 
r"(?:https?://)?(?:www\.)?komikcast\.com(/chapter/[^/?&#]+/)" + pattern = r"(?:https?://)?(?:www\.)?komikcast\.com(/chapter/[^/?#]+/)" test = ( (("https://komikcast.com/chapter/" "apotheosis-chapter-02-2-bahasa-indonesia/"), { @@ -81,7 +81,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor): """Extractor for manga from komikcast.com""" chapterclass = KomikcastChapterExtractor pattern = (r"(?:https?://)?(?:www\.)?komikcast\.com" - r"(/(?:komik/)?[^/?&#]+)/?$") + r"(/(?:komik/)?[^/?#]+)/?$") test = ( ("https://komikcast.com/komik/090-eko-to-issho/", { "url": "dc798d107697d1f2309b14ca24ca9dba30c6600f", diff --git a/gallery_dl/extractor/lineblog.py b/gallery_dl/extractor/lineblog.py index a1daa391..4071a26a 100644 --- a/gallery_dl/extractor/lineblog.py +++ b/gallery_dl/extractor/lineblog.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -42,7 +42,7 @@ class LineblogBase(): class LineblogBlogExtractor(LineblogBase, LivedoorBlogExtractor): """Extractor for a user's blog on lineblog.me""" - pattern = r"(?:https?://)?lineblog\.me/(\w+)/?(?:$|[?&#])" + pattern = r"(?:https?://)?lineblog\.me/(\w+)/?(?:$|[?#])" test = ("https://lineblog.me/mamoru_miyano/", { "range": "1-20", "count": 20, diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py index 9d2383fc..feffdfdf 100644 --- a/gallery_dl/extractor/livedoor.py +++ b/gallery_dl/extractor/livedoor.py @@ -84,7 +84,7 @@ class LivedoorExtractor(Extractor): class LivedoorBlogExtractor(LivedoorExtractor): """Extractor for a user's blog on blog.livedoor.jp""" subcategory = "blog" - pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?&#])" + pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?#])" test = ( ("http://blog.livedoor.jp/zatsu_ke/", { "range": "1-50", diff --git 
a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 7561c64e..143d00d1 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -46,7 +46,7 @@ class LusciousAlbumExtractor(LusciousExtractor): directory_fmt = ("{category}", "{album[id]} {album[title]}") archive_fmt = "{album[id]}_{id}" pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net" - r"/(?:albums|pictures/c/[^/?&#]+/album)/[^/?&#]+_(\d+)") + r"/(?:albums|pictures/c/[^/?#]+/album)/[^/?#]+_(\d+)") test = ( ("https://luscious.net/albums/okinami-no-koigokoro_277031/", { "url": "7e4984a271a1072ac6483e4228a045895aff86f3", diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py index 1b8a4a62..cecbc868 100644 --- a/gallery_dl/extractor/mangafox.py +++ b/gallery_dl/extractor/mangafox.py @@ -16,7 +16,7 @@ class MangafoxChapterExtractor(ChapterExtractor): """Extractor for manga-chapters from fanfox.net""" category = "mangafox" pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:mangafox\.me|fanfox\.net)" - r"(/manga/[^/]+/((?:v(\d+)/)?c(\d+)([^/?&#]*)))") + r"(/manga/[^/]+/((?:v(\d+)/)?c(\d+)([^/?#]*)))") test = ( ("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", { "keyword": "5661dab258d42d09d98f194f7172fb9851a49766", diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index 1d88275f..653c61a2 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -24,7 +24,7 @@ class MangahereBase(): class MangahereChapterExtractor(MangahereBase, ChapterExtractor): """Extractor for manga-chapters from mangahere.cc""" pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/" - r"([^/]+(?:/v0*(\d+))?/c([^/?&#]+))") + r"([^/]+(?:/v0*(\d+))?/c([^/?#]+))") test = ( ("https://www.mangahere.cc/manga/dongguo_xiaojie/c004.2/", { "keyword": "7c98d7b50a47e6757b089aa875a53aa970cac66f", diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py 
index 8686b2d9..dd032678 100644 --- a/gallery_dl/extractor/mangakakalot.py +++ b/gallery_dl/extractor/mangakakalot.py @@ -32,7 +32,7 @@ class MangakakalotBase(): class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): """Extractor for manga-chapters from mangakakalot.com""" pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com" - r"(/chapter/\w+/chapter_[^/?&#]+)") + r"(/chapter/\w+/chapter_[^/?#]+)") test = ( ("https://mangakakalot.com/chapter/rx922077/chapter_6", { "pattern": r"https://s\d+\.\w+\.com/mangakakalot/r\d+/rx922077/" diff --git a/gallery_dl/extractor/mangapanda.py b/gallery_dl/extractor/mangapanda.py index a4b8340f..6067bd0d 100644 --- a/gallery_dl/extractor/mangapanda.py +++ b/gallery_dl/extractor/mangapanda.py @@ -35,7 +35,7 @@ class MangapandaBase(): class MangapandaChapterExtractor(MangapandaBase, ChapterExtractor): """Extractor for manga-chapters from mangapanda.com""" archive_fmt = "{manga}_{chapter}_{page}" - pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))" + pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?#]+)/(\d+))" test = ("https://www.mangapanda.com/red-storm/2", { "url": "1f633f776e950531ba9b1e81965316458e785261", "keyword": "b24df4b9cc36383fb6a44e06d32a3884a4dcb5fb", @@ -96,7 +96,7 @@ class MangapandaMangaExtractor(MangapandaBase, MangaExtractor): """Extractor for manga from mangapanda.com""" chapterclass = MangapandaChapterExtractor reverse = False - pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/?&#]+)/?$" + pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/?#]+)/?$" test = ("https://www.mangapanda.com/mushishi", { "url": "357f965732371cac1990fee8b480f62e29141a42", "keyword": "031b3ea085921c552de017ecbb9b906e462229c9", diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index 59a046cd..0a6fba41 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -51,7 +51,7 @@ class MangaparkBase(): class 
MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): """Extractor for manga-chapters from mangapark.net""" pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)" - r"/manga/([^?&#]+/i\d+)") + r"/manga/([^?#]+/i\d+)") test = ( ("https://mangapark.net/manga/gosu/i811653/c055/1", { "count": 50, @@ -117,7 +117,7 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor): """Extractor for manga from mangapark.net""" chapterclass = MangaparkChapterExtractor pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)" - r"(/manga/[^/?&#]+)/?$") + r"(/manga/[^/?#]+)/?$") test = ( ("https://mangapark.net/manga/aria", { "url": "9b62883c25c8de471f8ab43651e1448536c4ce3f", diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index fd9c7ace..30b8ce30 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -53,7 +53,7 @@ class MangareaderBase(): class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor): """Extractor for manga-chapters from mangareader.net""" archive_fmt = "{manga}_{chapter}_{page}" - pattern = r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))" + pattern = r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?#]+)/(\d+))" test = (("https://www.mangareader.net" "/karate-shoukoushi-kohinata-minoru/11"), { "url": "45ece5668d1e9f65cf2225237d78de58660b54e4", @@ -84,7 +84,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor): """Extractor for manga from mangareader.net""" chapterclass = MangareaderChapterExtractor reverse = False - pattern = r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/?&#]+)/?$" + pattern = r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/?#]+)/?$" test = ("https://www.mangareader.net/mushishi", { "url": "bc203b858b4ad76e5d77e39118a7be0350e357da", "keyword": "031b3ea085921c552de017ecbb9b906e462229c9", diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index ac17cb02..0e063d53 100644 --- 
a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -184,7 +184,7 @@ def generate_extractors(): Extr.category = category Extr.instance = instance Extr.pattern = (r"(?:https?://)?" + pattern + - r"/@([^/?&#]+)(?:/media)?/?$") + r"/@([^/?#]+)(?:/media)?/?$") Extr.test = info.get("test-user") Extr.root = root Extr.access_token = token @@ -197,7 +197,7 @@ def generate_extractors(): Extr.__doc__ = "Extractor for images from a status on " + instance Extr.category = category Extr.instance = instance - Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?&#]+/(\d+)" + Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?#]+/(\d+)" Extr.test = info.get("test-status") Extr.root = root Extr.access_token = token diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py index e2e163af..abb937f5 100644 --- a/gallery_dl/extractor/myportfolio.py +++ b/gallery_dl/extractor/myportfolio.py @@ -21,14 +21,14 @@ class MyportfolioGalleryExtractor(Extractor): archive_fmt = "{user}_{filename}" pattern = (r"(?:myportfolio:(?:https?://)?([^/]+)|" r"(?:https?://)?([^.]+\.myportfolio\.com))" - r"(/[^/?&#]+)?") + r"(/[^/?#]+)?") test = ( ("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", { "url": "acea0690c76db0e5cf267648cefd86e921bc3499", "keyword": "6ac6befe2ee0af921d24cf1dd4a4ed71be06db6d", }), ("https://andrewling.myportfolio.com/", { - "pattern": r"https://andrewling\.myportfolio\.com/[^/?&#+]+$", + "pattern": r"https://andrewling\.myportfolio\.com/[^/?#+]+$", "count": ">= 6", }), ("https://stevenilousphotography.myportfolio.com/society", { diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 3cecee4a..1286ef63 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -226,7 +226,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor): """Extractor for a single image from newgrounds.com""" subcategory = "image" pattern = (r"(?:https?://)?(?:" - 
r"(?:www\.)?newgrounds\.com/art/view/([^/?&#]+)/[^/?&#]+" + r"(?:www\.)?newgrounds\.com/art/view/([^/?#]+)/[^/?#]+" r"|art\.ngfiles\.com/images/\d+/\d+_([^_]+)_([^.]+))") test = ( ("https://www.newgrounds.com/art/view/tomfulp/ryu-is-hawt", { diff --git a/gallery_dl/extractor/ngomik.py b/gallery_dl/extractor/ngomik.py index f3608b26..8e29d97f 100644 --- a/gallery_dl/extractor/ngomik.py +++ b/gallery_dl/extractor/ngomik.py @@ -18,7 +18,7 @@ class NgomikChapterExtractor(ChapterExtractor): category = "ngomik" root = "http://ngomik.in" pattern = (r"(?:https?://)?(?:www\.)?ngomik\.in" - r"(/[^/?&#]+-chapter-[^/?&#]+)") + r"(/[^/?#]+-chapter-[^/?#]+)") test = ( ("https://www.ngomik.in/14-sai-no-koi-chapter-1-6/", { "url": "8e67fdf751bbc79bc6f4dead7675008ddb8e32a4", diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index 5e7e3873..15bb5767 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -124,7 +124,7 @@ class NozomiTagExtractor(NozomiExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{postid}" - pattern = r"(?:https?://)?nozomi\.la/tag/([^/?&#]+)-\d+\." + pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-\d+\." 
test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", { "pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$", "count": ">= 25", diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 6d7b27ae..4bb2c482 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -315,7 +315,7 @@ class OAuthTumblr(OAuthBase): class OAuthMastodon(OAuthBase): subcategory = "mastodon" - pattern = "oauth:mastodon:(?:https?://)?([^/?&#]+)" + pattern = "oauth:mastodon:(?:https?://)?([^/?#]+)" def __init__(self, match): OAuthBase.__init__(self, match) diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index f08055c0..57521d69 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -44,7 +44,7 @@ class PahealTagExtractor(PahealExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net" - r"/post/list/([^/?&#]+)") + r"/post/list/([^/?#]+)") test = ("https://rule34.paheal.net/post/list/Ayane_Suzuki/1", { "pattern": r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20", "count": ">= 15" diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index f1e98d94..ad259f41 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -202,8 +202,8 @@ class PatreonCreatorExtractor(PatreonExtractor): """Extractor for a creator's works""" subcategory = "creator" pattern = (r"(?:https?://)?(?:www\.)?patreon\.com" - r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))" - r"([^/?&#]+)(?:/posts)?/?(?:\?([^#]+))?") + r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))" + r"([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?") test = ( ("https://www.patreon.com/koveliana", { "range": "1-25", @@ -283,7 +283,7 @@ class PatreonUserExtractor(PatreonExtractor): class PatreonPostExtractor(PatreonExtractor): """Extractor for media from a single post""" subcategory = "post" - pattern = 
r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?&#]+)" + pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?#]+)" test = ( # postfile + attachments ("https://www.patreon.com/posts/precious-metal-23563293", { diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py index a6456da9..5e2120a6 100644 --- a/gallery_dl/extractor/photobucket.py +++ b/gallery_dl/extractor/photobucket.py @@ -22,7 +22,7 @@ class PhotobucketAlbumExtractor(Extractor): filename_fmt = "{offset:>03}{pictureId:?_//}_{titleOrFilename}.{extension}" archive_fmt = "{id}" pattern = (r"(?:https?://)?((?:[^.]+\.)?photobucket\.com)" - r"/user/[^/?&#]+/library(?:/[^?&#]*)?") + r"/user/[^/?#]+/library(?:/[^?#]*)?") test = ( ("https://s369.photobucket.com/user/CrpyLrkr/library", { "pattern": r"https?://[oi]+\d+.photobucket.com/albums/oo139/", @@ -111,8 +111,8 @@ class PhotobucketImageExtractor(Extractor): filename_fmt = "{pictureId:?/_/}{titleOrFilename}.{extension}" archive_fmt = "{username}_{id}" pattern = (r"(?:https?://)?(?:[^.]+\.)?photobucket\.com" - r"(?:/gallery/user/([^/?&#]+)/media/([^/?&#]+)" - r"|/user/([^/?&#]+)/media/[^?&#]+\.html)") + r"(?:/gallery/user/([^/?#]+)/media/([^/?#]+)" + r"|/user/([^/?#]+)/media/[^?#]+\.html)") test = ( (("https://s271.photobucket.com/user/lakerfanryan" "/media/Untitled-3-1.jpg.html"), { diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py index 064967de..45bd8b53 100644 --- a/gallery_dl/extractor/piczel.py +++ b/gallery_dl/extractor/piczel.py @@ -67,7 +67,7 @@ class PiczelExtractor(Extractor): class PiczelUserExtractor(PiczelExtractor): """Extractor for all images from a user's gallery""" subcategory = "user" - pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$" + pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$" test = ("https://piczel.tv/gallery/Bikupan", { "range": "1-100", "count": ">= 100", @@ -88,7 +88,7 @@ class PiczelFolderExtractor(PiczelExtractor): 
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}") archive_fmt = "f{folder[id]}_{id}_{num}" pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv" - r"/gallery/(?!image)([^/?&#]+)/(\d+)") + r"/gallery/(?!image)([^/?#]+)/(\d+)") test = ("https://piczel.tv/gallery/Lulena/1114", { "count": ">= 4", }) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index ee8f9bbe..a813d0ef 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -86,7 +86,7 @@ class PixivUserExtractor(PixivExtractor): subcategory = "user" pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)" - r"(?:/([^/?&#]+))?)?/?(?:$|[?#])" + r"(?:/([^/?#]+))?)?/?(?:$|[?#])" r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?" r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))") test = ( @@ -170,7 +170,7 @@ class PixivUserExtractor(PixivExtractor): class PixivMeExtractor(PixivExtractor): """Extractor for pixiv.me URLs""" subcategory = "me" - pattern = r"(?:https?://)?pixiv\.me/([^/?&#]+)" + pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)" test = ( ("https://pixiv.me/del_shannon", { "url": "29c295ce75150177e6b0a09089a949804c708fbf", @@ -243,7 +243,7 @@ class PixivFavoriteExtractor(PixivExtractor): "{user_bookmark[id]} {user_bookmark[account]}") archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}" pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:(?:en/)?" - r"users/(\d+)/(bookmarks/artworks(?:/([^/?&#]+))?|following)" + r"users/(\d+)/(bookmarks/artworks(?:/([^/?#]+))?|following)" r"|bookmark\.php(?:\?([^#]*))?)") test = ( ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", { @@ -407,7 +407,7 @@ class PixivSearchExtractor(PixivExtractor): archive_fmt = "s_{search[word]}_{id}{num}.{extension}" directory_fmt = ("{category}", "search", "{search[word]}") pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net" - r"/(?:(?:en/)?tags/([^/?&#]+)(?:/[^/?&#]+)?/?" 
+ r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?" r"|search\.php)(?:\?([^#]+))?") test = ( ("https://www.pixiv.net/en/tags/Original", { diff --git a/gallery_dl/extractor/pixnet.py b/gallery_dl/extractor/pixnet.py index d8ac9f60..342f4fa8 100644 --- a/gallery_dl/extractor/pixnet.py +++ b/gallery_dl/extractor/pixnet.py @@ -169,7 +169,7 @@ class PixnetUserExtractor(PixnetExtractor): """Extractor for all sets and folders of a pixnet user""" subcategory = "user" url_fmt = "{}{}/album/list" - pattern = BASE_PATTERN + r"()(?:/blog|/album(?:/list)?)?/?(?:$|[?&#])" + pattern = BASE_PATTERN + r"()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])" test = ( ("https://albertayu773.pixnet.net/"), ("https://albertayu773.pixnet.net/blog"), diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py index 60ca1fb7..f2e964d0 100644 --- a/gallery_dl/extractor/plurk.py +++ b/gallery_dl/extractor/plurk.py @@ -72,7 +72,7 @@ class PlurkExtractor(Extractor): class PlurkTimelineExtractor(PlurkExtractor): """Extractor for URLs from all posts in a Plurk timeline""" subcategory = "timeline" - pattern = r"(?:https?://)?(?:www\.)?plurk\.com/(?!p/)(\w+)/?(?:$|[?&#])" + pattern = r"(?:https?://)?(?:www\.)?plurk\.com/(?!p/)(\w+)/?(?:$|[?#])" test = ("https://www.plurk.com/plurkapi", { "pattern": r"https?://.+", "count": ">= 23" diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py index 6b36cddb..1856c825 100644 --- a/gallery_dl/extractor/pornhub.py +++ b/gallery_dl/extractor/pornhub.py @@ -118,7 +118,7 @@ class PornhubGalleryExtractor(PornhubExtractor): class PornhubUserExtractor(PornhubExtractor): """Extractor for all galleries of a pornhub user""" subcategory = "user" - pattern = (BASE_PATTERN + r"/(users|model)/([^/?&#]+)" + pattern = (BASE_PATTERN + r"/(users|model)/([^/?#]+)" "(?:/photos(?:/(public|private|favorites))?)?/?$") test = ( ("https://www.pornhub.com/users/flyings0l0/photos/public", { diff --git a/gallery_dl/extractor/reactor.py 
b/gallery_dl/extractor/reactor.py index e5b4b44b..a20312f7 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -150,7 +150,7 @@ class ReactorTagExtractor(ReactorExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "{search_tags}_{post_id}_{num}" - pattern = BASE_PATTERN + r"/tag/([^/?&#]+)" + pattern = BASE_PATTERN + r"/tag/([^/?#]+)" test = ("http://anime.reactor.cc/tag/Anime+Art",) def __init__(self, match): @@ -166,7 +166,7 @@ class ReactorSearchExtractor(ReactorTagExtractor): subcategory = "search" directory_fmt = ("{category}", "search", "{search_tags}") archive_fmt = "s_{search_tags}_{post_id}_{num}" - pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)" + pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" test = ("http://anime.reactor.cc/search?q=Art",) @@ -174,7 +174,7 @@ class ReactorUserExtractor(ReactorExtractor): """Extractor for all posts of a user on *reactor.cc sites""" subcategory = "user" directory_fmt = ("{category}", "user", "{user}") - pattern = BASE_PATTERN + r"/user/([^/?&#]+)" + pattern = BASE_PATTERN + r"/user/([^/?#]+)" test = ("http://anime.reactor.cc/user/Shuster",) def __init__(self, match): @@ -215,7 +215,7 @@ JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))" class JoyreactorTagExtractor(ReactorTagExtractor): """Extractor for tag searches on joyreactor.cc""" category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/tag/([^/?&#]+)" + pattern = JR_BASE_PATTERN + r"/tag/([^/?#]+)" test = ( ("http://joyreactor.cc/tag/Advent+Cirno", { "count": ">= 17", @@ -229,7 +229,7 @@ class JoyreactorTagExtractor(ReactorTagExtractor): class JoyreactorSearchExtractor(ReactorSearchExtractor): """Extractor for search results on joyreactor.cc""" category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)" + pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" test = ( ("http://joyreactor.cc/search/Cirno+Gifs", { "range": "1-25", @@ 
-244,7 +244,7 @@ class JoyreactorSearchExtractor(ReactorSearchExtractor): class JoyreactorUserExtractor(ReactorUserExtractor): """Extractor for all posts of a user on joyreactor.cc""" category = "joyreactor" - pattern = JR_BASE_PATTERN + r"/user/([^/?&#]+)" + pattern = JR_BASE_PATTERN + r"/user/([^/?#]+)" test = ( ("http://joyreactor.cc/user/hemantic"), ("http://joyreactor.com/user/Tacoman123", { @@ -289,7 +289,7 @@ PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)" class PornreactorTagExtractor(ReactorTagExtractor): """Extractor for tag searches on pornreactor.cc""" category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/tag/([^/?&#]+)" + pattern = PR_BASE_PATTERN + r"/tag/([^/?#]+)" test = ( ("http://pornreactor.cc/tag/RiceGnat", { "range": "1-25", @@ -302,7 +302,7 @@ class PornreactorTagExtractor(ReactorTagExtractor): class PornreactorSearchExtractor(ReactorSearchExtractor): """Extractor for search results on pornreactor.cc""" category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)" + pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)" test = ( ("http://pornreactor.cc/search?q=ecchi+hentai", { "range": "1-25", @@ -315,7 +315,7 @@ class PornreactorSearchExtractor(ReactorSearchExtractor): class PornreactorUserExtractor(ReactorUserExtractor): """Extractor for all posts of a user on pornreactor.cc""" category = "pornreactor" - pattern = PR_BASE_PATTERN + r"/user/([^/?&#]+)" + pattern = PR_BASE_PATTERN + r"/user/([^/?#]+)" test = ( ("http://pornreactor.cc/user/Disillusion", { "range": "1-25", diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index 7030c819..ae1749ec 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -45,7 +45,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): """Extractor for comic-issues from readcomiconline.to""" subcategory = "issue" pattern = 
(r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to" - r"(/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+))") + r"(/Comic/[^/?#]+/[^/?#]+\?id=(\d+))") test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", { "url": "2bbab6ec4fbc05d269cca420a82a9b5acda28682", "keyword": "30fe110273e871305001f33c18634516a0a51421", @@ -81,7 +81,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor): chapterclass = ReadcomiconlineIssueExtractor subcategory = "comic" pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to" - r"(/Comic/[^/?&#]+/?)$") + r"(/Comic/[^/?#]+/?)$") test = ( ("https://readcomiconline.to/Comic/W-i-t-c-h", { "url": "e231bc2a293edb465133c37a8e36a7e7d94cab14", diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 4294dae9..0be7f175 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -137,7 +137,7 @@ class RedditSubredditExtractor(RedditExtractor): """Extractor for URLs from subreddits on reddit.com""" subcategory = "subreddit" pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/" - r"([^/?&#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)") + r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)") test = ( ("https://www.reddit.com/r/lavaporn/", { "range": "1-20", @@ -162,7 +162,7 @@ class RedditUserExtractor(RedditExtractor): """Extractor for URLs from posts by a reddit user""" subcategory = "user" pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/" - r"([^/?&#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?") + r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?") test = ( ("https://www.reddit.com/user/username/", { "count": ">= 2", @@ -185,7 +185,7 @@ class RedditSubmissionExtractor(RedditExtractor): """Extractor for URLs from a submission on reddit.com""" subcategory = "submission" pattern = (r"(?:https?://)?(?:" - r"(?:\w+\.)?reddit\.com/(?:r/[^/?&#]+/comments|gallery)" + r"(?:\w+\.)?reddit\.com/(?:r/[^/?#]+/comments|gallery)" r"|redd\.it)/([a-z0-9]+)") test = ( 
("https://www.reddit.com/r/lavaporn/comments/8cqhub/", { @@ -231,7 +231,7 @@ class RedditImageExtractor(Extractor): subcategory = "image" archive_fmt = "{filename}" pattern = (r"(?:https?://)?i\.redd(?:\.it|ituploads\.com)" - r"/[^/?&#]+(?:\?[^#]*)?") + r"/[^/?#]+(?:\?[^#]*)?") test = ( ("https://i.redd.it/upjtjcx2npzz.jpg", { "url": "0de614900feef103e580b632190458c0b62b641a", diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 96be3d87..0a85be6b 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -22,7 +22,7 @@ class RedgifsUserExtractor(RedgifsExtractor): """Extractor for redgifs user profiles""" subcategory = "user" directory_fmt = ("{category}", "{userName}") - pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?&#]+)" + pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?#]+)" test = ("https://www.redgifs.com/users/Natalifiction", { "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4", "count": ">= 100", @@ -36,7 +36,7 @@ class RedgifsSearchExtractor(RedgifsExtractor): """Extractor for redgifs search results""" subcategory = "search" directory_fmt = ("{category}", "Search", "{search}") - pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?&#]+)" + pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?#]+)" test = ("https://www.redgifs.com/gifs/browse/jav", { "pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4", "range": "100-300", diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py index 0189fc95..f6ad3271 100644 --- a/gallery_dl/extractor/sankakucomplex.py +++ b/gallery_dl/extractor/sankakucomplex.py @@ -30,7 +30,7 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor): filename_fmt = "{filename}.{extension}" archive_fmt = "{date:%Y%m%d}_{filename}" pattern = (r"(?:https?://)?www\.sankakucomplex\.com" - r"/(\d{4}/\d\d/\d\d/[^/?&#]+)") + r"/(\d{4}/\d\d/\d\d/[^/?#]+)") test = ( 
("https://www.sankakucomplex.com/2019/05/11/twitter-cosplayers", { "url": "4a9ecc5ae917fbce469280da5b6a482510cae84d", diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index 2cef4301..41d2e677 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -168,7 +168,7 @@ class SexcomBoardExtractor(SexcomExtractor): subcategory = "board" directory_fmt = ("{category}", "{user}", "{board}") pattern = (r"(?:https?://)?(?:www\.)?sex\.com/user" - r"/([^/?&#]+)/(?!(?:following|pins|repins|likes)/)([^/?&#]+)") + r"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)") test = ("https://www.sex.com/user/ronin17/exciting-hentai/", { "count": ">= 15", }) @@ -193,7 +193,7 @@ class SexcomSearchExtractor(SexcomExtractor): subcategory = "search" directory_fmt = ("{category}", "search", "{search[query]}") pattern = (r"(?:https?://)?(?:www\.)?sex\.com/((?:" - r"(pic|gif|video)s/([^/?&#]+)|search/(pic|gif|video)s" + r"(pic|gif|video)s/([^/?#]+)|search/(pic|gif|video)s" r")/?(?:\?([^#]+))?)") test = ( ("https://www.sex.com/search/pics?query=ecchi", { diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py index a0d34d19..7301cbcb 100644 --- a/gallery_dl/extractor/simplyhentai.py +++ b/gallery_dl/extractor/simplyhentai.py @@ -18,7 +18,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): archive_fmt = "{image_id}" pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com" r"(?!/(?:album|gifs?|images?|series)(?:/|$))" - r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)") + r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?#]+)+)") test = ( (("https://original-work.simply-hentai.com" "/amazon-no-hiyaku-amazon-elixir"), { @@ -84,7 +84,7 @@ class SimplyhentaiImageExtractor(Extractor): filename_fmt = "{category}_{token}{title:?_//}.{extension}" archive_fmt = "{token}" pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com" - r"/(image|gif)/[^/?&#]+)") + r"/(image|gif)/[^/?#]+)") test = ( 
(("https://www.simply-hentai.com/image" "/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), { @@ -138,7 +138,7 @@ class SimplyhentaiVideoExtractor(Extractor): directory_fmt = ("{category}", "{type}s") filename_fmt = "{title}{episode:?_//>02}.{extension}" archive_fmt = "{title}_{episode}" - pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)" + pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?#]+)" test = ( ("https://videos.simply-hentai.com/creamy-pie-episode-02", { "pattern": r"https://www\.googleapis\.com/drive/v3/files" diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py index 05ec117a..ddd45ce2 100644 --- a/gallery_dl/extractor/slickpic.py +++ b/gallery_dl/extractor/slickpic.py @@ -33,7 +33,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor): "{album[id]} {album[title]}") filename_fmt = "{num:>03}_{id}{title:?_//}.{extension}" archive_fmt = "{id}" - pattern = BASE_PATTERN + r"/albums/([^/?&#]+)" + pattern = BASE_PATTERN + r"/albums/([^/?#]+)" test = ( ("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", { "url": "58bd94ebc80fd906e9879826970b408d54c6da07", diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index 30420a8b..8f668df1 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -20,7 +20,7 @@ class SlidesharePresentationExtractor(Extractor): filename_fmt = "{presentation}-{num:>02}.{extension}" archive_fmt = "{presentation}_{num}" pattern = (r"(?:https?://)?(?:www\.)?slideshare\.net" - r"/(?:mobile/)?([^/?&#]+)/([^/?&#]+)") + r"/(?:mobile/)?([^/?#]+)/([^/?#]+)") test = ( (("https://www.slideshare.net" "/Slideshare/get-started-with-slide-share"), { diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index 163102d7..cfbd5eb5 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -108,7 +108,7 @@ class SmugmugImageExtractor(SmugmugExtractor): """Extractor for 
individual smugmug images""" subcategory = "image" archive_fmt = "{Image[ImageKey]}" - pattern = BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#-]+)" + pattern = BASE_PATTERN + r"(?:/[^/?#]+)+/i-([^/?#-]+)" test = ( ("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", { "url": "f624ad7293afd6412a7d34e3950a118596c36c85", @@ -141,7 +141,7 @@ class SmugmugImageExtractor(SmugmugExtractor): class SmugmugPathExtractor(SmugmugExtractor): """Extractor for smugmug albums from URL paths and users""" subcategory = "path" - pattern = BASE_PATTERN + r"((?:/[^/?&#a-fh-mo-z][^/?&#]*)*)/?$" + pattern = BASE_PATTERN + r"((?:/[^/?#a-fh-mo-z][^/?#]*)*)/?$" test = ( ("https://tdm.smugmug.com/Nature/Dove", { "pattern": "smugmug:album:cr4C7f$", diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py index a3819c76..f5b91718 100644 --- a/gallery_dl/extractor/speakerdeck.py +++ b/gallery_dl/extractor/speakerdeck.py @@ -20,7 +20,7 @@ class SpeakerdeckPresentationExtractor(Extractor): filename_fmt = "{presentation}-{num:>02}.{extension}" archive_fmt = "{presentation}_{num}" pattern = (r"(?:https?://)?(?:www\.)?speakerdeck\.com" - r"/([^/?&#]+)/([^/?&#]+)") + r"/([^/?#]+)/([^/?#]+)") test = ( (("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), { "pattern": r"https://files.speakerdeck.com/presentations/" diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 38b39d4c..753f2662 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -135,7 +135,7 @@ class SubscribestarExtractor(Extractor): class SubscribestarUserExtractor(SubscribestarExtractor): """Extractor for media from a subscribestar user""" subcategory = "user" - pattern = BASE_PATTERN + r"/(?!posts/)([^/?&#]+)" + pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)" test = ( ("https://www.subscribestar.com/subscribestar", { "count": ">= 20", diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py 
index 185f33a7..cf57a4d8 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -295,7 +295,7 @@ class TumblrPostExtractor(TumblrExtractor): class TumblrTagExtractor(TumblrExtractor): """Extractor for images from a tumblr-user by tag""" subcategory = "tag" - pattern = BASE_PATTERN + r"/tagged/([^/?&#]+)" + pattern = BASE_PATTERN + r"/tagged/([^/?#]+)" test = ("http://demo.tumblr.com/tagged/Times%20Square", { "pattern": (r"https://\d+\.media\.tumblr\.com/tumblr_[^/_]+_1280.jpg"), "count": 1, diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 8f941404..06973b27 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -263,7 +263,7 @@ class TwitterTimelineExtractor(TwitterExtractor): """Extractor for all images from a user's timeline""" subcategory = "timeline" pattern = BASE_PATTERN + \ - r"/(?!search)(?:([^/?&#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))" + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))" test = ( ("https://twitter.com/supernaturepics", { "range": "1-40", @@ -287,7 +287,7 @@ class TwitterTimelineExtractor(TwitterExtractor): class TwitterMediaExtractor(TwitterExtractor): """Extractor for all images from a user's Media Tweets""" subcategory = "media" - pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/media(?!\w)" + pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)" test = ( ("https://twitter.com/supernaturepics/media", { "range": "1-40", @@ -304,7 +304,7 @@ class TwitterMediaExtractor(TwitterExtractor): class TwitterLikesExtractor(TwitterExtractor): """Extractor for liked tweets""" subcategory = "likes" - pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/likes(?!\w)" + pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)" test = ("https://twitter.com/supernaturepics/likes",) def tweets(self): @@ -342,7 +342,7 @@ class TwitterSearchExtractor(TwitterExtractor): class TwitterTweetExtractor(TwitterExtractor): """Extractor for images 
from individual tweets""" subcategory = "tweet" - pattern = BASE_PATTERN + r"/([^/?&#]+|i/web)/status/(\d+)" + pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)" test = ( ("https://twitter.com/supernaturepics/status/604341487988576256", { "url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580", diff --git a/gallery_dl/extractor/vanillarock.py b/gallery_dl/extractor/vanillarock.py index 687ce3cf..e10c6426 100644 --- a/gallery_dl/extractor/vanillarock.py +++ b/gallery_dl/extractor/vanillarock.py @@ -29,7 +29,7 @@ class VanillarockPostExtractor(VanillarockExtractor): filename_fmt = "{num:>02}.{extension}" archive_fmt = "{filename}" pattern = (r"(?:https?://)?(?:www\.)?vanilla-rock\.com" - r"(/(?!category/|tag/)[^/?&#]+)/?$") + r"(/(?!category/|tag/)[^/?#]+)/?$") test = ("https://vanilla-rock.com/mizuhashi_parsee-5", { "url": "7fb9a4d18d9fa22d7295fee8d94ab5a7a52265dd", "keyword": "b91df99b714e1958d9636748b1c81a07c3ef52c9", @@ -66,7 +66,7 @@ class VanillarockTagExtractor(VanillarockExtractor): """Extractor for vanillarock blog posts by tag or category""" subcategory = "tag" pattern = (r"(?:https?://)?(?:www\.)?vanilla-rock\.com" - r"(/(?:tag|category)/[^?&#]+)") + r"(/(?:tag|category)/[^?#]+)") test = ( ("https://vanilla-rock.com/tag/%e5%b0%84%e5%91%bd%e4%b8%b8%e6%96%87", { "pattern": VanillarockPostExtractor.pattern, diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index d42730ed..55324cbc 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -34,7 +34,7 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor): directory_fmt = ("{category}", "{comic}") filename_fmt = "{episode}-{num:>02}.{extension}" archive_fmt = "{title_no}_{episode}_{num}" - pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+)/(?:[^/?&#]+))" + pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/(?:[^/?#]+))" r"/viewer(?:\?([^#'\"]+))") test = ( (("https://www.webtoons.com/en/comedy/safely-endangered" @@ -97,7 +97,7 @@ class 
WebtoonsComicExtractor(WebtoonsExtractor): """Extractor for an entire comic on webtoons.com""" subcategory = "comic" categorytransfer = True - pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+))" + pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+))" r"/list(?:\?([^#]+))") test = ( # english diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py index 70c4eeda..258e89c6 100644 --- a/gallery_dl/extractor/xhamster.py +++ b/gallery_dl/extractor/xhamster.py @@ -33,7 +33,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor): "{gallery[id]} {gallery[title]}") filename_fmt = "{num:>03}_{id}.{extension}" archive_fmt = "{id}" - pattern = BASE_PATTERN + r"(/photos/gallery/[^/?&#]+)" + pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)" test = ( ("https://xhamster.com/photos/gallery/11748968", { "pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$", @@ -152,7 +152,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor): class XhamsterUserExtractor(XhamsterExtractor): """Extractor for all galleries of an xhamster user""" subcategory = "user" - pattern = BASE_PATTERN + r"/users/([^/?&#]+)(?:/photos)?/?(?:$|[?#])" + pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])" test = ( ("https://xhamster.com/users/goldenpalomino/photos", { "pattern": XhamsterGalleryExtractor.pattern, diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index b7d116ab..9fdc5aa8 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -28,7 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor): archive_fmt = "{gallery[id]}_{num}" pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com" r"/(?:profiles|amateur-channels|model-channels)" - r"/([^/?&#]+)/photos/(\d+)") + r"/([^/?#]+)/photos/(\d+)") test = ( ("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", { "url": "cb4657a37eea5ab6b1d333491cee7eeb529b0645", @@ -94,7 +94,7 @@ class XvideosUserExtractor(XvideosBase, Extractor): 
subcategory = "user" categorytransfer = True pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com" - r"/profiles/([^/?&#]+)/?(?:#.*)?$") + r"/profiles/([^/?#]+)/?(?:#.*)?$") test = ( ("https://www.xvideos.com/profiles/pervertedcouple", { "url": "a413f3e60d6d3a2de79bd44fa3b7a9c03db4336e", diff --git a/gallery_dl/extractor/yuki.py b/gallery_dl/extractor/yuki.py index 0844c404..72d7cad9 100644 --- a/gallery_dl/extractor/yuki.py +++ b/gallery_dl/extractor/yuki.py @@ -19,7 +19,7 @@ class YukiThreadExtractor(Extractor): directory_fmt = ("{category}", "{board}", "{thread}{title:? - //}") filename_fmt = "{time}-{filename}.{extension}" archive_fmt = "{board}_{thread}_{tim}" - pattern = r"(?:https?://)?yuki\.la/([^/?&#]+)/(\d+)" + pattern = r"(?:https?://)?yuki\.la/([^/?#]+)/(\d+)" test = ( ("https://yuki.la/gd/309639", { "url": "289e86c5caf673a2515ec5f5f521ac0ae7e189e9",