From efd104e45e297cdeea4afea5e0cb81c04c3ee01d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 6 Mar 2019 10:26:01 +0100 Subject: [PATCH] [instagram] reject more non-user URLs (#180) --- gallery_dl/extractor/instagram.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 3c903fbd..302640ae 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2018 Leonardo Taccari +# Copyright 2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -111,7 +112,7 @@ class InstagramExtractor(Extractor): class InstagramImageExtractor(InstagramExtractor): """Extractor for PostPage""" subcategory = "image" - pattern = r"(?:https?://)?(?:www\.)?instagram\.com/p/([^/]+)/?" + pattern = r"(?:https?://)?(?:www\.)?instagram\.com/p/([^/?&#]+)" test = ( # GraphImage ("https://www.instagram.com/p/BqvsDleB3lV/", { @@ -175,7 +176,8 @@ class InstagramImageExtractor(InstagramExtractor): class InstagramUserExtractor(InstagramExtractor): """Extractor for ProfilePage""" subcategory = "user" - pattern = r"(?:https?://)?(?:www\.)?instagram\.com/(?!p/)([^/?&#]+)" + pattern = (r"(?:https?://)?(?:www\.)?instagram\.com" + r"/(?!p/|explore/|directory/|accounts/)([^/?&#]+)") test = ("https://www.instagram.com/instagram/", { "range": "1-12", "count": ">= 12",