remove '&' from URL patterns part 2

follow-up to commit 968d3e8465
This commit is contained in:
Mike Fährmann
2023-05-03 20:26:25 +02:00
parent 4d415376d1
commit 850df34c31
4 changed files with 16 additions and 16 deletions

View File

@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2018-2022 Mike Fährmann # Copyright 2018-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Extract images from https://www.myportfolio.com/""" """Extractors for https://www.myportfolio.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, exception
@@ -21,7 +21,7 @@ class MyportfolioGalleryExtractor(Extractor):
archive_fmt = "{user}_{filename}" archive_fmt = "{user}_{filename}"
pattern = (r"(?:myportfolio:(?:https?://)?([^/]+)|" pattern = (r"(?:myportfolio:(?:https?://)?([^/]+)|"
r"(?:https?://)?([\w-]+\.myportfolio\.com))" r"(?:https?://)?([\w-]+\.myportfolio\.com))"
r"(/[^/?&#]+)?") r"(/[^/?#]+)?")
test = ( test = (
("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", { ("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", {
"url": "acea0690c76db0e5cf267648cefd86e921bc3499", "url": "acea0690c76db0e5cf267648cefd86e921bc3499",

View File

@@ -112,7 +112,7 @@ class PinterestExtractor(Extractor):
class PinterestPinExtractor(PinterestExtractor): class PinterestPinExtractor(PinterestExtractor):
"""Extractor for images from a single pin from pinterest.com""" """Extractor for images from a single pin from pinterest.com"""
subcategory = "pin" subcategory = "pin"
pattern = BASE_PATTERN + r"/pin/([^/?#&]+)(?!.*#related$)" pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)"
test = ( test = (
("https://www.pinterest.com/pin/858146903966145189/", { ("https://www.pinterest.com/pin/858146903966145189/", {
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5", "url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
@@ -147,8 +147,8 @@ class PinterestBoardExtractor(PinterestExtractor):
subcategory = "board" subcategory = "board"
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}") directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}" archive_fmt = "{board[id]}_{id}"
pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)" pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)"
"/(?!_saved|_created|pins/)([^/?#&]+)/?$") "/(?!_saved|_created|pins/)([^/?#]+)/?$")
test = ( test = (
("https://www.pinterest.com/g1952849/test-/", { ("https://www.pinterest.com/g1952849/test-/", {
"pattern": r"https://i\.pinimg\.com/originals/", "pattern": r"https://i\.pinimg\.com/originals/",
@@ -198,7 +198,7 @@ class PinterestBoardExtractor(PinterestExtractor):
class PinterestUserExtractor(PinterestExtractor): class PinterestUserExtractor(PinterestExtractor):
"""Extractor for a user's boards""" """Extractor for a user's boards"""
subcategory = "user" subcategory = "user"
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)(?:/_saved)?/?$" pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)(?:/_saved)?/?$"
test = ( test = (
("https://www.pinterest.com/g1952849/", { ("https://www.pinterest.com/g1952849/", {
"pattern": PinterestBoardExtractor.pattern, "pattern": PinterestBoardExtractor.pattern,
@@ -223,7 +223,7 @@ class PinterestAllpinsExtractor(PinterestExtractor):
"""Extractor for a user's 'All Pins' feed""" """Extractor for a user's 'All Pins' feed"""
subcategory = "allpins" subcategory = "allpins"
directory_fmt = ("{category}", "{user}") directory_fmt = ("{category}", "{user}")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/pins/?$" pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/pins/?$"
test = ("https://www.pinterest.com/g1952849/pins/", { test = ("https://www.pinterest.com/g1952849/pins/", {
"pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}" "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}", r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}",
@@ -245,7 +245,7 @@ class PinterestCreatedExtractor(PinterestExtractor):
"""Extractor for a user's created pins""" """Extractor for a user's created pins"""
subcategory = "created" subcategory = "created"
directory_fmt = ("{category}", "{user}") directory_fmt = ("{category}", "{user}")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$" pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/_created/?$"
test = ("https://www.pinterest.de/digitalmomblog/_created/", { test = ("https://www.pinterest.de/digitalmomblog/_created/", {
"pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}" "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.(jpg|png)", r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.(jpg|png)",
@@ -270,7 +270,7 @@ class PinterestSectionExtractor(PinterestExtractor):
directory_fmt = ("{category}", "{board[owner][username]}", directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "{section[title]}") "{board[name]}", "{section[title]}")
archive_fmt = "{board[id]}_{id}" archive_fmt = "{board[id]}_{id}"
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/([^/?#&]+)" pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)"
test = ("https://www.pinterest.com/g1952849/stuff/section", { test = ("https://www.pinterest.com/g1952849/stuff/section", {
"count": 2, "count": 2,
}) })
@@ -321,7 +321,7 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor):
"""Extractor for related pins of another pin from pinterest.com""" """Extractor for related pins of another pin from pinterest.com"""
subcategory = "related-pin" subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[id]}") directory_fmt = ("{category}", "related {original_pin[id]}")
pattern = BASE_PATTERN + r"/pin/([^/?#&]+).*#related$" pattern = BASE_PATTERN + r"/pin/([^/?#]+).*#related$"
test = ("https://www.pinterest.com/pin/858146903966145189/#related", { test = ("https://www.pinterest.com/pin/858146903966145189/#related", {
"range": "31-70", "range": "31-70",
"count": 40, "count": 40,
@@ -340,7 +340,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
subcategory = "related-board" subcategory = "related-board"
directory_fmt = ("{category}", "{board[owner][username]}", directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "related") "{board[name]}", "related")
pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?#related$" pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/?#related$"
test = ("https://www.pinterest.com/g1952849/test-/#related", { test = ("https://www.pinterest.com/g1952849/test-/#related", {
"range": "31-70", "range": "31-70",
"count": 40, "count": 40,
@@ -354,7 +354,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
class PinterestPinitExtractor(PinterestExtractor): class PinterestPinitExtractor(PinterestExtractor):
"""Extractor for images from a pin.it URL""" """Extractor for images from a pin.it URL"""
subcategory = "pinit" subcategory = "pinit"
pattern = r"(?:https?://)?pin\.it/([^/?#&]+)" pattern = r"(?:https?://)?pin\.it/([^/?#]+)"
test = ( test = (
("https://pin.it/Hvt8hgT", { ("https://pin.it/Hvt8hgT", {

View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2019-2021 Mike Fährmann # Copyright 2019-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -112,7 +112,7 @@ class SankakucomplexTagExtractor(SankakucomplexExtractor):
"""Extractor for sankakucomplex blog articles by tag or author""" """Extractor for sankakucomplex blog articles by tag or author"""
subcategory = "tag" subcategory = "tag"
pattern = (r"(?:https?://)?www\.sankakucomplex\.com" pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
r"/((?:tag|category|author)/[^/&?#]+)") r"/((?:tag|category|author)/[^/?#]+)")
test = ( test = (
("https://www.sankakucomplex.com/tag/cosplay/", { ("https://www.sankakucomplex.com/tag/cosplay/", {
"range": "1-50", "range": "1-50",

View File

@@ -34,7 +34,7 @@ BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
class UrlshortenerLinkExtractor(UrlshortenerExtractor): class UrlshortenerLinkExtractor(UrlshortenerExtractor):
"""Extractor for general-purpose URL shorteners""" """Extractor for general-purpose URL shorteners"""
subcategory = "link" subcategory = "link"
pattern = BASE_PATTERN + r"/([^/?&#]+)" pattern = BASE_PATTERN + r"/([^/?#]+)"
test = ( test = (
("https://bit.ly/3cWIUgq", { ("https://bit.ly/3cWIUgq", {
"count": 1, "count": 1,