From 9e2a945013cf2a71357983093eacdd4b5bc2910f Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Wed, 29 Mar 2023 00:06:41 +0800 Subject: [PATCH] [urlshortener] add support for bit.ly & t.co --- docs/supportedsites.md | 16 ++++++++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/urlshortener.py | 59 ++++++++++++++++++++++++++++ scripts/supportedsites.py | 3 ++ 4 files changed, 79 insertions(+) create mode 100644 gallery_dl/extractor/urlshortener.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1876b045..08b39071 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1270,6 +1270,22 @@ Consider all sites to be NSFW unless otherwise known. + + URL Shorteners + + + Bitly + https://bit.ly/ + + + + + Twitter t.co + https://t.co/ + + + + vichan Imageboards diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3968d727..553a1104 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -153,6 +153,7 @@ modules = [ "twitter", "unsplash", "uploadir", + "urlshortener", "vanillarock", "vichan", "vk", diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py new file mode 100644 index 00000000..d95f182e --- /dev/null +++ b/gallery_dl/extractor/urlshortener.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractor for general-purpose URL shorteners""" + +from .common import BaseExtractor, Message +from .. import exception + + +class UrlshortenerExtractor(BaseExtractor): + """Base class for general-purpose URL shorteners""" + basecategory = "urlshortener" + test = ( + ("https://bit.ly/3cWIUgq", { + "count": 1, + "pattern": "^https://gumroad.com/l/storm_b1" + }), + ("https://t.co/bCgBY8Iv5n", { + "count": 1, + "pattern": ("^https://twitter.com/elonmusk/status/" + "1421395561324896257/photo/1") + }), + ) + + def __init__(self, match): + BaseExtractor.__init__(self, match) + self.headers = INSTANCES[self.category].get("headers") + self.url = match.group() + + def request(self, url, **kwargs): + kwargs["headers"] = self.headers + return BaseExtractor.request(self, url, **kwargs) + + def items(self): + response = self.request( + self.url, method="HEAD", allow_redirects=False, notfound="URL") + if "location" not in response.headers: + raise exception.StopExtraction("Unable to resolve short URL") + yield Message.Queue, response.headers["location"], {} + + +INSTANCES = { + "bitly": { + "root": "https://bit.ly", + "pattern": r"bit\.ly", + }, + "tco": { + # t.co sends 'http-equiv="refresh"' (200) when using browser UA + "headers": {"User-Agent": None}, + "root": "https://t.co", + "pattern": r"t\.co", + }, +} + +UrlshortenerExtractor.pattern = \ + UrlshortenerExtractor.update(INSTANCES) + r"/[^/?#&]+" diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index ff75c6c8..74100d4f 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -112,6 +112,7 @@ CATEGORY_MAP = { "subscribestar" : "SubscribeStar", "tbib" : "The Big ImageBoard", "tcbscans" : "TCB Scans", + "tco" : "Twitter t.co", "thatpervert" : "ThatPervert", "thebarchive" : "The /b/ Archive", "thecollection" : "The /co/llection", @@ -132,6 +133,7 @@ CATEGORY_MAP = { } SUBCATEGORY_MAP = { + "" : "", "art" : "Art", "audio" : "Audio", "doujin" : "Doujin", @@ -266,6 +268,7 @@ BASE_MAP = { "lynxchan" : "LynxChan Imageboards", "moebooru" : "Moebooru and MyImouto", "szurubooru" : "szurubooru Instances", + "urlshortener": "URL Shorteners", "vichan" : "vichan Imageboards", }