From 9e2a945013cf2a71357983093eacdd4b5bc2910f Mon Sep 17 00:00:00 2001
From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com>
Date: Wed, 29 Mar 2023 00:06:41 +0800
Subject: [PATCH 1/4] [urlshortener] add support for bit.ly & t.co

---
 docs/supportedsites.md               | 16 ++++++++
 gallery_dl/extractor/__init__.py     |  1 +
 gallery_dl/extractor/urlshortener.py | 59 ++++++++++++++++++++++++++++
 scripts/supportedsites.py            |  3 ++
 4 files changed, 79 insertions(+)
 create mode 100644 gallery_dl/extractor/urlshortener.py
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 1876b045..08b39071 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1270,6 +1270,22 @@ Consider all sites to be NSFW unless otherwise known.
     <td></td>
 </tr>
 
+<tr>
+    <td colspan="4"><strong>URL Shorteners</strong></td>
+</tr>
+<tr>
+    <td>Bitly</td>
+    <td>https://bit.ly/</td>
+    <td></td>
+    <td></td>
+</tr>
+<tr>
+    <td>Twitter t.co</td>
+    <td>https://t.co/</td>
+    <td></td>
+    <td></td>
+</tr>
+
 <tr>
     <td colspan="4"><strong>vichan Imageboards</strong></td>
 </tr>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 3968d727..553a1104 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -153,6 +153,7 @@ modules = [
     "twitter",
     "unsplash",
     "uploadir",
+    "urlshortener",
     "vanillarock",
     "vichan",
     "vk",
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
new file mode 100644
index 00000000..d95f182e
--- /dev/null
+++ b/gallery_dl/extractor/urlshortener.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractor for general-purpose URL shorteners"""
+
+from .common import BaseExtractor, Message
+from .. import exception
+
+
+class UrlshortenerExtractor(BaseExtractor):
+    """Base class for general-purpose URL shorteners"""
+    basecategory = "urlshortener"
+    test = (
+        ("https://bit.ly/3cWIUgq", {
+            "count": 1,
+            "pattern": "^https://gumroad.com/l/storm_b1"
+        }),
+        ("https://t.co/bCgBY8Iv5n", {
+            "count": 1,
+            "pattern": ("^https://twitter.com/elonmusk/status/"
+                        "1421395561324896257/photo/1")
+        }),
+    )
+
+    def __init__(self, match):
+        BaseExtractor.__init__(self, match)
+        self.headers = INSTANCES[self.category].get("headers")
+        self.url = match.group()
+
+    def request(self, url, **kwargs):
+        kwargs["headers"] = self.headers
+        return BaseExtractor.request(self, url, **kwargs)
+
+    def items(self):
+        response = self.request(
+            self.url, method="HEAD", allow_redirects=False, notfound="URL")
+        if "location" not in response.headers:
+            raise exception.StopExtraction("Unable to resolve short URL")
+        yield Message.Queue, response.headers["location"], {}
+
+
+INSTANCES = {
+    "bitly": {
+        "root": "https://bit.ly",
+        "pattern": r"bit\.ly",
+    },
+    "tco": {
+        # t.co sends 'http-equiv="refresh"' (200) when using browser UA
+        "headers": {"User-Agent": None},
+        "root": "https://t.co",
+        "pattern": r"t\.co",
+    },
+}
+
+UrlshortenerExtractor.pattern = \
+    UrlshortenerExtractor.update(INSTANCES) + r"/[^/?#&]+"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index ff75c6c8..74100d4f 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -112,6 +112,7 @@ CATEGORY_MAP = {
     "subscribestar"  : "SubscribeStar",
     "tbib"           : "The Big ImageBoard",
     "tcbscans"       : "TCB Scans",
+    "tco"            : "Twitter t.co",
     "thatpervert"    : "ThatPervert",
     "thebarchive"    : "The /b/ Archive",
     "thecollection"  : "The /co/llection",
@@ -132,6 +133,7 @@ CATEGORY_MAP = {
 }
 
 SUBCATEGORY_MAP = {
+    ""       : "",
     "art"    : "Art",
     "audio"  : "Audio",
     "doujin" : "Doujin",
@@ -266,6 +268,7 @@ BASE_MAP = {
     "lynxchan"    : "LynxChan Imageboards",
     "moebooru"    : "Moebooru and MyImouto",
     "szurubooru"  : "szurubooru Instances",
+    "urlshortener": "URL Shorteners",
     "vichan"      : "vichan Imageboards",
 }
 

From 71b26adb9b737717ebd79a1a513bebe52e787e8b Mon Sep 17 00:00:00 2001
From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com>
Date: Wed, 29 Mar 2023 13:36:43 +0800
Subject: [PATCH 2/4] [urlshortener] add tinyurl.com as an example

---
 docs/gallery-dl-example.conf         | 4 ++++
 gallery_dl/extractor/urlshortener.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf
index ef7b3b50..da386dd6 100644
--- a/docs/gallery-dl-example.conf
+++ b/docs/gallery-dl-example.conf
@@ -317,6 +317,10 @@
             "archive": "~/gallery-dl/custom-archive-file-for-TBIB.db",
             "filename": "{id}_{md5}.{extension}",
             "sleep-request": [0, 1.2]
+        },
+
+        "urlshortener": {
+            "tinyurl": {"root": "https://tinyurl.com"}
         }
     },
 
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index d95f182e..23a6df86 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -11,7 +11,7 @@ from .. import exception
 
 
 class UrlshortenerExtractor(BaseExtractor):
-    """Base class for general-purpose URL shorteners"""
+    """Extractor for general-purpose URL shorteners"""
     basecategory = "urlshortener"
     test = (
         ("https://bit.ly/3cWIUgq", {

From 875485313f216ce96d7d2a2c11e47d2d1b074a42 Mon Sep 17 00:00:00 2001
From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com>
Date: Sun, 9 Apr 2023 18:06:42 +0800
Subject: [PATCH 3/4] [urlshortener] force HTTPS

---
 gallery_dl/extractor/urlshortener.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index 23a6df86..3e404e86 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -28,7 +28,7 @@ class UrlshortenerExtractor(BaseExtractor):
     def __init__(self, match):
         BaseExtractor.__init__(self, match)
         self.headers = INSTANCES[self.category].get("headers")
-        self.url = match.group()
+        self.id = match.group(match.lastindex)
 
     def request(self, url, **kwargs):
         kwargs["headers"] = self.headers
@@ -36,7 +36,8 @@ class UrlshortenerExtractor(BaseExtractor):
 
     def items(self):
         response = self.request(
-            self.url, method="HEAD", allow_redirects=False, notfound="URL")
+            "{}/{}".format(self.root, self.id), method="HEAD",
+            allow_redirects=False, notfound="URL")
         if "location" not in response.headers:
             raise exception.StopExtraction("Unable to resolve short URL")
         yield Message.Queue, response.headers["location"], {}
@@ -56,4 +57,4 @@ INSTANCES = {
 }
 
 UrlshortenerExtractor.pattern = \
-    UrlshortenerExtractor.update(INSTANCES) + r"/[^/?#&]+"
+    UrlshortenerExtractor.update(INSTANCES) + r"/([^/?#&]+)"

From 5e63942b374fa08c450d888d152d1a0cae430a4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Sat, 15 Apr 2023 18:06:06 +0200
Subject: [PATCH 4/4] [urlshortener] move logic into 'link' subcategory extractor

---
 docs/supportedsites.md               |  4 +-
 gallery_dl/extractor/urlshortener.py | 73 ++++++++++++++++------------
 2 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 08b39071..d9f6c63a 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1276,13 +1276,13 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
     <td>Bitly</td>
     <td>https://bit.ly/</td>
-    <td></td>
+    <td>Links</td>
     <td></td>
 </tr>
 <tr>
     <td>Twitter t.co</td>
     <td>https://t.co/</td>
-    <td></td>
+    <td>Links</td>
     <td></td>
 </tr>
 
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index 3e404e86..1a39b5be 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -4,43 +4,15 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractor for general-purpose URL shorteners"""
+"""Extractors for general-purpose URL shorteners"""
 
 from .common import BaseExtractor, Message
 from .. import exception
 
 
 class UrlshortenerExtractor(BaseExtractor):
-    """Extractor for general-purpose URL shorteners"""
+    """Base class for URL shortener extractors"""
     basecategory = "urlshortener"
-    test = (
-        ("https://bit.ly/3cWIUgq", {
-            "count": 1,
-            "pattern": "^https://gumroad.com/l/storm_b1"
-        }),
-        ("https://t.co/bCgBY8Iv5n", {
-            "count": 1,
-            "pattern": ("^https://twitter.com/elonmusk/status/"
-                        "1421395561324896257/photo/1")
-        }),
-    )
-
-    def __init__(self, match):
-        BaseExtractor.__init__(self, match)
-        self.headers = INSTANCES[self.category].get("headers")
-        self.id = match.group(match.lastindex)
-
-    def request(self, url, **kwargs):
-        kwargs["headers"] = self.headers
-        return BaseExtractor.request(self, url, **kwargs)
-
-    def items(self):
-        response = self.request(
-            "{}/{}".format(self.root, self.id), method="HEAD",
-            allow_redirects=False, notfound="URL")
-        if "location" not in response.headers:
-            raise exception.StopExtraction("Unable to resolve short URL")
-        yield Message.Queue, response.headers["location"], {}
 
 
 INSTANCES = {
@@ -56,5 +28,42 @@ INSTANCES = {
     },
 }
 
-UrlshortenerExtractor.pattern = \
-    UrlshortenerExtractor.update(INSTANCES) + r"/([^/?#&]+)"
+BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
+
+
+class UrlshortenerLinkExtractor(UrlshortenerExtractor):
+    """Extractor for general-purpose URL shorteners"""
+    subcategory = "link"
+    pattern = BASE_PATTERN + r"/([^/?&#]+)"
+    test = (
+        ("https://bit.ly/3cWIUgq", {
+            "count": 1,
+            "pattern": "^https://gumroad.com/l/storm_b1",
+        }),
+        ("https://t.co/bCgBY8Iv5n", {
+            "count": 1,
+            "pattern": "^https://twitter.com/elonmusk/status/"
+                       "1421395561324896257/photo/1",
+        }),
+        ("https://t.co/abcdefghij", {
+            "exception": exception.NotFoundError,
+        }),
+    )
+
+    def __init__(self, match):
+        UrlshortenerExtractor.__init__(self, match)
+        self.id = match.group(match.lastindex)
+
+        # categories missing from INSTANCES (e.g. user-defined) get no headers
+        instance = INSTANCES.get(self.category) or {}
+        self.headers = instance.get("headers")
+
+    def items(self):
+        # HEAD without following redirects: only 'Location' is of interest
+        response = self.request(
+            "{}/{}".format(self.root, self.id), headers=self.headers,
+            method="HEAD", allow_redirects=False, notfound="URL")
+        location = response.headers.get("location")
+        if not location:
+            raise exception.StopExtraction("Unable to resolve short URL")
+        yield Message.Queue, location, {}