[nitter] support '/i/user/' URLs (#3310)

Also support using 'id:<userid>' in place of the username.

Note: not all Nitter instances appear to support '/i/user/' URLs.
This commit is contained in:
Mike Fährmann
2022-12-04 12:07:19 +01:00
parent f820fbed53
commit 20e12b5d7c

View File

@@ -23,7 +23,10 @@ class NitterExtractor(BaseExtractor):
def __init__(self, match): def __init__(self, match):
self.cookiedomain = self.root.partition("://")[2] self.cookiedomain = self.root.partition("://")[2]
BaseExtractor.__init__(self, match) BaseExtractor.__init__(self, match)
self.user = match.group(match.lastindex)
lastindex = match.lastindex
self.user = match.group(lastindex)
self.user_id = match.group(lastindex + 1)
self.user_obj = None self.user_obj = None
def items(self): def items(self):
@@ -181,7 +184,13 @@ class NitterExtractor(BaseExtractor):
def _pagination(self, path): def _pagination(self, path):
quoted = self.config("quoted", False) quoted = self.config("quoted", False)
base_url = url = self.root + path
if self.user_id:
self.user = self.request(
"{}/i/user/{}".format(self.root, self.user_id),
allow_redirects=False,
).headers["location"].rpartition("/")[2]
base_url = url = "{}/{}{}".format(self.root, self.user, path)
while True: while True:
tweets_html = self.request(url).text.split( tweets_html = self.request(url).text.split(
@@ -229,10 +238,12 @@ BASE_PATTERN = NitterExtractor.update({
}, },
}) })
USER_PATTERN = BASE_PATTERN + r"/(i(?:/user/|d:)(\d+)|[^/?#]+)"
class NitterTweetsExtractor(NitterExtractor): class NitterTweetsExtractor(NitterExtractor):
subcategory = "tweets" subcategory = "tweets"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/tweets)?(?:$|\?|#)" pattern = USER_PATTERN + r"(?:/tweets)?(?:$|\?|#)"
test = ( test = (
("https://nitter.net/supernaturepics", { ("https://nitter.net/supernaturepics", {
"pattern": r"https://nitter\.net/pic/orig" "pattern": r"https://nitter\.net/pic/orig"
@@ -255,9 +266,9 @@ class NitterTweetsExtractor(NitterExtractor):
"user": { "user": {
"date": "dt:2015-01-12 10:25:00", "date": "dt:2015-01-12 10:25:00",
"description": "The very best nature pictures.", "description": "The very best nature pictures.",
"favourites_count": 22698, "favourites_count": int,
"followers_count": int, "followers_count": int,
"friends_count": 2477, "friends_count": int,
"id": "2976459548", "id": "2976459548",
"name": "supernaturepics", "name": "supernaturepics",
"nick": "Nature Pictures", "nick": "Nature Pictures",
@@ -272,20 +283,25 @@ class NitterTweetsExtractor(NitterExtractor):
}, },
}, },
}), }),
("https://nitter.pussthecat.org/i/user/2976459548", {
"url": "c740a2683db2c8ed2f350afc0494475c4444025b",
"pattern": r"https://nitter.pussthecat\.org/pic/orig"
r"/media%2FCGMNYZvW0AIVoom\.jpg",
"range": "1",
}),
("https://nitter.lacontrevoie.fr/supernaturepics"), ("https://nitter.lacontrevoie.fr/supernaturepics"),
("https://nitter.pussthecat.org/supernaturepics"),
("https://nitter.1d4.us/supernaturepics"), ("https://nitter.1d4.us/supernaturepics"),
("https://nitter.kavin.rocks/supernaturepics"), ("https://nitter.kavin.rocks/id:2976459548"),
("https://nitter.unixfox.eu/supernaturepics"), ("https://nitter.unixfox.eu/supernaturepics"),
) )
def tweets(self): def tweets(self):
return self._pagination("/" + self.user) return self._pagination("")
class NitterRepliesExtractor(NitterExtractor): class NitterRepliesExtractor(NitterExtractor):
subcategory = "replies" subcategory = "replies"
pattern = BASE_PATTERN + r"/([^/?#]+)/with_replies" pattern = USER_PATTERN + r"/with_replies"
test = ( test = (
("https://nitter.net/supernaturepics/with_replies", { ("https://nitter.net/supernaturepics/with_replies", {
"pattern": r"https://nitter\.net/pic/orig" "pattern": r"https://nitter\.net/pic/orig"
@@ -295,37 +311,41 @@ class NitterRepliesExtractor(NitterExtractor):
("https://nitter.lacontrevoie.fr/supernaturepics/with_replies"), ("https://nitter.lacontrevoie.fr/supernaturepics/with_replies"),
("https://nitter.pussthecat.org/supernaturepics/with_replies"), ("https://nitter.pussthecat.org/supernaturepics/with_replies"),
("https://nitter.1d4.us/supernaturepics/with_replies"), ("https://nitter.1d4.us/supernaturepics/with_replies"),
("https://nitter.kavin.rocks/supernaturepics/with_replies"), ("https://nitter.kavin.rocks/id:2976459548/with_replies"),
("https://nitter.unixfox.eu/supernaturepics/with_replies"), ("https://nitter.unixfox.eu/i/user/2976459548/with_replies"),
) )
def tweets(self): def tweets(self):
return self._pagination("/" + self.user + "/with_replies") return self._pagination("/with_replies")
class NitterMediaExtractor(NitterExtractor): class NitterMediaExtractor(NitterExtractor):
subcategory = "media" subcategory = "media"
pattern = BASE_PATTERN + r"/([^/?#]+)/media" pattern = USER_PATTERN + r"/media"
test = ( test = (
("https://nitter.net/supernaturepics/media", { ("https://nitter.net/supernaturepics/media", {
"pattern": r"https://nitter\.net/pic/orig" "pattern": r"https://nitter\.net/pic/orig"
r"/media%2F[\w-]+\.(jpg|png)$", r"/media%2F[\w-]+\.(jpg|png)$",
"range": "1-20", "range": "1-20",
}), }),
("https://nitter.kavin.rocks/id:2976459548/media", {
"pattern": r"https://nitter\.kavin\.rocks/pic/orig"
r"/media%2F[\w-]+\.(jpg|png)$",
"range": "1-20",
}),
("https://nitter.lacontrevoie.fr/supernaturepics/media"), ("https://nitter.lacontrevoie.fr/supernaturepics/media"),
("https://nitter.pussthecat.org/supernaturepics/media"), ("https://nitter.pussthecat.org/supernaturepics/media"),
("https://nitter.1d4.us/supernaturepics/media"), ("https://nitter.1d4.us/supernaturepics/media"),
("https://nitter.kavin.rocks/supernaturepics/media"), ("https://nitter.unixfox.eu/i/user/2976459548/media"),
("https://nitter.unixfox.eu/supernaturepics/media"),
) )
def tweets(self): def tweets(self):
return self._pagination("/" + self.user + "/media") return self._pagination("/media")
class NitterSearchExtractor(NitterExtractor): class NitterSearchExtractor(NitterExtractor):
subcategory = "search" subcategory = "search"
pattern = BASE_PATTERN + r"/([^/?#]+)/search" pattern = USER_PATTERN + r"/search"
test = ( test = (
("https://nitter.net/supernaturepics/search", { ("https://nitter.net/supernaturepics/search", {
"pattern": r"https://nitter\.net/pic/orig" "pattern": r"https://nitter\.net/pic/orig"
@@ -335,12 +355,12 @@ class NitterSearchExtractor(NitterExtractor):
("https://nitter.lacontrevoie.fr/supernaturepics/search"), ("https://nitter.lacontrevoie.fr/supernaturepics/search"),
("https://nitter.pussthecat.org/supernaturepics/search"), ("https://nitter.pussthecat.org/supernaturepics/search"),
("https://nitter.1d4.us/supernaturepics/search"), ("https://nitter.1d4.us/supernaturepics/search"),
("https://nitter.kavin.rocks/supernaturepics/search"), ("https://nitter.kavin.rocks/id:2976459548/search"),
("https://nitter.unixfox.eu/supernaturepics/search"), ("https://nitter.unixfox.eu/i/user/2976459548/search"),
) )
def tweets(self): def tweets(self):
return self._pagination("/" + self.user + "/search") return self._pagination("/search")
class NitterTweetExtractor(NitterExtractor): class NitterTweetExtractor(NitterExtractor):
@@ -349,7 +369,7 @@ class NitterTweetExtractor(NitterExtractor):
directory_fmt = ("{category}", "{user[name]}") directory_fmt = ("{category}", "{user[name]}")
filename_fmt = "{tweet_id}_{num}.{extension}" filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{num}" archive_fmt = "{tweet_id}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)" pattern = BASE_PATTERN + r"/(i/web|[^/?#]+)/status/(\d+())"
test = ( test = (
("https://nitter.net/supernaturepics/status/604341487988576256", { ("https://nitter.net/supernaturepics/status/604341487988576256", {
"url": "3f2b64e175bf284aa672c3bb53ed275e470b919a", "url": "3f2b64e175bf284aa672c3bb53ed275e470b919a",