From 3d6489a4c0d2999ba862fa6cbe7ec9d6965572fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 25 Nov 2022 18:50:04 +0100 Subject: [PATCH] [nitter] update 'user' and 'author' --- gallery_dl/extractor/nitter.py | 62 ++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py index dc597653..09133fdd 100644 --- a/gallery_dl/extractor/nitter.py +++ b/gallery_dl/extractor/nitter.py @@ -23,6 +23,7 @@ class NitterExtractor(BaseExtractor): self.cookiedomain = self.root.partition("://")[2] BaseExtractor.__init__(self, match) self.user = match.group(match.lastindex) + self.user_obj = None def items(self): videos = self.config("videos", True) @@ -79,14 +80,15 @@ class NitterExtractor(BaseExtractor): def _tweet_from_html(self, html): extr = text.extract_from(html) - user = { + author = { "name": extr('class="fullname" href="/', '"'), "nick": extr('title="', '"'), } extr('")[2]), } + def _user_from_html(self, html): + extr = text.extract_from(html, html.index('class="profile-tabs')) + banner = extr('class="profile-banner">', '<'), + "date" : text.parse_datetime( + extr('class="profile-joindate">', '<').replace(",", ""), + "friends_count" : extr( + 'class="profile-stat-num">', '<').replace(",", ""), + "followers_count" : extr( + 'class="profile-stat-num">', '<').replace(",", ""), + "favourites_count": extr( + 'class="profile-stat-num">', '<').replace(",", ""), + "verified" : 'title="Verified account"' in html, + } + def _pagination(self, path): base_url = url = self.root + path while True: - page = self.request(url).text + tweets_html = self.request(url).text.split( + '