[nitter] add 'retweets' option (#3278)
This commit is contained in:
@@ -1860,6 +1860,16 @@ Description
|
|||||||
You can use ``"all"`` instead of listing all values separately.
|
You can use ``"all"`` instead of listing all values separately.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.nitter.retweets
|
||||||
|
-------------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Description
|
||||||
|
Fetch media from Retweets. When disabled (the default), retweeted posts are skipped.
|
||||||
|
|
||||||
|
|
||||||
extractor.nitter.videos
|
extractor.nitter.videos
|
||||||
-----------------------
|
-----------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -205,6 +205,7 @@
|
|||||||
"include": "illustration,doujin"
|
"include": "illustration,doujin"
|
||||||
},
|
},
|
||||||
"nitter": {
|
"nitter": {
|
||||||
|
"retweets": false,
|
||||||
"videos": true
|
"videos": true
|
||||||
},
|
},
|
||||||
"oauth":
|
"oauth":
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ class NitterExtractor(BaseExtractor):
|
|||||||
self.user_obj = None
|
self.user_obj = None
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
retweets = self.config("retweets", False)
|
||||||
videos = self.config("videos", True)
|
videos = self.config("videos", True)
|
||||||
if videos:
|
if videos:
|
||||||
ytdl = (videos == "ytdl")
|
ytdl = (videos == "ytdl")
|
||||||
@@ -35,6 +36,10 @@ class NitterExtractor(BaseExtractor):
|
|||||||
for tweet_html in self.tweets():
|
for tweet_html in self.tweets():
|
||||||
tweet = self._tweet_from_html(tweet_html)
|
tweet = self._tweet_from_html(tweet_html)
|
||||||
|
|
||||||
|
if not retweets and tweet["retweet"]:
|
||||||
|
self.log.debug("Skipping %s (retweet)", tweet["tweet_id"])
|
||||||
|
continue
|
||||||
|
|
||||||
attachments = tweet.pop("_attach", "")
|
attachments = tweet.pop("_attach", "")
|
||||||
if attachments:
|
if attachments:
|
||||||
files = []
|
files = []
|
||||||
@@ -87,13 +92,13 @@ class NitterExtractor(BaseExtractor):
|
|||||||
extr('<span class="tweet-date', '')
|
extr('<span class="tweet-date', '')
|
||||||
link = extr('href="', '"')
|
link = extr('href="', '"')
|
||||||
return {
|
return {
|
||||||
"author": author,
|
"author" : author,
|
||||||
"user": self.user_obj or author,
|
"user" : self.user_obj or author,
|
||||||
"date": text.parse_datetime(
|
"date" : text.parse_datetime(
|
||||||
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
|
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
|
||||||
"tweet_id": link.rpartition("/")[2].partition("#")[0],
|
"tweet_id": link.rpartition("/")[2].partition("#")[0],
|
||||||
"content": extr('class="tweet-content', "</div").partition(">")[2],
|
"content": extr('class="tweet-content', "</div").partition(">")[2],
|
||||||
"_attach": extr('class="attachments', 'class="tweet-stats'),
|
"_attach" : extr('class="attachments', 'class="tweet-stats'),
|
||||||
"comments": text.parse_int(extr(
|
"comments": text.parse_int(extr(
|
||||||
'class="icon-comment', '</div>').rpartition(">")[2]),
|
'class="icon-comment', '</div>').rpartition(">")[2]),
|
||||||
"retweets": text.parse_int(extr(
|
"retweets": text.parse_int(extr(
|
||||||
@@ -102,6 +107,7 @@ class NitterExtractor(BaseExtractor):
|
|||||||
'class="icon-quote', '</div>').rpartition(">")[2]),
|
'class="icon-quote', '</div>').rpartition(">")[2]),
|
||||||
"likes" : text.parse_int(extr(
|
"likes" : text.parse_int(extr(
|
||||||
'class="icon-heart', '</div>').rpartition(">")[2]),
|
'class="icon-heart', '</div>').rpartition(">")[2]),
|
||||||
|
"retweet" : 'class="retweet-header' in html,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _user_from_html(self, html):
|
def _user_from_html(self, html):
|
||||||
|
|||||||
Reference in New Issue
Block a user