[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given
This commit is contained in:
Mike Fährmann
2022-07-17 17:04:24 +02:00
parent 51b1999d4b
commit 749802c7bd
2 changed files with 26 additions and 47 deletions

View File

@@ -40,7 +40,7 @@ class TwitterExtractor(Extractor):
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
self._user_id = None
self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -90,8 +90,9 @@ class TwitterExtractor(Extractor):
if "in_reply_to_user_id_str" in data and (
not self.replies or (
self.replies == "self" and
(self._user_id or data["in_reply_to_user_id_str"]) !=
data["user_id_str"]
data["user_id_str"] !=
(self._user_obj["rest_id"] if self._user else
data["in_reply_to_user_id_str"])
)
):
self.log.debug("Skipping %s (reply)", data["id_str"])
@@ -229,11 +230,13 @@ class TwitterExtractor(Extractor):
files.append({"url": url})
def _transform_tweet(self, tweet):
if "core" in tweet:
user = self._transform_user(
tweet["core"]["user_results"]["result"])
if "author" in tweet:
author = tweet["author"]
elif "core" in tweet:
author = tweet["core"]["user_results"]["result"]
else:
user = self._transform_user(tweet["user"])
author = tweet["user"]
author = self._transform_user(author)
if "legacy" in tweet:
tweet = tweet["legacy"]
@@ -250,7 +253,8 @@ class TwitterExtractor(Extractor):
tget("in_reply_to_status_id_str")),
"date" : text.parse_datetime(
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"user" : user,
"user" : self._user or author,
"author" : author,
"lang" : tweet["lang"],
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
@@ -283,11 +287,6 @@ class TwitterExtractor(Extractor):
if "quoted_by_id_str" in tweet:
tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
if "author" in tweet:
tdata["author"] = self._transform_user(tweet["author"])
else:
tdata["author"] = tdata["user"]
return tdata
def _transform_user(self, user):
@@ -459,21 +458,9 @@ class TwitterTimelineExtractor(TwitterExtractor):
if tweet is None:
return
# get username
if not self.user.startswith("id:"):
username = self.user
elif "core" in tweet:
username = (tweet["core"]["user_results"]["result"]
["legacy"]["screen_name"])
else:
username = tweet["user"]["screen_name"]
# get tweet data
if "legacy" in tweet:
tweet = tweet["legacy"]
# build search query
query = "from:{} max_id:{}".format(username, tweet["id_str"])
query = "from:{} max_id:{}".format(
self._user["name"], tweet["rest_id"])
if self.retweets:
query += " include:retweets include:nativeretweets"
if not self.textonly:
@@ -888,7 +875,6 @@ class TwitterAPI():
self._nsfw_warning = True
self._syndication = extractor.config("syndication")
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
self._user = None
cookies = extractor.session.cookies
cookiedomain = extractor.cookiedomain
@@ -1050,13 +1036,13 @@ class TwitterAPI():
def _user_id_by_screen_name(self, screen_name):
if screen_name.startswith("id:"):
self._user = util.SENTINEL
user_id = screen_name[3:]
user = self.user_by_rest_id(user_id)
else:
user = ()
try:
user = self._user = self.user_by_screen_name(screen_name)
user = self.user_by_screen_name(screen_name)
user_id = user["rest_id"]
except KeyError:
if "unavailable_message" in user:
@@ -1066,7 +1052,10 @@ class TwitterAPI():
else:
raise exception.NotFoundError("user")
self.extractor._user_id = user_id
extr = self.extractor
extr._user_obj = user
extr._user = extr._transform_user(user)
return user_id
@cache(maxage=3600)
@@ -1226,17 +1215,10 @@ class TwitterAPI():
except LookupError:
extr.log.debug(data)
if self._user:
user = self._user
if user is util.SENTINEL:
try:
user = self.user_by_rest_id(variables["userId"])
except KeyError:
raise exception.NotFoundError("user")
user = user.get("legacy")
if not user:
pass
elif user.get("blocked_by"):
user = extr._user_obj
if user:
user = user["legacy"]
if user.get("blocked_by"):
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
guest_token = self._guest_token()
@@ -1322,7 +1304,7 @@ class TwitterAPI():
try:
legacy["retweeted_status_id_str"] = \
retweet["rest_id"]
legacy["author"] = \
tweet["author"] = \
retweet["core"]["user_results"]["result"]
if "extended_entities" in retweet["legacy"] and \
"extended_entities" not in legacy:
@@ -1336,9 +1318,6 @@ class TwitterAPI():
if "quoted_status_result" in tweet:
try:
quoted = tweet["quoted_status_result"]["result"]
quoted["legacy"]["author"] = \
quoted["core"]["user_results"]["result"]
quoted["core"] = tweet["core"]
quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
yield quoted
except KeyError:

View File

@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
__version__ = "1.22.4"
__version__ = "1.23.0-dev"