[twitter] update 'user' and 'author' fields
- 'author' is always the user who authored a tweet - 'user' is always the user specified in the input URL or equal to 'author' when the former is not given
This commit is contained in:
@@ -40,7 +40,7 @@ class TwitterExtractor(Extractor):
|
||||
self.quoted = self.config("quoted", False)
|
||||
self.videos = self.config("videos", True)
|
||||
self.cards = self.config("cards", False)
|
||||
self._user_id = None
|
||||
self._user = self._user_obj = None
|
||||
self._user_cache = {}
|
||||
self._init_sizes()
|
||||
|
||||
@@ -90,8 +90,9 @@ class TwitterExtractor(Extractor):
|
||||
if "in_reply_to_user_id_str" in data and (
|
||||
not self.replies or (
|
||||
self.replies == "self" and
|
||||
(self._user_id or data["in_reply_to_user_id_str"]) !=
|
||||
data["user_id_str"]
|
||||
data["user_id_str"] !=
|
||||
(self._user_obj["rest_id"] if self._user else
|
||||
data["in_reply_to_user_id_str"])
|
||||
)
|
||||
):
|
||||
self.log.debug("Skipping %s (reply)", data["id_str"])
|
||||
@@ -229,11 +230,13 @@ class TwitterExtractor(Extractor):
|
||||
files.append({"url": url})
|
||||
|
||||
def _transform_tweet(self, tweet):
|
||||
if "core" in tweet:
|
||||
user = self._transform_user(
|
||||
tweet["core"]["user_results"]["result"])
|
||||
if "author" in tweet:
|
||||
author = tweet["author"]
|
||||
elif "core" in tweet:
|
||||
author = tweet["core"]["user_results"]["result"]
|
||||
else:
|
||||
user = self._transform_user(tweet["user"])
|
||||
author = tweet["user"]
|
||||
author = self._transform_user(author)
|
||||
|
||||
if "legacy" in tweet:
|
||||
tweet = tweet["legacy"]
|
||||
@@ -250,7 +253,8 @@ class TwitterExtractor(Extractor):
|
||||
tget("in_reply_to_status_id_str")),
|
||||
"date" : text.parse_datetime(
|
||||
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
|
||||
"user" : user,
|
||||
"user" : self._user or author,
|
||||
"author" : author,
|
||||
"lang" : tweet["lang"],
|
||||
"favorite_count": tget("favorite_count"),
|
||||
"quote_count" : tget("quote_count"),
|
||||
@@ -283,11 +287,6 @@ class TwitterExtractor(Extractor):
|
||||
if "quoted_by_id_str" in tweet:
|
||||
tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
|
||||
|
||||
if "author" in tweet:
|
||||
tdata["author"] = self._transform_user(tweet["author"])
|
||||
else:
|
||||
tdata["author"] = tdata["user"]
|
||||
|
||||
return tdata
|
||||
|
||||
def _transform_user(self, user):
|
||||
@@ -459,21 +458,9 @@ class TwitterTimelineExtractor(TwitterExtractor):
|
||||
if tweet is None:
|
||||
return
|
||||
|
||||
# get username
|
||||
if not self.user.startswith("id:"):
|
||||
username = self.user
|
||||
elif "core" in tweet:
|
||||
username = (tweet["core"]["user_results"]["result"]
|
||||
["legacy"]["screen_name"])
|
||||
else:
|
||||
username = tweet["user"]["screen_name"]
|
||||
|
||||
# get tweet data
|
||||
if "legacy" in tweet:
|
||||
tweet = tweet["legacy"]
|
||||
|
||||
# build search query
|
||||
query = "from:{} max_id:{}".format(username, tweet["id_str"])
|
||||
query = "from:{} max_id:{}".format(
|
||||
self._user["name"], tweet["rest_id"])
|
||||
if self.retweets:
|
||||
query += " include:retweets include:nativeretweets"
|
||||
if not self.textonly:
|
||||
@@ -888,7 +875,6 @@ class TwitterAPI():
|
||||
self._nsfw_warning = True
|
||||
self._syndication = extractor.config("syndication")
|
||||
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
|
||||
self._user = None
|
||||
|
||||
cookies = extractor.session.cookies
|
||||
cookiedomain = extractor.cookiedomain
|
||||
@@ -1050,13 +1036,13 @@ class TwitterAPI():
|
||||
|
||||
def _user_id_by_screen_name(self, screen_name):
|
||||
if screen_name.startswith("id:"):
|
||||
self._user = util.SENTINEL
|
||||
user_id = screen_name[3:]
|
||||
user = self.user_by_rest_id(user_id)
|
||||
|
||||
else:
|
||||
user = ()
|
||||
try:
|
||||
user = self._user = self.user_by_screen_name(screen_name)
|
||||
user = self.user_by_screen_name(screen_name)
|
||||
user_id = user["rest_id"]
|
||||
except KeyError:
|
||||
if "unavailable_message" in user:
|
||||
@@ -1066,7 +1052,10 @@ class TwitterAPI():
|
||||
else:
|
||||
raise exception.NotFoundError("user")
|
||||
|
||||
self.extractor._user_id = user_id
|
||||
extr = self.extractor
|
||||
extr._user_obj = user
|
||||
extr._user = extr._transform_user(user)
|
||||
|
||||
return user_id
|
||||
|
||||
@cache(maxage=3600)
|
||||
@@ -1226,17 +1215,10 @@ class TwitterAPI():
|
||||
except LookupError:
|
||||
extr.log.debug(data)
|
||||
|
||||
if self._user:
|
||||
user = self._user
|
||||
if user is util.SENTINEL:
|
||||
try:
|
||||
user = self.user_by_rest_id(variables["userId"])
|
||||
except KeyError:
|
||||
raise exception.NotFoundError("user")
|
||||
user = user.get("legacy")
|
||||
if not user:
|
||||
pass
|
||||
elif user.get("blocked_by"):
|
||||
user = extr._user_obj
|
||||
if user:
|
||||
user = user["legacy"]
|
||||
if user.get("blocked_by"):
|
||||
if self.headers["x-twitter-auth-type"] and \
|
||||
extr.config("logout"):
|
||||
guest_token = self._guest_token()
|
||||
@@ -1322,7 +1304,7 @@ class TwitterAPI():
|
||||
try:
|
||||
legacy["retweeted_status_id_str"] = \
|
||||
retweet["rest_id"]
|
||||
legacy["author"] = \
|
||||
tweet["author"] = \
|
||||
retweet["core"]["user_results"]["result"]
|
||||
if "extended_entities" in retweet["legacy"] and \
|
||||
"extended_entities" not in legacy:
|
||||
@@ -1336,9 +1318,6 @@ class TwitterAPI():
|
||||
if "quoted_status_result" in tweet:
|
||||
try:
|
||||
quoted = tweet["quoted_status_result"]["result"]
|
||||
quoted["legacy"]["author"] = \
|
||||
quoted["core"]["user_results"]["result"]
|
||||
quoted["core"] = tweet["core"]
|
||||
quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
|
||||
yield quoted
|
||||
except KeyError:
|
||||
|
||||
@@ -6,4 +6,4 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
__version__ = "1.22.4"
|
||||
__version__ = "1.23.0-dev"
|
||||
|
||||
Reference in New Issue
Block a user