[twitter] implement workarounds for empty 'core' data (#8613)

https://github.com/mikf/gallery-dl/issues/8613#issuecomment-3626389269
This commit is contained in:
Mike Fährmann
2025-12-08 22:34:32 +01:00
parent a35e20077d
commit 7516537630

View File

@@ -346,21 +346,24 @@ class TwitterExtractor(Extractor):
files.append({"url": url}) files.append({"url": url})
def _transform_tweet(self, tweet): def _transform_tweet(self, tweet):
if "author" in tweet:
author = tweet["author"]
elif "core" in tweet:
author = tweet["core"]["user_results"]["result"]
else:
author = tweet["user"]
author = self._transform_user(author)
if "legacy" in tweet: if "legacy" in tweet:
legacy = tweet["legacy"] legacy = tweet["legacy"]
else: else:
legacy = tweet legacy = tweet
tget = legacy.get
tweet_id = int(legacy["id_str"]) tweet_id = int(legacy["id_str"])
if "author" in tweet:
author = tweet["author"]
elif "core" in tweet:
try:
author = tweet["core"]["user_results"]["result"]
except KeyError:
self.log.warning("%s: Missing 'author' data", tweet_id)
author = util.NONE
else:
author = tweet["user"]
author = self._transform_user(author)
if tweet_id >= 300000000000000: if tweet_id >= 300000000000000:
date = self.parse_timestamp( date = self.parse_timestamp(
((tweet_id >> 22) + 1288834974657) // 1000) ((tweet_id >> 22) + 1288834974657) // 1000)
@@ -372,6 +375,7 @@ class TwitterExtractor(Extractor):
date = util.NONE date = util.NONE
source = tweet.get("source") source = tweet.get("source")
tget = legacy.get
tdata = { tdata = {
"tweet_id" : tweet_id, "tweet_id" : tweet_id,
"retweet_id" : text.parse_int( "retweet_id" : text.parse_int(
@@ -1991,7 +1995,7 @@ class TwitterAPI():
"Unable to retrieve Tweets from this timeline") "Unable to retrieve Tweets from this timeline")
tweets = [] tweets = []
tweet = last_tweet = None tweet = last_tweet = retry = None
api_tries = 1 api_tries = 1
if pinned_tweet is not None and isinstance(pinned_tweet, dict): if pinned_tweet is not None and isinstance(pinned_tweet, dict):
@@ -2078,6 +2082,16 @@ class TwitterAPI():
(entry.get("entryId") or "").rpartition("-")[2]) (entry.get("entryId") or "").rpartition("-")[2])
continue continue
if retry is None:
try:
tweet["core"]["user_results"]["result"]
retry = False
except KeyError:
self.log.warning("Received Tweet results without "
"'core' data ... Retrying")
retry = True
break
if "retweeted_status_result" in legacy: if "retweeted_status_result" in legacy:
try: try:
retweet = legacy["retweeted_status_result"]["result"] retweet = legacy["retweeted_status_result"]["result"]
@@ -2134,7 +2148,9 @@ class TwitterAPI():
tweet.get("rest_id")) tweet.get("rest_id"))
continue continue
if tweet: if retry:
continue
elif tweet:
stop_tweets = stop_tweets_max stop_tweets = stop_tweets_max
last_tweet = tweet last_tweet = tweet
elif stop_tweets <= 0: elif stop_tweets <= 0: