[twitter] add & use '_tweetid_to_datetime' method
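use 'dt' module functions directly (dt.parse, dt.parse_ts, dt.NONE) instead of self.parse_datetime, self.parse_timestamp, and util.NONE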
This commit is contained in:
@@ -9,7 +9,7 @@
|
|||||||
"""Extractors for https://x.com/"""
|
"""Extractors for https://x.com/"""
|
||||||
|
|
||||||
from .common import Extractor, Message, Dispatch
|
from .common import Extractor, Message, Dispatch
|
||||||
from .. import text, util, exception
|
from .. import text, util, dt, exception
|
||||||
from ..cache import cache, memcache
|
from ..cache import cache, memcache
|
||||||
import itertools
|
import itertools
|
||||||
import random
|
import random
|
||||||
@@ -365,14 +365,13 @@ class TwitterExtractor(Extractor):
|
|||||||
author = self._transform_user(author)
|
author = self._transform_user(author)
|
||||||
|
|
||||||
if tweet_id >= 300_000_000_000_000:
|
if tweet_id >= 300_000_000_000_000:
|
||||||
date = self.parse_timestamp(
|
date = self._tweetid_to_datetime(tweet_id)
|
||||||
((tweet_id >> 22) + 1_288_834_974_657) / 1000)
|
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
date = self.parse_datetime(
|
date = dt.parse(
|
||||||
legacy["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
legacy["created_at"], "%a %b %d %H:%M:%S %z %Y")
|
||||||
except Exception:
|
except Exception:
|
||||||
date = util.NONE
|
date = dt.NONE
|
||||||
source = tweet.get("source")
|
source = tweet.get("source")
|
||||||
|
|
||||||
tget = legacy.get
|
tget = legacy.get
|
||||||
@@ -460,8 +459,8 @@ class TwitterExtractor(Extractor):
|
|||||||
tdata, legacy["extended_entities"]["media"][0])
|
tdata, legacy["extended_entities"]["media"][0])
|
||||||
if tdata["retweet_id"]:
|
if tdata["retweet_id"]:
|
||||||
tdata["content"] = f"RT @{author['name']}: {tdata['content']}"
|
tdata["content"] = f"RT @{author['name']}: {tdata['content']}"
|
||||||
tdata["date_original"] = self.parse_timestamp(
|
tdata["date_original"] = self._tweetid_to_datetime(
|
||||||
((tdata["retweet_id"] >> 22) + 1_288_834_974_657) / 1000)
|
tdata["retweet_id"])
|
||||||
|
|
||||||
return tdata
|
return tdata
|
||||||
|
|
||||||
@@ -497,7 +496,7 @@ class TwitterExtractor(Extractor):
|
|||||||
"id": text.parse_int(cid),
|
"id": text.parse_int(cid),
|
||||||
"name": com.get("name"),
|
"name": com.get("name"),
|
||||||
"description": com.get("description"),
|
"description": com.get("description"),
|
||||||
"date": self.parse_timestamp(com.get("created_at", 0) / 1000),
|
"date": dt.parse_ts(com.get("created_at", 0) / 1000),
|
||||||
"nsfw": com.get("is_nsfw"),
|
"nsfw": com.get("is_nsfw"),
|
||||||
"role": com.get("role"),
|
"role": com.get("role"),
|
||||||
"member_count": com.get("member_count"),
|
"member_count": com.get("member_count"),
|
||||||
@@ -536,7 +535,7 @@ class TwitterExtractor(Extractor):
|
|||||||
"id" : text.parse_int(uid),
|
"id" : text.parse_int(uid),
|
||||||
"name" : core.get("screen_name"),
|
"name" : core.get("screen_name"),
|
||||||
"nick" : core.get("name"),
|
"nick" : core.get("name"),
|
||||||
"date" : self.parse_datetime(
|
"date" : dt.parse(
|
||||||
core["created_at"], "%a %b %d %H:%M:%S %z %Y"),
|
core["created_at"], "%a %b %d %H:%M:%S %z %Y"),
|
||||||
"profile_banner" : lget("profile_banner_url", ""),
|
"profile_banner" : lget("profile_banner_url", ""),
|
||||||
"favourites_count": lget("favourites_count"),
|
"favourites_count": lget("favourites_count"),
|
||||||
@@ -654,6 +653,9 @@ class TwitterExtractor(Extractor):
|
|||||||
self._cursor = cursor
|
self._cursor = cursor
|
||||||
return cursor
|
return cursor
|
||||||
|
|
||||||
|
def _tweetid_to_datetime(self, tweet_id):
    """Derive a date value from the timestamp bits of 'tweet_id'

    The ID is shifted right by 22 bits, a fixed offset is added, and
    the sum is divided by 1000 before being handed to dt.parse_ts().
    """
    # NOTE(review): the offset is presumably the Snowflake epoch in
    # milliseconds, making 'millis' a Unix timestamp in ms -- confirm
    millis = (tweet_id >> 22) + 1_288_834_974_657
    seconds = millis / 1000
    return dt.parse_ts(seconds)
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
"""Return general metadata"""
|
"""Return general metadata"""
|
||||||
return {}
|
return {}
|
||||||
@@ -927,7 +929,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
|
|||||||
|
|
||||||
def _transform_tweet(self, tweet):
|
def _transform_tweet(self, tweet):
|
||||||
tdata = TwitterExtractor._transform_tweet(self, tweet)
|
tdata = TwitterExtractor._transform_tweet(self, tweet)
|
||||||
tdata["date_bookmarked"] = self.parse_timestamp(
|
tdata["date_bookmarked"] = dt.parse_ts(
|
||||||
(int(tweet["sortIndex"] or 0) >> 20) / 1000)
|
(int(tweet["sortIndex"] or 0) >> 20) / 1000)
|
||||||
return tdata
|
return tdata
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user