[twitter] handle errors during file extraction (#6647)

This commit is contained in:
Mike Fährmann
2025-01-21 18:23:54 +01:00
parent d9c4fcc7fa
commit cb1a75eefc

View File

@@ -121,14 +121,7 @@ class TwitterExtractor(Extractor):
txt = data.get("full_text") or data.get("text") or ""
self.log.warning("'%s' (%s)", txt, data["id_str"])
files = []
if "extended_entities" in data:
self._extract_media(
data, data["extended_entities"]["media"], files)
if "card" in tweet and self.cards:
self._extract_card(tweet, files)
if self.twitpic:
self._extract_twitpic(data, files)
files = self._extract_files(data, tweet)
if not files and not self.textonly:
continue
@@ -143,6 +136,39 @@ class TwitterExtractor(Extractor):
text.nameext_from_url(url, file)
yield Message.Url, url, file
def _extract_files(self, data, tweet):
    """Gather the files of a single tweet, tolerating extractor errors.

    Up to three extraction steps run in a fixed order: media entities,
    card, TwitPic. Each step is attempted only when its precondition
    holds. Any exception raised by a step is logged (traceback at debug
    level, one-line summary at warning level) and the remaining steps
    still run.

    Parameters:
        data: mapping read for "extended_entities" and "id_str" and
            passed on to the media and TwitPic steps
        tweet: mapping checked for "card" and passed on to the card step

    Returns:
        The list that the extraction steps were given to fill.
    """
    collected = []

    # (precondition, action, warning format) per step. Both callables are
    # evaluated lazily so that every check happens only after the previous
    # step has finished, and so that lookups such as
    # data["extended_entities"]["media"] stay inside the guarded call.
    stages = (
        (
            lambda: "extended_entities" in data,
            lambda: self._extract_media(
                data, data["extended_entities"]["media"], collected),
            "%s: Error while extracting media files (%s: %s)",
        ),
        (
            lambda: self.cards and "card" in tweet,
            lambda: self._extract_card(tweet, collected),
            "%s: Error while extracting Card files (%s: %s)",
        ),
        (
            lambda: self.twitpic,
            lambda: self._extract_twitpic(data, collected),
            "%s: Error while extracting TwitPic files (%s: %s)",
        ),
    )

    for applies, run, warning_fmt in stages:
        if not applies():
            continue
        try:
            run()
        except Exception as exc:
            # full traceback for debugging, short notice for the user
            self.log.debug("", exc_info=exc)
            self.log.warning(
                warning_fmt, data["id_str"], type(exc).__name__, exc)

    return collected
def _extract_media(self, tweet, entities, files):
for media in entities: