[twitter] handle errors during file extraction (#6647)
This commit is contained in:
@@ -121,14 +121,7 @@ class TwitterExtractor(Extractor):
|
|||||||
txt = data.get("full_text") or data.get("text") or ""
|
txt = data.get("full_text") or data.get("text") or ""
|
||||||
self.log.warning("'%s' (%s)", txt, data["id_str"])
|
self.log.warning("'%s' (%s)", txt, data["id_str"])
|
||||||
|
|
||||||
files = []
|
files = self._extract_files(data, tweet)
|
||||||
if "extended_entities" in data:
|
|
||||||
self._extract_media(
|
|
||||||
data, data["extended_entities"]["media"], files)
|
|
||||||
if "card" in tweet and self.cards:
|
|
||||||
self._extract_card(tweet, files)
|
|
||||||
if self.twitpic:
|
|
||||||
self._extract_twitpic(data, files)
|
|
||||||
if not files and not self.textonly:
|
if not files and not self.textonly:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -143,6 +136,39 @@ class TwitterExtractor(Extractor):
|
|||||||
text.nameext_from_url(url, file)
|
text.nameext_from_url(url, file)
|
||||||
yield Message.Url, url, file
|
yield Message.Url, url, file
|
||||||
|
|
||||||
|
def _extract_files(self, data, tweet):
    """Collect the files of a single tweet, tolerating extractor errors.

    Runs up to three extraction steps in a fixed order: media entities,
    Card, TwitPic.  Each step receives the same 'files' list and is
    expected to add its results to it.  A step that raises is logged and
    skipped, so results from the other steps are still returned.

    Parameters:
        data:  tweet mapping; read for "extended_entities" and "id_str"
        tweet: tweet mapping checked for a "card" entry
               (may be a different object than 'data')

    Returns:
        The list of files gathered by the steps that succeeded
        (possibly empty).
    """
    files = []

    def attempt(kind, extract):
        # Best effort: a failure in one source must not discard the rest.
        try:
            extract()
        except Exception as exc:
            # Traceback goes to the debug log only; the warning stays
            # a single line identifying the tweet and the error.
            self.log.debug("", exc_info=exc)
            self.log.warning(
                "%s: Error while extracting %s files (%s: %s)",
                data["id_str"], kind, exc.__class__.__name__, exc)

    # The lambdas defer every lookup (including ["media"]) until inside
    # the try block, so a malformed payload is reported, not raised.
    if "extended_entities" in data:
        attempt("media", lambda: self._extract_media(
            data, data["extended_entities"]["media"], files))

    if self.cards and "card" in tweet:
        attempt("Card", lambda: self._extract_card(tweet, files))

    if self.twitpic:
        attempt("TwitPic", lambda: self._extract_twitpic(data, files))

    return files
|
||||||
|
|
||||||
def _extract_media(self, tweet, entities, files):
|
def _extract_media(self, tweet, entities, files):
|
||||||
for media in entities:
|
for media in entities:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user