diff --git a/docs/configuration.rst b/docs/configuration.rst
index 95e47b4b..3406baad 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -922,6 +922,15 @@ Description A (comma-separated) list of post types to extract images, etc. from.
=========== =====
+extractor.twitter.content
+-------------------------
+=========== =====
+Type        ``bool``
+Default     ``false``
+Description Extract tweet text as ``content`` metadata.
+=========== =====
+
+
extractor.twitter.retweets
--------------------------
=========== =====
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index d67b7fc7..6732028d 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -132,6 +132,7 @@
},
"twitter":
{
+ "content": false,
"retweets": true,
"videos": false
},
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 5925319e..b2498a0e 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -107,7 +107,7 @@ EXTRACTORS = {
"pattern": r"(?:www\.)?fashionnova\.com",
"test-product": (
("https://www.fashionnova.com/products/essential-slide-red", {
- "pattern": r"https?://cdn\.shopify.com/",
+ "pattern": r"https?://cdn\d*\.shopify.com/",
"count": 3,
}),
("https://www.fashionnova.com/collections/flats/products/name"),
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c206ec53..ccba6406 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache
+import re
class TwitterExtractor(Extractor):
@@ -26,8 +27,13 @@ class TwitterExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.retweets = self.config("retweets", True)
+ self.content = self.config("content", False)
self.videos = self.config("videos", False)
+ if self.content:
+ self._emoji_sub = re.compile(
+ r']*>').sub
+
def items(self):
self.login()
yield Message.Version, 1
@@ -88,10 +94,9 @@ class TwitterExtractor(Extractor):
raise exception.AuthenticationError()
return self.session.cookies
- @staticmethod
- def _data_from_tweet(tweet):
+ def _data_from_tweet(self, tweet):
extr = text.extract_from(tweet)
- return {
+ data = {
"tweet_id" : text.parse_int(extr('data-tweet-id="' , '"')),
"retweet_id": text.parse_int(extr('data-retweet-id="', '"')),
"retweeter" : extr('data-retweeter="' , '"'),
@@ -99,10 +104,15 @@ class TwitterExtractor(Extractor):
"username" : extr('data-name="' , '"'),
"user_id" : text.parse_int(extr('data-user-id="' , '"')),
"date" : text.parse_timestamp(extr('data-time="', '"')),
- "content" : text.unescape(text.remove_html(extr(
- '