[deviantart] rename 'external' to 'stash' (#302)

restrict extracted URLs to ones from https://sta.sh/...
This commit is contained in:
Mike Fährmann
2019-06-08 23:53:16 +02:00
parent c73c2cda50
commit c23bf263fe
3 changed files with 24 additions and 23 deletions

View File

@@ -395,18 +395,6 @@ Description Try to follow external URLs of embedded players.
=========== =====
extractor.deviantart.external
-----------------------------
=========== =====
Type ``bool``
Default ``false``
Description Try to follow external URLs in description fields.
Note: deviantart.metadata_ needs to be enabled to make descriptions
available.
=========== =====
extractor.deviantart.flat
-------------------------
=========== =====
@@ -499,6 +487,17 @@ Description The ``refresh_token`` value you get from linking your
=========== =====
extractor.deviantart.stash
--------------------------
=========== =====
Type ``bool``
Default ``false``
Description Extract Sta.sh resources from description texts.
Note: Enabling this option also enables deviantart.metadata_.
=========== =====
extractor.deviantart.wait-min
-----------------------------
=========== =====

View File

@@ -22,13 +22,13 @@
"deviantart":
{
"refresh-token": null,
"external": false,
"flat": true,
"folders": false,
"journals": "html",
"mature": true,
"metadata": false,
"original": true,
"stash": false,
"wait-min": 0
},
"exhentai":

View File

@@ -38,11 +38,14 @@ class DeviantartExtractor(Extractor):
self.api = DeviantartAPI(self)
self.offset = 0
self.flat = self.config("flat", True)
self.stash = self.config("stash", False)
self.original = self.config("original", True)
self.external = self.config("external", False)
self.user = match.group(1) or match.group(2)
self.group = False
if self.stash:
self.api.metadata = True
self.commit_journal = {
"html": self._commit_journal_html,
"text": self._commit_journal_text,
@@ -96,12 +99,11 @@ class DeviantartExtractor(Extractor):
journal = self.api.deviation_content(deviation["deviationid"])
yield self.commit_journal(deviation, journal)
if self.external:
for url in text.extract_iter(
deviation.get("description", ""), 'href="', '"'):
if "deviantart.com/users/outgoing?" in url:
url = text.unquote(url.partition("?")[2])
yield Message.Queue, url, deviation
if self.stash:
for match in DeviantartStashExtractor.pattern.finditer(
deviation.get("description", "")):
deviation["_extractor"] = DeviantartStashExtractor
yield Message.Queue, match.group(0), deviation
def deviations(self):
"""Return an iterable containing all relevant Deviation-objects"""
@@ -372,10 +374,10 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
# sta.sh URLs from description (#302)
(("https://www.deviantart.com/uotapo/art/"
"INANAKI-Memorial-Humane7-590297498"), {
"options": (("external", 1), ("metadata", 1), ("original", 0)),
"pattern": r"https?://(sta\.sh|youtu\.be)/\w+$",
"options": (("stash", 1), ("original", 0)),
"pattern": r"https?://sta\.sh/\w+$",
"range": "2-",
"count": 6,
"count": 4,
}),
# old-style URLs
("https://shimoda7.deviantart.com"