diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index 12a81aac..8df8645b 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -19,8 +19,8 @@ class FutabaThreadExtractor(Extractor):
directory_fmt = ("{category}", "{board_name}", "{thread}")
filename_fmt = "{tim}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
- urlfmt = "https://{server}.2chan.net/{board}/src/{filename}"
- pattern = r"(?:https?://)?(([^.]+)\.2chan\.net/([^/]+)/res/(\d+))"
+ url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
+ pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)"
test = ("http://dec.2chan.net/70/res/947.htm", {
"url": "c5c12b80b290e224b6758507b3bb952044f4595b",
"keyword": "4bd22e7a9c3636faecd6ea7082509e8655e10dd0",
@@ -28,22 +28,23 @@ class FutabaThreadExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- url, self.server, self.board, self.thread = match.groups()
- self.url = "https://" + url + ".htm"
+ self.server, self.board, self.thread = match.groups()
def items(self):
- page = self.request(self.url).text
- data = self.get_metadata(page)
+ url = "https://{}.2chan.net/{}/res/{}.htm".format(
+ self.server, self.board, self.thread)
+ page = self.request(url).text
+ data = self.metadata(page)
yield Message.Version, 1
yield Message.Directory, data
for post in self.posts(page):
if "filename" not in post:
continue
post.update(data)
- url = self.urlfmt.format_map(post)
+ url = self.url_fmt.format_map(post)
yield Message.Url, url, post
- def get_metadata(self, page):
+ def metadata(self, page):
"""Collect metadata for extractor-job"""
title = text.extract(page, "<title>", "</title>")[0]
title, _, boardname = title.rpartition(" - ")
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 8393cc51..893d15ff 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -34,6 +34,7 @@ class Extractor():
def __init__(self, match):
self.session = requests.Session()
self.log = logging.getLogger(self.category)
+ self.url = match.string
self._set_headers()
self._set_cookies()
self._set_proxies()
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 50ae9a5d..b5d103ae 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -312,7 +312,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
archive_fmt = "{index}.{extension}"
- pattern = BASE_PATTERN + r"/(?:art|journal)/[^/?]+-\d+"
+ pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?]+-\d+)"
test = (
(("https://www.deviantart.com/shimoda7/art/"
"For-the-sake-of-a-memory-10073852"), {
@@ -335,23 +335,22 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
- self.url = match.group(0)
- if not self.url.startswith("http"):
- self.url = "https://" + self.url
+ self.path = match.group(3)
def deviations(self):
- response = self.request(self.url, expect=range(400, 500))
+ url = "{}/{}/{}".format(self.root, self.user, self.path)
+ response = self.request(url, expect=range(400, 500))
deviation_id = text.extract(response.text, '//deviation/', '"')[0]
if response.status_code >= 400 or not deviation_id:
raise exception.NotFoundError("image")
return (self.api.deviation(deviation_id),)
-class DeviantartStashExtractor(DeviantartDeviationExtractor):
+class DeviantartStashExtractor(DeviantartExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
archive_fmt = "{index}.{extension}"
- pattern = r"(?:https?://)?sta\.sh/()()[a-z0-9]+"
+ pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
test = (
("https://sta.sh/022c83odnaxc", {
"pattern": r"https://s3.amazonaws.com/origin-orig.deviantart.net",
@@ -366,8 +365,13 @@ class DeviantartStashExtractor(DeviantartDeviationExtractor):
}),
)
+ def __init__(self, match):
+ DeviantartExtractor.__init__(self, match)
+ self.stash_id = match.group(1)
+
def deviations(self):
- page = self.request(self.url).text
+ url = "https://sta.sh/" + self.stash_id
+ page = self.request(url).text
deviation_id = text.extract(page, '//deviation/', '"')[0]
if deviation_id:
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 07e75e78..5d00d8ad 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -40,7 +40,6 @@ class DirectlinkExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.data = match.groupdict()
- self.url = match.string
def items(self):
text.nameext_from_url(self.url, self.data)
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 633faadf..c9cc4b90 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -340,7 +340,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
ExhentaiExtractor.__init__(self, match)
self.params = text.parse_query(match.group(1) or "")
self.params["page"] = text.parse_int(self.params.get("page"))
- self.url = self.root
+ self.search_url = self.root
def items(self):
self.login()
@@ -348,7 +348,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
yield Message.Version, 1
while True:
- page = self.request(self.url, params=self.params).text
+ page = self.request(self.search_url, params=self.params).text
for row in text.extract_iter(page, '