[uploadir] update (#3162)
- prevent extra HTTP request from redirects
- add 'id' metadata field
- set 'filename_fmt' and 'archive_fmt'
This commit is contained in:
@@ -10,8 +10,6 @@
|
|||||||
|
|
||||||
from .common import Extractor, Message
|
from .common import Extractor, Message
|
||||||
from .. import text
|
from .. import text
|
||||||
from email.utils import parsedate_tz
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
|
|
||||||
class UploadirFileExtractor(Extractor):
|
class UploadirFileExtractor(Extractor):
|
||||||
@@ -19,6 +17,8 @@ class UploadirFileExtractor(Extractor):
|
|||||||
category = "uploadir"
|
category = "uploadir"
|
||||||
subcategory = "file"
|
subcategory = "file"
|
||||||
root = "https://uploadir.com"
|
root = "https://uploadir.com"
|
||||||
|
filename_fmt = "{filename} ({id}).{extension}"
|
||||||
|
archive_fmt = "{id}"
|
||||||
pattern = r"(?:https?://)?uploadir\.com/(?:user/)?u(?:ploads)?/([^/?#]+)"
|
pattern = r"(?:https?://)?uploadir\.com/(?:user/)?u(?:ploads)?/([^/?#]+)"
|
||||||
test = (
|
test = (
|
||||||
# image
|
# image
|
||||||
@@ -28,6 +28,7 @@ class UploadirFileExtractor(Extractor):
|
|||||||
"keyword": {
|
"keyword": {
|
||||||
"extension": "jpg",
|
"extension": "jpg",
|
||||||
"filename": "Chloe and Rachel 4K jpg",
|
"filename": "Chloe and Rachel 4K jpg",
|
||||||
|
"id": "rd3t46ry",
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
# archive
|
# archive
|
||||||
@@ -37,6 +38,7 @@ class UploadirFileExtractor(Extractor):
|
|||||||
"keyword": {
|
"keyword": {
|
||||||
"extension": "zip",
|
"extension": "zip",
|
||||||
"filename": "NYAN-Mods-Pack#1",
|
"filename": "NYAN-Mods-Pack#1",
|
||||||
|
"id": "gxe8ti9v",
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
("https://uploadir.com/uploads/rd3t46ry"),
|
("https://uploadir.com/uploads/rd3t46ry"),
|
||||||
@@ -49,10 +51,11 @@ class UploadirFileExtractor(Extractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
url = "{}/u/{}".format(self.root, self.file_id)
|
url = "{}/u/{}".format(self.root, self.file_id)
|
||||||
response = self.request(url, method="HEAD")
|
response = self.request(url, method="HEAD", allow_redirects=False)
|
||||||
|
|
||||||
if response.history:
|
if 300 <= response.status_code < 400:
|
||||||
extr = text.extract_from(self.request(response.url).text)
|
url = response.headers["Location"]
|
||||||
|
extr = text.extract_from(self.request(url).text)
|
||||||
|
|
||||||
name = text.unescape(extr("<h2>", "</h2>").strip())
|
name = text.unescape(extr("<h2>", "</h2>").strip())
|
||||||
url = self.root + extr('class="form" action="', '"')
|
url = self.root + extr('class="form" action="', '"')
|
||||||
@@ -67,13 +70,9 @@ class UploadirFileExtractor(Extractor):
|
|||||||
})
|
})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
hget = response.headers.get
|
hcd = response.headers.get("Content-Disposition")
|
||||||
hcd = hget("Content-Disposition")
|
|
||||||
hlm = hget("Last-Modified")
|
|
||||||
|
|
||||||
data = text.nameext_from_url(text.extr(hcd, 'filename="', '"'))
|
data = text.nameext_from_url(text.extr(hcd, 'filename="', '"'))
|
||||||
if hlm:
|
|
||||||
data["date"] = datetime(*parsedate_tz(hlm)[:6])
|
|
||||||
|
|
||||||
|
data["id"] = self.file_id
|
||||||
yield Message.Directory, data
|
yield Message.Directory, data
|
||||||
yield Message.Url, url, data
|
yield Message.Url, url, data
|
||||||
|
|||||||
Reference in New Issue
Block a user