diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 644b9423..de7c8ed4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -841,6 +841,12 @@ Consider all sites to be NSFW unless otherwise known. Collections, Favorites, individual Images, Search Results, User Profiles + + Uploadir + https://uploadir.com/ + Files + + Vipr https://vipr.im/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 0293ebcb..6998a21e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -140,6 +140,7 @@ modules = [ "twibooru", "twitter", "unsplash", + "uploadir", "vanillarock", "vichan", "vk", diff --git a/gallery_dl/extractor/uploadir.py b/gallery_dl/extractor/uploadir.py new file mode 100644 index 00000000..8c532baa --- /dev/null +++ b/gallery_dl/extractor/uploadir.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://uploadir.com/""" + +from .common import Extractor, Message +from .. 
import text  # completes the "from .. " statement begun on the previous line
from email.utils import parsedate_tz
from datetime import datetime


class UploadirFileExtractor(Extractor):
    """Extractor for uploadir files"""
    category = "uploadir"
    subcategory = "file"
    root = "https://uploadir.com"
    # group 1 captures the file ID from /u/<id>, /uploads/<id>
    # and /user/uploads/<id> style URLs
    pattern = r"(?:https?://)?uploadir\.com/(?:user/)?u(?:ploads)?/([^/?#]+)"
    test = (
        # image
        ("https://uploadir.com/u/rd3t46ry", {
            "pattern": r"https://uploadir\.com/u/rd3t46ry",
            "count": 1,
            "keyword": {
                "extension": "jpg",
                "filename": "Chloe and Rachel 4K jpg",
            },
        }),
        # archive
        ("https://uploadir.com/uploads/gxe8ti9v/downloads/new", {
            "pattern": r"https://uploadir\.com/uploads/gxe8ti9v/downloads",
            "count": 1,
            "keyword": {
                "extension": "zip",
                "filename": "NYAN-Mods-Pack#1",
            },
        }),
        ("https://uploadir.com/uploads/rd3t46ry"),
        ("https://uploadir.com/user/uploads/rd3t46ry"),
    )

    def __init__(self, match):
        """Store the file ID captured by 'pattern'."""
        Extractor.__init__(self, match)
        self.file_id = match.group(1)

    def items(self):
        """Yield one Directory and one Url message for the requested file.

        A HEAD request is sent to the short '/u/<id>' URL.  If it was
        redirected, the target page is fetched and its form action and
        authenticity token are extracted; otherwise file name and date
        are taken from the response headers.
        """
        url = "{}/u/{}".format(self.root, self.file_id)
        response = self.request(url, method="HEAD")

        if response.history:
            # redirected: parse the HTML page we ended up on
            extr = text.extract_from(self.request(response.url).text)

            # NOTE(review): the two delimiters below were stripped from the
            # reviewed text (HTML tags lost in extraction); "<h2>"/"</h2>"
            # is a reconstruction - confirm against the page markup.
            name = text.unescape(extr("<h2>", "</h2>").strip())
            url = self.root + extr('class="form" action="', '"')
            token = extr('name="authenticity_token" value="', '"')

            # presumably consumed by the HTTP downloader to submit the
            # form as a POST request - verify against the downloader
            data = text.nameext_from_url(name, {
                "_http_method": "POST",
                "_http_data"  : {
                    "authenticity_token": token,
                    "upload_id": self.file_id,
                },
            })

        else:
            headers = response.headers
            # never hand None to the text helpers when the header is absent
            disposition = headers.get("Content-Disposition") or ""
            data = text.nameext_from_url(
                text.extr(disposition, 'filename="', '"'))

            date = self._parse_http_date(headers.get("Last-Modified"))
            if date is not None:
                data["date"] = date

        yield Message.Directory, data
        yield Message.Url, url, data

    @staticmethod
    def _parse_http_date(value):
        """Return a naive datetime parsed from the date string 'value'.

        Returns None if 'value' is empty or cannot be parsed, instead of
        raising like an unguarded 'datetime(*parsedate_tz(value)[:6])'.
        """
        if not value:
            return None
        parsed = parsedate_tz(value)
        if parsed is None:
            # parsedate_tz() signals an unparseable string with None
            return None
        try:
            return datetime(*parsed[:6])
        except (TypeError, ValueError):
            # syntactically valid but out-of-range fields (e.g. day 32)
            return None