diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dcace894..651d50ca 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -67,6 +67,12 @@ Consider all sites to be NSFW unless otherwise known. Galleries + + Architizer + https://architizer.com/ + Projects + + ArtStation https://www.artstation.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 74d4f52e..aeea7187 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -17,6 +17,7 @@ modules = [ "8kun", "8muses", "adultempire", + "architizer", "artstation", "aryion", "bcy", diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py new file mode 100644 index 00000000..0a8c327d --- /dev/null +++ b/gallery_dl/extractor/architizer.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://architizer.com/""" + +from .common import GalleryExtractor +from .. import text + + +class ArchitizerProjectExtractor(GalleryExtractor): + """Extractor for project pages on architizer.com""" + category = "architizer" + subcategory = "project" + root = "https://architizer.com" + directory_fmt = ("{category}", "{firm}", "{title}") + filename_fmt = "{filename}.{extension}" + archive_fmt = "{gid}_{num}" + pattern = r"(?:https?://)?architizer\.com/projects/([^/?#]+)" + test = ("https://architizer.com/projects/house-lo/", { + "pattern": r"https://architizer-prod\.imgix\.net/media/mediadata" + r"/uploads/.+\.jpg$", + "keyword": { + "count": 27, + "description": str, + "firm": "Atelier Lina Bellovicova", + "gid": "225496", + "location": "Czechia", + "num": int, + "size": "1000 sqft - 3000 sqft", + "slug": "house-lo", + "status": "Built", + "subcategory": "project", + "title": "House LO", + "type": "Residential › Private House", + "year": "2018", + }, + }) + + def __init__(self, match): + url = "{}/projects/{}/".format(self.root, match.group(1)) + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + extr = text.extract_from(page) + return { + "title" : extr("data-name='", "'"), + "slug" : extr("data-slug='", "'"), + "gid" : extr("data-gid='", "'").rpartition(".")[2], + "firm" : extr("data-firm-leaders-str='", "'"), + "location" : extr("

", "<").strip(), + "type" : text.unescape(text.remove_html(extr( + '
Type
', 'STATUS', 'YEAR', 'SIZE', '', '') + .replace("
", "\n")), + } + + def images(self, page): + return [ + (url, None) + for url in text.extract_iter( + page, "property='og:image:secure_url' content='", "?") + ]