From 0d84d3af555612dc24c3c2ed7ba0be60b74234fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Mon, 3 Aug 2020 22:02:42 +0200
Subject: [PATCH] [subscribestar] extract attached media files (#852)

---
Reviewer notes (not part of the commit message; text between the
three-dash line above and the first "diff --git" line below is ignored
when the patch is applied with git-am):

* _media_from_post() previously returned either a list of gallery items
  or an empty tuple.  After this change it always returns the list
  `media`, which may be empty.
* Gallery handling is unchanged in substance: the value of the
  'data-gallery="..."' attribute is passed through text.unescape() and
  json.loads(), and every item whose "url" contains "/previews/" is
  dropped.  The surviving items are now added to `media` with extend()
  instead of being returned directly.
* New: the HTML between 'class="uploads-docs"' and
  'data-role="post-edit_form"' is split on 'class="doc_preview"'.  The
  chunk before the first marker is discarded ([1:]); each following chunk
  yields one dict with the keys "id" (the data-upload-id value passed
  through text.parse_int), "url" (the first href value in the chunk) and
  "type" (the constant "attachment").
* Gallery items come first in the returned list, attachments after them.
* NOTE(review): "url" is stored exactly as text.extract() returns it.
  Presumably that is None when a chunk has no href, and the value is not
  HTML-unescaped the way the gallery JSON is -- confirm against
  gallery_dl.text before relying on it downstream.

 gallery_dl/extractor/subscribestar.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 4542189a..076d0c0f 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -89,13 +89,27 @@ class SubscribestarExtractor(Extractor):
 
     @staticmethod
     def _media_from_post(html):
+        media = []
+
         gallery = text.extract(html, 'data-gallery="', '"')[0]
         if gallery:
-            return [
+            media.extend(
                 item for item in json.loads(text.unescape(gallery))
                 if "/previews/" not in item["url"]
-            ]
-        return ()
+            )
+
+        attachments = text.extract(
+            html, 'class="uploads-docs"', 'data-role="post-edit_form"')[0]
+        if attachments:
+            for att in attachments.split('class="doc_preview"')[1:]:
+                media.append({
+                    "id"  : text.parse_int(text.extract(
+                        att, 'data-upload-id="', '"')[0]),
+                    "url" : text.extract(att, 'href="', '"')[0],
+                    "type": "attachment",
+                })
+
+        return media
 
     def _data_from_post(self, html):
         extr = text.extract_from(html)