[subscribestar] extract attached media files (#852)
This commit is contained in:
@@ -89,13 +89,27 @@ class SubscribestarExtractor(Extractor):
|
||||
|
||||
@staticmethod
def _media_from_post(html):
    """Collect the media entries embedded in a post's HTML.

    Two sources are scanned, in this order:

    1. The JSON list stored in the ``data-gallery="..."`` attribute.
       Entries whose ``url`` contains ``/previews/`` are left out.
    2. The markup between ``class="uploads-docs"`` and
       ``data-role="post-edit_form"``, where each ``class="doc_preview"``
       section yields one dict with the keys ``id``, ``url`` and
       ``type`` (always ``"attachment"``).

    Returns a list of dicts; the list is empty when neither source
    is present in ``html``.
    """
    media = []

    # The gallery attribute holds HTML-escaped JSON, so it has to be
    # unescaped before it can be parsed.
    gallery = text.extract(html, 'data-gallery="', '"')[0]
    if gallery:
        media.extend(
            item for item in json.loads(text.unescape(gallery))
            if "/previews/" not in item["url"]
        )

    attachments = text.extract(
        html, 'class="uploads-docs"', 'data-role="post-edit_form"')[0]
    if attachments:
        # Everything before the first 'doc_preview' marker is leading
        # markup rather than an attachment, hence the [1:] slice.
        for att in attachments.split('class="doc_preview"')[1:]:
            media.append({
                "id": text.parse_int(text.extract(
                    att, 'data-upload-id="', '"')[0]),
                "url": text.extract(att, 'href="', '"')[0],
                "type": "attachment",
            })

    return media
|
||||
|
||||
def _data_from_post(self, html):
|
||||
extr = text.extract_from(html)
|
||||
|
||||
Reference in New Issue
Block a user