[subscribestar] extend fix + add test

Some attachments are inside an element that has an additional class besides
'doc_preview', e.g. 'class="doc_preview for_post"'.
This commit is contained in:
Mike Fährmann
2025-01-02 18:22:15 +01:00
parent a46f7981ee
commit 671297a8cc
2 changed files with 23 additions and 2 deletions

View File

@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
@@ -100,7 +101,8 @@ class SubscribestarExtractor(Extractor):
attachments = text.extr(
html, 'class="uploads-docs"', 'class="post-edit_form"')
if attachments:
for att in attachments.split('class="doc_preview"')[1:]:
for att in re.split(
r'class="doc_preview[" ]', attachments)[1:]:
media.append({
"id" : text.parse_int(text.extr(
att, 'data-upload-id="', '"')),
@@ -113,7 +115,8 @@ class SubscribestarExtractor(Extractor):
audios = text.extr(
html, 'class="uploads-audios"', 'class="post-edit_form"')
if audios:
for audio in audios.split('class="audio_preview-data"')[1:]:
for audio in re.split(
r'class="audio_preview-data[" ]', audios)[1:]:
media.append({
"id" : text.parse_int(text.extr(
audio, 'data-upload-id="', '"')),

View File

@@ -70,6 +70,24 @@ __tests__ = (
"width" : 700,
},
{
"#url" : "https://www.subscribestar.com/posts/920015",
"#comment" : "attachment (#6721)",
"#category": ("", "subscribestar", "post"),
"#class" : subscribestar.SubscribestarPostExtractor,
"#range" : "2",
"#pattern" : r"https://ss-uploads-prod\.b-cdn\.net/uploads_v2/users/11/posts/920015/bc018a55-9668-47f4-a664-b5fd66b56aaa\.pdf\?token=.+",
"date" : "dt:2023-05-30 09:20:00",
"extension": "pdf",
"filename" : "Training for freelancers - Fiverr",
"id" : 1957727,
"name" : "Training for freelancers - Fiverr.pdf",
"num" : 2,
"post_id" : 920015,
"type" : "attachment",
},
{
"#url" : "https://subscribestar.adult/posts/22950",
"#category": ("", "subscribestar", "post-adult"),