diff --git a/gallery_dl/extractor/simpcity.py b/gallery_dl/extractor/simpcity.py
index 9e66e914..41be2dae 100644
--- a/gallery_dl/extractor/simpcity.py
+++ b/gallery_dl/extractor/simpcity.py
@@ -21,20 +21,66 @@ class SimpcityExtractor(Extractor):
     cookies_domain = "simpcity.cr"
     cookies_names = ("ogaddgmetaprof_user",)
     root = "https://simpcity.cr"
+    directory_fmt = ("{category}", "{thread[section]}",
+                     "{thread[title]} ({thread[id]})")
+    filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}"
+    archive_fmt = "{post[id]}/{type[0]}{id}_{filename}"
 
     def items(self):
         self.login()
 
         extract_urls = text.re(
-            r'<(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)').findall
+            r'(?s)<(?:'  # groups: video, inline <a>, inline <div>, external
+            r'video (.*?\ssrc="[^"]+".*?)</video>'
+            r'|a [^>]*?href="(?:https://[^"]+)?(/attachments/[^"]+".*?)</a>'
+            r'|div [^>]*?ata-src="(?:https://[^"]+)?(/attachments/[^"]+".*?)/>'
+            r'|(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)'
+            r')'  # a trailing lazy .*? needs its terminator to capture text
+        ).findall  # NB: "ata-src" also matches data-src=
 
         for post in self.posts():
             urls = extract_urls(post["content"])
+            # links in the separate attachments markup count as well
+            if post["attachments"]:
+                urls.extend(extract_urls(post["attachments"]))
             data = {"post": post}
             post["count"] = data["count"] = len(urls)
             yield Message.Directory, data
-            for data["num"], url in enumerate(urls, 1):
-                yield Message.Queue, url, data
+            # each findall() tuple has exactly one non-empty group
+            data["num"] = data["num_internal"] = data["num_external"] = 0
+            for video, inl1, inl2, ext in urls:
+                if ext:  # any other link/iframe: queued as-is
+                    data["num"] += 1
+                    data["num_external"] += 1
+                    data["type"] = "external"
+                    yield Message.Queue, ext, data
+                # <video> element: download its first src="..." directly
+                elif video:
+                    data["num"] += 1
+                    data["num_internal"] += 1
+                    data["type"] = "video"
+                    url = text.extr(video, 'src="', '"')
+                    text.nameext_from_url(url, data)
+                    data["id"] = text.parse_int(
+                        data["filename"].partition("-")[0])
+                    yield Message.Url, url, data
+                # attachment (<a> or <div>): name from alt="...", else URL slug
+                elif (inline := inl1 or inl2):
+                    data["num"] += 1
+                    data["num_internal"] += 1
+                    data["type"] = "inline"
+                    path = inline[:inline.find('"')]
+                    name, _, att_id = path[path.rfind("/", 0, -1):].strip(
+                        "/").rpartition(".")
+                    data["id"] = text.parse_int(att_id)
+                    if alt := text.extr(inline, 'alt="', '"'):
+                        text.nameext_from_name(alt, data)
+                        if not data["extension"]:
+                            data["extension"] = name.rpartition("-")[2]
+                    else:
+                        data["filename"], _, data["extension"] = \
+                            name.rpartition("-")
+                    yield Message.Url, self.root + path, data
 
     def request_page(self, url):
         try:
@@ -166,6 +212,8 @@ class SimpcityExtractor(Extractor):
'
',
+ ''),
}
url_a = post["author_url"]
diff --git a/test/results/simpcity.py b/test/results/simpcity.py
index 95e83e90..8f3cd929 100644
--- a/test/results/simpcity.py
+++ b/test/results/simpcity.py
@@ -13,7 +13,7 @@ __tests__ = (
"#url" : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/post-1753131",
"#class" : simpcity.SimpcityPostExtractor,
"#auth" : True,
- "#results" : "https://jpg5.su/img/coWRwo",
+ "#results" : "https://jpg6.su/img/coWRwo",
"count" : 1,
"num" : 1,
@@ -25,7 +25,7 @@ __tests__ = (
"date" : "dt:2023-03-08 12:59:10",
"id" : "1753131",
"content" : """\
-


\
""",
},
@@ -65,7 +65,7 @@ __tests__ = (
"#class" : simpcity.SimpcityPostExtractor,
"#auth" : True,
"#results" : (
- "https://jpg5.su/img/NNFssUg",
+ "https://jpg6.su/img/NNFssUg",
"https://saint2.cr/embed/nPy1kG3w55V",
"https://saint2.cr/embed/c0KhPjU4-F3",
"https://saint2.cr/embed/sZWnVZ_mQsV",
@@ -78,7 +78,10 @@ __tests__ = (
"#comment" : "quote in post content (#8214)",
"#class" : simpcity.SimpcityPostExtractor,
"#auth" : True,
- "#results" : ("/goto/post?id=13358068", "https://cyberdrop.me/a/Sh9GlG38"),
+ "#results" : (
+ "/goto/post?id=13358068",
+ "https://cyberdrop.cr/a/Sh9GlG38",
+ ),
},
{
@@ -87,10 +90,10 @@ __tests__ = (
"#class" : simpcity.SimpcityPostExtractor,
"#auth" : True,
"#results" : (
- "https://jpg5.su/img/aKroBJp",
- "https://jpg5.su/img/aKroy2E",
- "https://jpg5.su/img/aKrofqa",
- "https://jpg5.su/img/aKroDgo",
+ "https://jpg6.su/img/aKroBJp",
+ "https://jpg6.su/img/aKroy2E",
+ "https://jpg6.su/img/aKrofqa",
+ "https://jpg6.su/img/aKroDgo",
"https://bunkr.cr/v/6sErIc9pjrnQ3",
),
@@ -114,12 +117,100 @@ __tests__ = (
},
},
+{
+ "#url" : "https://simpcity.cr/threads/sophia-diamond.10049/post-10891",
+ "#class" : simpcity.SimpcityPostExtractor,
+ "#auth" : True,
+ "#results" : (
+ "https://brandarmy.com/SophiaDiamond",
+ "https://www.tiktok.com/@sophia.ilysm?lang=en",
+ "https://www.instagram.com/sophiadiamond/",
+ "https://simpcity.cr/attachments/sophiadiamond_239636842_558607608495946_5357173067872834144_n-jpg.65924/",
+ ),
+
+ "count" : 4,
+ "num" : range(1, 4),
+ "num_external": range(1, 3),
+ "num_internal": {0, 1},
+ "type" : {"inline", "external"},
+ "post" : {
+ "attachments": "",
+ "author" : "inoncognito",
+ "author_id" : "",
+ "author_url" : "",
+ "count" : 4,
+ "date" : "dt:2022-03-11 00:41:28",
+ "id" : "10891",
+ "content" : str,
+ },
+ "thread" : {
+ "author" : "inoncognito",
+ "author_id" : "53824",
+ "author_url": "https://simpcity.cr/members/inoncognito.53824/",
+ "date" : "dt:2022-03-11 00:41:28",
+ "id" : "10049",
+ "posts" : range(1_000, 2_000),
+ "section" : "TikTok",
+ "title" : "Sophia Diamond",
+ "url" : "https://simpcity.cr/threads/sophia-diamond.10049/",
+ "views" : range(4_200_000, 6_000_000),
+ "tags" : [
+ "busty",
+ "diamond",
+ "slut",
+ "sophia",
+ "sophiadiamond",
+ "tease",
+ "teen",
+ "tiktok",
+ "tits",
+ ],
+ },
+},
+
+{
+ "#url" : "https://simpcity.cr/threads/sophia-diamond.10049/post-18744",
+ "#class" : simpcity.SimpcityPostExtractor,
+ "#auth" : True,
+ "#results" : "https://simpcity.cr/attachments/sophiadiamondcancunbikiniwp-png.36179/",
+
+ "count" : 1,
+ "extension" : "png",
+ "filename" : "SophiaDiamondCancunBikiniWP",
+ "id" : 36179,
+ "num" : 1,
+ "num_external": 0,
+ "num_internal": 1,
+ "type" : "inline",
+ "post" : {
+ "author" : "ElyseGooner",
+ "author_id" : "65059",
+ "author_url" : "https://simpcity.cr/members/elysegooner.65059/",
+ "count" : 1,
+ "date" : "dt:2022-03-11 22:39:06",
+ "id" : "18744",
+ "attachments": str,
+ "content" : """\
+
Collage
+
+
+
\
+""",
+ },
+ "thread" : {
+ "date" : "dt:2022-03-11 00:41:28",
+ "id" : "10049",
+ "section" : "TikTok",
+ "title" : "Sophia Diamond",
+ },
+},
+
{
"#url" : "https://simpcity.cr/threads/alua-tatakai.89490/",
"#class" : simpcity.SimpcityThreadExtractor,
"#auth" : True,
- "#pattern" : r"https://(jpg5\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|/goto/post",
- "#count" : 29,
+ "#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|/goto/post",
+ "#count" : range(100, 300),
"count" : int,
"num" : int,