diff --git a/gallery_dl/extractor/simpcity.py b/gallery_dl/extractor/simpcity.py index 9e66e914..41be2dae 100644 --- a/gallery_dl/extractor/simpcity.py +++ b/gallery_dl/extractor/simpcity.py @@ -21,20 +21,66 @@ class SimpcityExtractor(Extractor): cookies_domain = "simpcity.cr" cookies_names = ("ogaddgmetaprof_user",) root = "https://simpcity.cr" + directory_fmt = ("{category}", "{thread[section]}", + "{thread[title]} ({thread[id]})") + filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}" + archive_fmt = "{post[id]}/{type[0]}{id}_{filename}" def items(self): self.login() extract_urls = text.re( - r'<(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)').findall + r'(?s)<(?:' + r'video (.*?\ssrc="[^"]+".*?)' + r'|a [^>]*?href="(?:https://[^"]+)?(/attachments/[^"]+".*?)' + r'|div [^>]*?ata-src="(?:https://[^"]+)?(/attachments/[^"]+".*?)/>' + r'|(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)' + r')' + ).findall for post in self.posts(): urls = extract_urls(post["content"]) + if post["attachments"]: + urls.extend(extract_urls(post["attachments"])) + data = {"post": post} post["count"] = data["count"] = len(urls) yield Message.Directory, data - for data["num"], url in enumerate(urls, 1): - yield Message.Queue, url, data + + data["num"] = data["num_internal"] = data["num_external"] = 0 + for video, inl1, inl2, ext in urls: + if ext: + data["num"] += 1 + data["num_external"] += 1 + data["type"] = "external" + yield Message.Queue, ext, data + + elif video: + data["num"] += 1 + data["num_internal"] += 1 + data["type"] = "video" + url = text.extr(video, 'src="', '"') + text.nameext_from_url(url, data) + data["id"] = text.parse_int( + data["filename"].partition("-")[0]) + yield Message.Url, url, data + + elif (inline := inl1 or inl2): + data["num"] += 1 + data["num_internal"] += 1 + data["type"] = "inline" + path = inline[:inline.find('"')] + name, _, id = path[path.rfind("/", 0, -1):].strip( + "/").rpartition(".") + data["id"] = text.parse_int(id) + if alt := text.extr(inline, 'alt="', '"'): + text.nameext_from_name(alt, data) + if not data["extension"]: + data["extension"] = name.rpartition("-")[2] + else: + data["filename"], _, data["extension"] = \ + name.rpartition("-") + yield Message.Url, self.root + path, data def request_page(self, url): try: @@ -166,6 +212,8 @@ class SimpcityExtractor(Extractor): '
', + ''), } url_a = post["author_url"] diff --git a/test/results/simpcity.py b/test/results/simpcity.py index 95e83e90..8f3cd929 100644 --- a/test/results/simpcity.py +++ b/test/results/simpcity.py @@ -13,7 +13,7 @@ __tests__ = ( "#url" : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/post-1753131", "#class" : simpcity.SimpcityPostExtractor, "#auth" : True, - "#results" : "https://jpg5.su/img/coWRwo", + "#results" : "https://jpg6.su/img/coWRwo", "count" : 1, "num" : 1, @@ -25,7 +25,7 @@ __tests__ = ( "date" : "dt:2023-03-08 12:59:10", "id" : "1753131", "content" : """\ -
\ """, }, @@ -65,7 +65,7 @@ __tests__ = ( "#class" : simpcity.SimpcityPostExtractor, "#auth" : True, "#results" : ( - "https://jpg5.su/img/NNFssUg", + "https://jpg6.su/img/NNFssUg", "https://saint2.cr/embed/nPy1kG3w55V", "https://saint2.cr/embed/c0KhPjU4-F3", "https://saint2.cr/embed/sZWnVZ_mQsV", @@ -78,7 +78,10 @@ __tests__ = ( "#comment" : "quote in post content (#8214)", "#class" : simpcity.SimpcityPostExtractor, "#auth" : True, - "#results" : ("/goto/post?id=13358068", "https://cyberdrop.me/a/Sh9GlG38"), + "#results" : ( + "/goto/post?id=13358068", + "https://cyberdrop.cr/a/Sh9GlG38", + ), }, { @@ -87,10 +90,10 @@ __tests__ = ( "#class" : simpcity.SimpcityPostExtractor, "#auth" : True, "#results" : ( - "https://jpg5.su/img/aKroBJp", - "https://jpg5.su/img/aKroy2E", - "https://jpg5.su/img/aKrofqa", - "https://jpg5.su/img/aKroDgo", + "https://jpg6.su/img/aKroBJp", + "https://jpg6.su/img/aKroy2E", + "https://jpg6.su/img/aKrofqa", + "https://jpg6.su/img/aKroDgo", "https://bunkr.cr/v/6sErIc9pjrnQ3", ), @@ -114,12 +117,100 @@ __tests__ = ( }, }, +{ + "#url" : "https://simpcity.cr/threads/sophia-diamond.10049/post-10891", + "#class" : simpcity.SimpcityPostExtractor, + "#auth" : True, + "#results" : ( + "https://brandarmy.com/SophiaDiamond", + "https://www.tiktok.com/@sophia.ilysm?lang=en", + "https://www.instagram.com/sophiadiamond/", + "https://simpcity.cr/attachments/sophiadiamond_239636842_558607608495946_5357173067872834144_n-jpg.65924/", + ), + + "count" : 4, + "num" : range(1, 4), + "num_external": range(1, 3), + "num_internal": {0, 1}, + "type" : {"inline", "external"}, + "post" : { + "attachments": "", + "author" : "inoncognito", + "author_id" : "", + "author_url" : "", + "count" : 4, + "date" : "dt:2022-03-11 00:41:28", + "id" : "10891", + "content" : str, + }, + "thread" : { + "author" : "inoncognito", + "author_id" : "53824", + "author_url": "https://simpcity.cr/members/inoncognito.53824/", + "date" : "dt:2022-03-11 00:41:28", + "id" : "10049", + "posts" : range(1_000, 2_000), + "section" : "TikTok", + "title" : "Sophia Diamond", + "url" : "https://simpcity.cr/threads/sophia-diamond.10049/", + "views" : range(4_200_000, 6_000_000), + "tags" : [ + "busty", + "diamond", + "slut", + "sophia", + "sophiadiamond", + "tease", + "teen", + "tiktok", + "tits", + ], + }, +}, + +{ + "#url" : "https://simpcity.cr/threads/sophia-diamond.10049/post-18744", + "#class" : simpcity.SimpcityPostExtractor, + "#auth" : True, + "#results" : "https://simpcity.cr/attachments/sophiadiamondcancunbikiniwp-png.36179/", + + "count" : 1, + "extension" : "png", + "filename" : "SophiaDiamondCancunBikiniWP", + "id" : 36179, + "num" : 1, + "num_external": 0, + "num_internal": 1, + "type" : "inline", + "post" : { + "author" : "ElyseGooner", + "author_id" : "65059", + "author_url" : "https://simpcity.cr/members/elysegooner.65059/", + "count" : 1, + "date" : "dt:2022-03-11 22:39:06", + "id" : "18744", + "attachments": str, + "content" : """\ +
Collage
+ + +
\ +""", + }, + "thread" : { + "date" : "dt:2022-03-11 00:41:28", + "id" : "10049", + "section" : "TikTok", + "title" : "Sophia Diamond", + }, +}, + { "#url" : "https://simpcity.cr/threads/alua-tatakai.89490/", "#class" : simpcity.SimpcityThreadExtractor, "#auth" : True, - "#pattern" : r"https://(jpg5\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|/goto/post", - "#count" : 29, + "#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|/goto/post", + "#count" : range(100, 300), "count" : int, "num" : int,