@@ -883,6 +883,12 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<td>Boards, Likes, Pins, User Pins, related Pins, Search Results</td>
|
<td>Boards, Likes, Pins, User Pins, related Pins, Search Results</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr id="simpcity" title="simpcity">
|
||||||
|
<td>SimpCity Forums</td>
|
||||||
|
<td>https://simpcity.cr/</td>
|
||||||
|
<td>Forums, Posts, Threads</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
<tr id="simplyhentai" title="simplyhentai">
|
<tr id="simplyhentai" title="simplyhentai">
|
||||||
<td>Simply Hentai</td>
|
<td>Simply Hentai</td>
|
||||||
<td>https://www.simply-hentai.com/</td>
|
<td>https://www.simply-hentai.com/</td>
|
||||||
|
|||||||
@@ -171,6 +171,7 @@ modules = [
|
|||||||
"senmanga",
|
"senmanga",
|
||||||
"sexcom",
|
"sexcom",
|
||||||
"shimmie2",
|
"shimmie2",
|
||||||
|
"simpcity",
|
||||||
"simplyhentai",
|
"simplyhentai",
|
||||||
"sizebooru",
|
"sizebooru",
|
||||||
"skeb",
|
"skeb",
|
||||||
|
|||||||
145
gallery_dl/extractor/simpcity.py
Normal file
145
gallery_dl/extractor/simpcity.py
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2025 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extractors for https://simpcity.cr/"""
|
||||||
|
|
||||||
|
from .common import Extractor, Message
|
||||||
|
from .. import text, exception
|
||||||
|
|
||||||
|
# matches the current (.cr) and former (.su) SimpCity domains
BASE_PATTERN = r"(?:https?://)?(?:www\.)?simpcity\.(?:cr|su)"
|
||||||
|
|
||||||
|
|
||||||
|
class SimpcityExtractor(Extractor):
    """Base class for simpcity extractors

    SimpCity is a XenForo forum whose posts only *link* external content;
    subclasses implement 'posts()' and this base class emits every URL
    found in each post's HTML as a queued URL for other extractors.
    """
    category = "simpcity"
    root = "https://simpcity.cr"

    def items(self):
        # collect every href attribute inside a post's rendered HTML
        extract_urls = text.re(r' href="([^"]+)').findall

        for post in self.posts():
            urls = extract_urls(post["content"])
            data = {"post": post}
            # store the link count both per-post and in the shared metadata
            post["count"] = data["count"] = len(urls)
            for data["num"], url in enumerate(urls, 1):
                yield Message.Queue, url, data

    def request_page(self, url):
        """Fetch 'url' and return its HTML text

        Raises AuthRequired when the server answers 403 with a login
        prompt, forwarding the site's own error message.
        """
        try:
            return self.request(url).text
        except exception.HttpError as exc:
            # a 403 containing a login link means members-only content
            if exc.status == 403 and b">Log in<" in exc.response.content:
                msg = text.extr(exc.response.text, "blockMessage--error", "</")
                raise exception.AuthRequired(
                    "'authenticated cookies'", None,
                    msg.rpartition(">")[2].strip())
            raise

    def _pagination(self, base, pnum=None):
        """Yield HTML pages for a thread or forum listing

        'base' is the path below 'root'. With 'pnum' of None, start at
        the first page and follow 'next' links until none remain;
        otherwise fetch exactly the single page 'pnum'.
        """
        base = f"{self.root}{base}"

        if pnum is None:
            url = base
            pnum = 1
        else:
            url = f"{base}/page-{pnum}"
            pnum = None  # single-page mode: stop after the first yield

        while True:
            page = self.request_page(url)

            yield page

            if pnum is None or "pageNav-jump--next" not in page:
                return
            pnum += 1
            url = f"{base}/page-{pnum}"

    def _parse_thread(self, page):
        """Extract thread metadata from a page's embedded JSON-LD schema"""
        schema = self._extract_jsonld(page)["mainEntity"]
        author = schema["author"]
        stats = schema["interactionStatistic"]
        url_t = schema["url"]
        url_a = author["url"]

        thread = {
            # the numeric ID is the final '.NNN' component of the URL path
            # (the slice drops the trailing '/')
            "id"   : url_t[url_t.rfind(".")+1:-1],
            "url"  : url_t,
            "title": schema["headline"],
            "date" : text.parse_datetime(schema["datePublished"]),
            # interactionStatistic order: views first, then replies
            # — assumed stable for this XenForo theme; TODO confirm
            "views": stats[0]["userInteractionCount"],
            "posts": stats[1]["userInteractionCount"],
            "tags" : (schema["keywords"].split(", ")
                      if "keywords" in schema else ()),
            "section"   : schema["articleSection"],
            "author"    : author["name"],
            "author_id" : url_a[url_a.rfind(".")+1:-1],
            "author_url": url_a,
        }

        return thread

    def _parse_post(self, html):
        """Extract post metadata from one '<article>' HTML fragment

        NOTE: extract_from() consumes the string sequentially, so the
        field order below must match the marker order in the page HTML.
        """
        extr = text.extract_from(html)

        post = {
            "author": extr('data-author="', '"'),
            "id": extr('data-content="post-', '"'),
            "author_url": extr('itemprop="url" content="', '"'),
            "date": text.parse_datetime(extr('datetime="', '"')),
            "content": extr('<div itemprop="text">', "\t\t</div>").strip(),
        }

        # derive the numeric author ID from the profile URL, as above
        url_a = post["author_url"]
        post["author_id"] = url_a[url_a.rfind(".")+1:-1]

        return post
|
||||||
|
|
||||||
|
|
||||||
|
class SimpcityPostExtractor(SimpcityExtractor):
    """Extractor for a single post of a simpcity thread"""
    subcategory = "post"
    pattern = rf"{BASE_PATTERN}/(?:threads/[^/?#]+/post-|posts/)(\d+)"
    example = "https://simpcity.cr/threads/TITLE.12345/post-54321"

    def posts(self):
        pid = self.groups[0]
        page = self.request_page(f"{self.root}/posts/{pid}/")

        anchor = page.find(f'data-content="post-{pid}"')
        if anchor < 0:
            raise exception.NotFoundError("post")
        # the '<article>' tag opens shortly before its data-content
        # attribute, so search backwards from just before the anchor
        article = text.extract(page, "<article ", "</article>", anchor-200)[0]

        self.kwdict["thread"] = self._parse_thread(page)
        return (self._parse_post(article),)
|
||||||
|
|
||||||
|
|
||||||
|
class SimpcityThreadExtractor(SimpcityExtractor):
    """Extractor for all posts of a simpcity thread"""
    subcategory = "thread"
    pattern = rf"{BASE_PATTERN}(/threads/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?"
    example = "https://simpcity.cr/threads/TITLE.12345/"

    def posts(self):
        find_articles = text.extract_iter
        for page in self._pagination(*self.groups):
            # thread-level metadata only needs to be parsed once
            if "thread" not in self.kwdict:
                self.kwdict["thread"] = self._parse_thread(page)
            for fragment in find_articles(page, "<article ", "</article>"):
                yield self._parse_post(fragment)
|
||||||
|
|
||||||
|
|
||||||
|
class SimpcityForumExtractor(SimpcityExtractor):
    """Extractor for simpcity forum listings"""
    subcategory = "forum"
    pattern = rf"{BASE_PATTERN}(/forums/(?:[^/?#]+\.)?\d+)(?:/page-(\d+))?"
    example = "https://simpcity.cr/forums/TITLE.123/"

    def items(self):
        # queue every linked thread for SimpcityThreadExtractor
        meta = {"_extractor": SimpcityThreadExtractor}
        root = self.root
        for page in self._pagination(*self.groups):
            for href in text.extract_iter(page, ' uix-href="', '"'):
                url = f"{root}{text.unquote(href)}"
                yield Message.Queue, url, meta
|
||||||
@@ -159,6 +159,7 @@ CATEGORY_MAP = {
|
|||||||
"sensescans" : "Sense-Scans",
|
"sensescans" : "Sense-Scans",
|
||||||
"sexcom" : "Sex.com",
|
"sexcom" : "Sex.com",
|
||||||
"silverpic" : "SilverPic.com",
|
"silverpic" : "SilverPic.com",
|
||||||
|
"simpcity" : "SimpCity Forums",
|
||||||
"simplyhentai" : "Simply Hentai",
|
"simplyhentai" : "Simply Hentai",
|
||||||
"sizebooru" : "Size Booru",
|
"sizebooru" : "Size Booru",
|
||||||
"slickpic" : "SlickPic",
|
"slickpic" : "SlickPic",
|
||||||
|
|||||||
112
test/results/simpcity.py
Normal file
112
test/results/simpcity.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import simpcity
|
||||||
|
from gallery_dl import exception
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
# single post: one external image link, full post + thread metadata
{
    "#url"     : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/post-1753131",
    "#class"   : simpcity.SimpcityPostExtractor,
    "#auth"    : True,
    "#results" : "https://jpg5.su/img/coWRwo",

    "count" : 1,
    "num"   : 1,
    "post"  : {
        "author"    : "Zebrabobinn",
        "author_id" : "171827",
        "author_url": "https://simpcity.cr/members/zebrabobinn.171827/",
        "count"     : 1,
        "date"      : "dt:2023-03-08 12:59:10",
        "id"        : "1753131",
        "content"   : """\
<div class="bbWrapper"><a href="https://jpg5.su/img/coWRwo" target="_blank" class="link link--external" rel="noopener"><img src="https://simp6.jpg5.su/images/FqsNcNCaIAITBEL.md.jpg" data-url="https://simp6.jpg5.su/images/FqsNcNCaIAITBEL.md.jpg" class="bbImage " loading="lazy"
\t\talt="FqsNcNCaIAITBEL.md.jpg" title="FqsNcNCaIAITBEL.md.jpg" style="" width="" height="" /></a></div>\
""",
    },
    "thread": {
        "author"    : "eula",
        "author_id" : "54987",
        "author_url": "https://simpcity.cr/members/eula.54987/",
        "date"      : "dt:2022-03-11 17:15:59",
        "id"        : "10731",
        "posts"     : range(320, 500),
        "section"   : "Asians",
        "title"     : "Ririkana | RR_loveit",
        "url"       : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/",
        "views"     : range(790_000, 900_000),
        "tags"      : [
            "asian",
            "big ass",
            "gravure",
            "japanese",
            "japanese big ass",
            "small tits",
            "thicc",
        ],
    },
},

# same post without authentication: must raise AuthRequired
{
    "#url"      : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/post-1753131",
    "#class"    : simpcity.SimpcityPostExtractor,
    "#auth"     : False,
    "#exception": exception.AuthRequired,
},

# full thread: type checks for posts, exact thread metadata
{
    "#url"    : "https://simpcity.cr/threads/alua-tatakai.89490/",
    "#class"  : simpcity.SimpcityThreadExtractor,
    "#auth"   : True,
    "#pattern" : r"https://(jpg5\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|/goto/post",
    "#count"  : 29,

    "count" : int,
    "num"   : int,
    "post"  : {
        "author"    : str,
        "author_id" : r"re:\d+",
        "author_url": str,
        "content"   : str,
        "count"     : int,
        "date"      : "type:datetime",
        "id"        : r"re:\d+",
    },
    "thread": {
        "author"    : "Ekalamosus",
        "author_id" : "1036155",
        "author_url": "https://simpcity.cr/members/ekalamosus.1036155/",
        "date"      : "dt:2022-07-31 15:40:14",
        "id"        : "89490",
        "posts"     : 45,
        "section"   : "Asians",
        "title"     : "Alua tatakai",
        "url"       : "https://simpcity.cr/threads/alua-tatakai.89490/",
        "views"     : range(47_000, 60_000),
        "tags"      : [
            "alter",
            "alua",
            "pinay",
        ],
    },
},

# legacy .su domain: pattern match only
{
    "#url"  : "https://simpcity.su/threads/angel-chan-wlep-wlop-menruinyanko_.12948/",
    "#class": simpcity.SimpcityThreadExtractor,
},

# forum listing: every queued URL must match the thread pattern
{
    "#url"    : "https://simpcity.cr/forums/asians.48/",
    "#class"  : simpcity.SimpcityForumExtractor,
    "#pattern": simpcity.SimpcityThreadExtractor.pattern,
    "#range"  : "1-100",
    "#count"  : 100,
},

)
|
||||||
Reference in New Issue
Block a user