[xenforo] add 'media-category' extractor (#8785)
This commit is contained in:
@@ -1888,19 +1888,19 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<tr id="simpcity" title="simpcity">
|
<tr id="simpcity" title="simpcity">
|
||||||
<td>SimpCity Forums</td>
|
<td>SimpCity Forums</td>
|
||||||
<td>https://simpcity.cr/</td>
|
<td>https://simpcity.cr/</td>
|
||||||
<td>Forums, Media Files, User Media, Posts, Threads</td>
|
<td>Forums, Media Categories, Media Files, User Media, Posts, Threads</td>
|
||||||
<td>Supported</td>
|
<td>Supported</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr id="nudostarforum" title="nudostarforum">
|
<tr id="nudostarforum" title="nudostarforum">
|
||||||
<td>NudoStar Forums</td>
|
<td>NudoStar Forums</td>
|
||||||
<td>https://nudostar.com/forum/</td>
|
<td>https://nudostar.com/forum/</td>
|
||||||
<td>Forums, Media Files, User Media, Posts, Threads</td>
|
<td>Forums, Media Categories, Media Files, User Media, Posts, Threads</td>
|
||||||
<td>Supported</td>
|
<td>Supported</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr id="atfforum" title="atfforum">
|
<tr id="atfforum" title="atfforum">
|
||||||
<td>All The Fallen</td>
|
<td>All The Fallen</td>
|
||||||
<td>https://allthefallen.moe/forum/</td>
|
<td>https://allthefallen.moe/forum/</td>
|
||||||
<td>Forums, Media Files, User Media, Posts, Threads</td>
|
<td>Forums, Media Categories, Media Files, User Media, Posts, Threads</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
||||||
|
|||||||
@@ -101,6 +101,44 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
data["type"] = "inline"
|
data["type"] = "inline"
|
||||||
yield Message.Url, self.root + path, data
|
yield Message.Url, self.root + path, data
|
||||||
|
|
||||||
|
def items_media(self, path, pnum):
    """Yield a Directory and a Url message for each entry of a media listing

    'path' and 'pnum' are forwarded unchanged to the pagination helper.
    """
    self.root_media = self.config_instance("root-media") or self.root

    # an "order-posts" value starting with "d" or "r" selects the
    # reverse pagination helper and flips the entries of every page
    order = self.config("order-posts")
    reverse = bool(order) and order[0] in ("d", "r")
    paginate = self._pagination_reverse if reverse else self._pagination
    pages = paginate(path, pnum)

    # with "metadata" enabled the extended media helper is used
    meta = bool(self.config("metadata"))
    extr_media = self._extract_media_ex if meta else self._extract_media

    marker = '<div class="itemList-item js-inlineModContainer'
    for page in pages:
        # the text in front of the first marker is not an entry
        entries = page.split(marker)[1:]
        if reverse:
            entries.reverse()

        for entry in entries:
            href, offset = text.extract(entry, 'href="', '"')
            name, _ = text.extract(entry, "alt='", "'", offset)

            # drop the last character of the link
            # (presumably a trailing "/" - TODO confirm)
            href = href[:-1]
            fname = href.rpartition("/")[2]
            url, media = extr_media(href, fname)

            # without extended metadata, name the file after
            # the entry's 'alt' text when there is one
            if name and not meta:
                text.nameext_from_name(text.unescape(name), media)

            yield Message.Directory, "", media
            yield Message.Url, url, media
|
||||||
|
|
||||||
def request_page(self, url):
|
def request_page(self, url):
|
||||||
try:
|
try:
|
||||||
return self.request(url)
|
return self.request(url)
|
||||||
@@ -270,7 +308,7 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
|
|
||||||
media = text.nameext_from_name(main["name"], {
|
media = text.nameext_from_name(main["name"], {
|
||||||
"schema": schema,
|
"schema": schema,
|
||||||
"id" : file.rpartition("."),
|
"id" : file.rpartition(".")[2],
|
||||||
"size" : main.get("contentSize"),
|
"size" : main.get("contentSize"),
|
||||||
"description": main.get("description"),
|
"description": main.get("description"),
|
||||||
"date" : self.parse_datetime_iso(main.get("dateCreated")),
|
"date" : self.parse_datetime_iso(main.get("dateCreated")),
|
||||||
@@ -404,7 +442,6 @@ class XenforoMediaUserExtractor(XenforoExtractor):
|
|||||||
example = "https://simpcity.cr/media/users/USER.123/"
|
example = "https://simpcity.cr/media/users/USER.123/"
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.root_media = self.config_instance("root-media") or self.root
|
|
||||||
groups = self.groups
|
groups = self.groups
|
||||||
|
|
||||||
user = groups[-3]
|
user = groups[-3]
|
||||||
@@ -414,43 +451,26 @@ class XenforoMediaUserExtractor(XenforoExtractor):
|
|||||||
else:
|
else:
|
||||||
pnum = groups[-2]
|
pnum = groups[-2]
|
||||||
|
|
||||||
path = f"{groups[-4]}media/users/{user}"
|
if not self.config("metadata"):
|
||||||
if (order := self.config("order-posts")) and \
|
|
||||||
order[0] in ("d", "r"):
|
|
||||||
pages = self._pagination_reverse(path, pnum)
|
|
||||||
reverse = True
|
|
||||||
else:
|
|
||||||
pages = self._pagination(path, pnum)
|
|
||||||
reverse = False
|
|
||||||
|
|
||||||
if meta := self.config("metadata"):
|
|
||||||
extr_media = self._extract_media_ex
|
|
||||||
meta = True
|
|
||||||
else:
|
|
||||||
extr_media = self._extract_media
|
|
||||||
meta = False
|
|
||||||
self.kwdict["author"], _, self.kwdict["author_id"] = \
|
self.kwdict["author"], _, self.kwdict["author_id"] = \
|
||||||
user.rpartition(".")
|
user.rpartition(".")
|
||||||
|
|
||||||
for page in pages:
|
return self.items_media(f"{groups[-4]}media/users/{user}", pnum)
|
||||||
posts = page.split(
|
|
||||||
'<div class="itemList-item js-inlineModContainer')
|
|
||||||
del posts[0]
|
|
||||||
|
|
||||||
if reverse:
|
|
||||||
posts.reverse()
|
|
||||||
|
|
||||||
for html in posts:
|
class XenforoMediaCategoryExtractor(XenforoExtractor):
|
||||||
href, pos = text.extract(html, 'href="', '"')
|
subcategory = "media-category"
|
||||||
name, pos = text.extract(html, "alt='", "'", pos)
|
directory_fmt = ("{category}", "Media", "Category", "{mcategory}")
|
||||||
|
filename_fmt = "{filename}.{extension}"
|
||||||
|
archive_fmt = "{id}"
|
||||||
|
pattern = (BASE_PATTERN + r"(/(?:index\.php\?)?"
|
||||||
|
r"media/categories/([^/?#]+))(?:/page-(\d+))?")
|
||||||
|
example = "https://simpcity.cr/media/categories/CATEGORY.123/"
|
||||||
|
|
||||||
href = href[:-1]
|
def items(self):
|
||||||
url, media = extr_media(href, href.rpartition("/")[2])
|
self.kwdict["mcategory"], _, self.kwdict["mcategory_id"] = \
|
||||||
if not meta and name:
|
self.groups[-2].rpartition(".")
|
||||||
text.nameext_from_name(text.unescape(name), media)
|
return self.items_media(self.groups[-3], self.groups[-1])
|
||||||
|
|
||||||
yield Message.Directory, "", media
|
|
||||||
yield Message.Url, url, media
|
|
||||||
|
|
||||||
|
|
||||||
class XenforoMediaItemExtractor(XenforoExtractor):
|
class XenforoMediaItemExtractor(XenforoExtractor):
|
||||||
@@ -463,10 +483,7 @@ class XenforoMediaItemExtractor(XenforoExtractor):
|
|||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.root_media = self.root
|
self.root_media = self.root
|
||||||
|
|
||||||
path = self.groups[-2]
|
|
||||||
file = self.groups[-1]
|
|
||||||
url, media = (self._extract_media_ex if self.config("metadata") else
|
url, media = (self._extract_media_ex if self.config("metadata") else
|
||||||
self._extract_media)(path, file)
|
self._extract_media)(self.groups[-2], self.groups[-1])
|
||||||
yield Message.Directory, "", media
|
yield Message.Directory, "", media
|
||||||
yield Message.Url, url, media
|
yield Message.Url, url, media
|
||||||
|
|||||||
@@ -468,6 +468,7 @@ SUBCATEGORY_MAP = {
|
|||||||
"xenforo": {
|
"xenforo": {
|
||||||
"media-user": "User Media",
|
"media-user": "User Media",
|
||||||
"media-item": "Media Files",
|
"media-item": "Media Files",
|
||||||
|
"media-category": "Media Categories",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -148,4 +148,10 @@ __tests__ = (
|
|||||||
"#class" : xenforo.XenforoMediaUserExtractor,
|
"#class" : xenforo.XenforoMediaUserExtractor,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://www.allthefallen.moe/forum/index.php?media/categories/translations.2/",
|
||||||
|
"#category": ("xenforo", "atfforum", "media-category"),
|
||||||
|
"#class" : xenforo.XenforoMediaCategoryExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user