[xenforo] implement 'media' support (#8785)
add 'media-user' and 'media-item' extractors TODO: 'media-category' extractor (?)
This commit is contained in:
@@ -7049,6 +7049,18 @@ Description
|
|||||||
For ``Category:`` pages, recursively descend into subcategories.
|
For ``Category:`` pages, recursively descend into subcategories.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.[xenforo].metadata
|
||||||
|
----------------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Description
|
||||||
|
Extract detailed metadata for `media` files.
|
||||||
|
Note
|
||||||
|
This requires 1 additional HTTP request per file.
|
||||||
|
|
||||||
|
|
||||||
extractor.[xenforo].order-posts
|
extractor.[xenforo].order-posts
|
||||||
-------------------------------
|
-------------------------------
|
||||||
Type
|
Type
|
||||||
|
|||||||
@@ -1187,6 +1187,7 @@
|
|||||||
|
|
||||||
"xenforo":
|
"xenforo":
|
||||||
{
|
{
|
||||||
|
"metadata" : false,
|
||||||
"order-posts": "desc"
|
"order-posts": "desc"
|
||||||
},
|
},
|
||||||
"nudostarforum":
|
"nudostarforum":
|
||||||
|
|||||||
@@ -1888,19 +1888,19 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<tr id="simpcity" title="simpcity">
|
<tr id="simpcity" title="simpcity">
|
||||||
<td>SimpCity Forums</td>
|
<td>SimpCity Forums</td>
|
||||||
<td>https://simpcity.cr/</td>
|
<td>https://simpcity.cr/</td>
|
||||||
<td>Forums, Posts, Threads</td>
|
<td>Forums, Media Files, User Media, Posts, Threads</td>
|
||||||
<td>Supported</td>
|
<td>Supported</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr id="nudostarforum" title="nudostarforum">
|
<tr id="nudostarforum" title="nudostarforum">
|
||||||
<td>NudoStar Forums</td>
|
<td>NudoStar Forums</td>
|
||||||
<td>https://nudostar.com/forum/</td>
|
<td>https://nudostar.com/forum/</td>
|
||||||
<td>Forums, Posts, Threads</td>
|
<td>Forums, Media Files, User Media, Posts, Threads</td>
|
||||||
<td>Supported</td>
|
<td>Supported</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr id="atfforum" title="atfforum">
|
<tr id="atfforum" title="atfforum">
|
||||||
<td>All The Fallen</td>
|
<td>All The Fallen</td>
|
||||||
<td>https://allthefallen.moe/forum/</td>
|
<td>https://allthefallen.moe/forum/</td>
|
||||||
<td>Forums, Posts, Threads</td>
|
<td>Forums, Media Files, User Media, Posts, Threads</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright 2025 Mike Fährmann
|
# Copyright 2025-2026 Mike Fährmann
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
@@ -253,6 +253,47 @@ class XenforoExtractor(BaseExtractor):
|
|||||||
|
|
||||||
return post
|
return post
|
||||||
|
|
||||||
|
def _extract_media(self, path, file):
    """Derive the download URL and basic metadata from a media slug.

    'file' is the last component of a media page path
    (e.g. "NAME-EXT.123"). Nothing is requested over the network;
    all values are cut out of that string.

    Returns a (url, metadata) tuple with 'id', 'filename'
    and 'extension' metadata fields.
    """
    # "NAME-EXT.123" -> stem "NAME-EXT", ID "123"
    stem, _, media_id = file.rpartition(".")
    # "NAME-EXT" -> filename "NAME", extension "EXT"
    filename, _, extension = stem.rpartition("-")

    media = {
        "id"       : media_id,
        "filename" : filename,
        "extension": extension,
    }
    return f"{self.root_media}{path}/full", media
|
||||||
|
|
||||||
|
def _extract_media_ex(self, path, file):
    """Fetch a media page and build detailed metadata from its JSON-LD.

    Sends one HTTP request for '{root}{path}/' and reads the
    'mainEntity' object of the page's JSON-LD data.

    'file' is the last component of the media page path
    (e.g. "NAME-EXT.123"); only its numeric suffix is used as 'id'.

    Returns a (url, metadata) tuple, where 'url' is the main
    entity's 'contentUrl'.
    """
    page = self.request(f"{self.root}{path}/").text

    schema = self._extract_jsonld(page)
    main = schema["mainEntity"]
    author = main["author"]
    url_a = author.get("url") or ""
    stats = main["interactionStatistic"]

    media = text.nameext_from_name(main["name"], {
        "schema": schema,
        # rpartition() returns a 3-tuple; only its last element is
        # the ID, matching what _extract_media() stores as 'id'
        "id"    : file.rpartition(".")[2],
        "size"  : main.get("contentSize"),
        "description": main.get("description"),
        "date"  : self.parse_datetime_iso(main.get("dateCreated")),
        # the text before the first space of 'name' is parsed as an
        # integer; a missing 'width'/'height' object yields 0
        "width" : (w := main.get("width")) and text.parse_int(
            w["name"].partition(" ")[0]) or 0,
        "height": (h := main.get("height")) and text.parse_int(
            h["name"].partition(" ")[0]) or 0,
        "author"    : author.get("name") or "",
        # ID is the part between the last '.' and the final character
        # of the author URL
        # NOTE(review): without a URL, the name minus its first 15
        # characters is used - presumably a fixed prefix; confirm
        "author_id" : (url_a[url_a.rfind(".")+1:-1] if url_a else
                       (author.get("name") or "")[15:]),
        "author_url": url_a,
    })

    if ext := main.get("encodingFormat"):
        media["extension"] = ext

    if isinstance(stats, list):
        # Identify each counter by its declared schema.org action
        # instead of its list position, so that 'views' and
        # 'comments' can no longer end up with the same value.
        by_action = {
            "LikeAction"   : "likes",
            "ViewAction"   : "views",
            "CommentAction": "comments",
        }
        counts = {}
        for stat in stats:
            action = str(stat.get("interactionType") or "")
            if key := by_action.get(action.rpartition("/")[2]):
                counts[key] = stat["userInteractionCount"]

        if not counts:
            # No recognizable 'interactionType': fall back to list
            # positions, skipping any that do not exist.
            # NOTE(review): 'comments' used to read index 0, the same
            # as 'views'; index 2 is an assumption - confirm against
            # a live media page
            for key, idx in (("likes", 1), ("views", 0), ("comments", 2)):
                if idx < len(stats):
                    counts[key] = stats[idx]["userInteractionCount"]

        media.update(counts)

    return main["contentUrl"], media
|
||||||
|
|
||||||
def _require_auth(self, response=None):
|
def _require_auth(self, response=None):
|
||||||
raise exception.AuthRequired(
|
raise exception.AuthRequired(
|
||||||
("username & password", "authenticated cookies"), None,
|
("username & password", "authenticated cookies"), None,
|
||||||
@@ -350,3 +391,82 @@ class XenforoForumExtractor(XenforoExtractor):
|
|||||||
for page in self._pagination(path, pnum):
|
for page in self._pagination(path, pnum):
|
||||||
for path in extract_threads(page):
|
for path in extract_threads(page):
|
||||||
yield Message.Queue, self.root + text.unquote(path), data
|
yield Message.Queue, self.root + text.unquote(path), data
|
||||||
|
|
||||||
|
|
||||||
|
class XenforoMediaUserExtractor(XenforoExtractor):
    """Extractor for the media files listed on a user's media pages"""
    subcategory = "media-user"
    directory_fmt = ("{category}", "Media", "{author}")
    filename_fmt = "{filename}.{extension}"
    archive_fmt = "{id}"
    pattern = (BASE_PATTERN + r"(/(?:index\.php\?)?)me(?:"
               r"dia/users/([^/?#]+)(?:/page-(\d+))?|"
               r"mbers/([^/?#]+)/#xfmgMedia)")
    example = "https://simpcity.cr/media/users/USER.123/"

    def items(self):
        """Yield a Directory and a Url message for every listed media"""
        self.root_media = self.config_instance("root-media") or self.root

        # last four match groups: URL prefix, user from a 'media/users'
        # URL, its page number, user from a 'members' URL
        prefix, user, pnum, member = self.groups[-4:]
        if user is None:
            # 'members/USER/#xfmgMedia' form; it has no page number
            user, pnum = member, None

        path = f"{prefix}media/users/{user}"

        # walk the pages backwards when 'order-posts' starts
        # with 'd' or 'r'
        order = self.config("order-posts")
        reverse = bool(order) and order[0] in ("d", "r")
        paginate = self._pagination_reverse if reverse else self._pagination
        pages = paginate(path, pnum)

        # 'metadata' selects the extraction method that requests
        # each media page
        meta = bool(self.config("metadata"))
        extr_media = self._extract_media_ex if meta else self._extract_media

        # "NAME.123" -> author "NAME", author_id "123"
        author, _, author_id = user.rpartition(".")
        self.kwdict["author"] = author
        self.kwdict["author_id"] = author_id

        marker = '<div class="itemList-item js-inlineModContainer'
        for page in pages:
            # the chunk in front of the first marker is not an item
            posts = page.split(marker)[1:]
            if reverse:
                posts.reverse()

            for html in posts:
                href, pos = text.extract(html, 'href="', '"')
                name, pos = text.extract(html, "alt='", "'", pos)

                # drop the link's final character
                # (presumably its trailing '/')
                href = href[:-1]
                url, media = extr_media(href, href.rpartition("/")[2])

                # the basic method has no access to the real file name;
                # take it from the 'alt' text when there is one
                if name and not meta:
                    text.nameext_from_name(text.unescape(name), media)

                yield Message.Directory, "", media
                yield Message.Url, url, media
|
||||||
|
|
||||||
|
|
||||||
|
class XenforoMediaItemExtractor(XenforoExtractor):
    """Extractor for a single media file"""
    subcategory = "media-item"
    directory_fmt = ("{category}", "Media", "{author|''}")
    filename_fmt = "{filename}.{extension}"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"(/(?:index\.php\?)?media/((?:[^/?#]+\.)\d+))"
    example = "https://simpcity.cr/media/NAME.123/"

    def items(self):
        """Yield one Directory and one Url message for the media file"""
        self.root_media = self.root

        # last two match groups: full media path and its final component
        path, file = self.groups[-2:]

        # 'metadata' selects the extraction method that requests
        # the media page
        if self.config("metadata"):
            extract = self._extract_media_ex
        else:
            extract = self._extract_media
        url, media = extract(path, file)

        yield Message.Directory, "", media
        yield Message.Url, url, media
|
||||||
|
|||||||
@@ -465,6 +465,10 @@ SUBCATEGORY_MAP = {
|
|||||||
"wikimedia": {
|
"wikimedia": {
|
||||||
"article": ["Articles", "Categories", "Files"],
|
"article": ["Articles", "Categories", "Files"],
|
||||||
},
|
},
|
||||||
|
"xenforo": {
|
||||||
|
"media-user": "User Media",
|
||||||
|
"media-item": "Media Files",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
BASE_MAP = {
|
BASE_MAP = {
|
||||||
|
|||||||
@@ -117,4 +117,35 @@ __tests__ = (
|
|||||||
"#count" : range(100, 200),
|
"#count" : range(100, 200),
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://allthefallen.moe/forum/index.php?media/1737485564664-png.224260/",
|
||||||
|
"#category": ("xenforo", "atfforum", "media-item"),
|
||||||
|
"#class" : xenforo.XenforoMediaItemExtractor,
|
||||||
|
"#options" : {"metadata": False},
|
||||||
|
"#results" : "https://allthefallen.moe/forum/index.php?media/1737485564664-png.224260/full",
|
||||||
|
|
||||||
|
"extension": "png",
|
||||||
|
"filename" : "1737485564664",
|
||||||
|
"id" : "224260",
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://allthefallen.moe/forum/index.php?media/users/peters.150992/",
|
||||||
|
"#category": ("xenforo", "atfforum", "media-user"),
|
||||||
|
"#class" : xenforo.XenforoMediaUserExtractor,
|
||||||
|
"#options" : {"metadata": False},
|
||||||
|
"#auth" : True,
|
||||||
|
"#results" : (
|
||||||
|
"https://allthefallen.moe/forum/index.php?media/eden-invitation-jpg.254624/full",
|
||||||
|
"https://allthefallen.moe/forum/index.php?media/1737485564664-png.224260/full",
|
||||||
|
"https://allthefallen.moe/forum/index.php?media/laughing-cat-emoji-png.243825/full",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://allthefallen.moe/forum/index.php?members/peters.150992/#xfmgMedia",
|
||||||
|
"#category": ("xenforo", "atfforum", "media-user"),
|
||||||
|
"#class" : xenforo.XenforoMediaUserExtractor,
|
||||||
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user