[xenforo] extract 'author_slug' metadata (#8785)
This commit is contained in:
@@ -245,7 +245,6 @@ class XenforoExtractor(BaseExtractor):
|
||||
author = schema["author"]
|
||||
stats = schema["interactionStatistic"]
|
||||
url_t = schema.get("url") or schema.get("@id") or ""
|
||||
url_a = author.get("url") or ""
|
||||
|
||||
thread = {
|
||||
"id" : url_t[url_t.rfind(".")+1:-1],
|
||||
@@ -254,13 +253,19 @@ class XenforoExtractor(BaseExtractor):
|
||||
"date" : self.parse_datetime_iso(schema["datePublished"]),
|
||||
"tags" : (schema["keywords"].split(", ")
|
||||
if "keywords" in schema else ()),
|
||||
"section" : schema["articleSection"],
|
||||
"author" : author.get("name") or "",
|
||||
"author_id" : (url_a[url_a.rfind(".")+1:-1] if url_a else
|
||||
(author.get("name") or "")[15:]),
|
||||
"author_url": url_a,
|
||||
"section": schema["articleSection"],
|
||||
"author" : author.get("name") or "",
|
||||
}
|
||||
|
||||
if url_a := author.get("url"):
|
||||
thread["author_url"] = url_a
|
||||
thread["author_slug"], _, thread["author_id"] = \
|
||||
url_a[url_a.rfind("/", 0, -1)+1:-1].rpartition(".")
|
||||
else:
|
||||
thread["author_url"] = ""
|
||||
thread["author_slug"] = text.slugify(thread["author"][:15])
|
||||
thread["author_id"] = thread["author"][15:]
|
||||
|
||||
if isinstance(stats, list):
|
||||
thread["views"] = stats[0]["userInteractionCount"]
|
||||
thread["posts"] = stats[1]["userInteractionCount"]
|
||||
@@ -286,7 +291,8 @@ class XenforoExtractor(BaseExtractor):
|
||||
}
|
||||
|
||||
url_a = post["author_url"]
|
||||
post["author_id"] = url_a[url_a.rfind(".")+1:-1]
|
||||
post["author_slug"], _, post["author_id"] = \
|
||||
url_a[url_a.rfind("/", 0, -1)+1:-1].rpartition(".")
|
||||
|
||||
con = post["content"]
|
||||
if (pos := con.find('<div class="bbWrapper')) >= 0:
|
||||
@@ -307,7 +313,6 @@ class XenforoExtractor(BaseExtractor):
|
||||
schema = self._extract_jsonld(page)
|
||||
main = schema["mainEntity"]
|
||||
author = main["author"]
|
||||
url_a = author.get("url") or ""
|
||||
stats = main["interactionStatistic"]
|
||||
|
||||
media = text.nameext_from_name(main["name"], {
|
||||
@@ -320,19 +325,25 @@ class XenforoExtractor(BaseExtractor):
|
||||
w["name"].partition(" ")[0]) or 0,
|
||||
"height": (h := main.get("height")) and text.parse_int(
|
||||
h["name"].partition(" ")[0]) or 0,
|
||||
"author" : author.get("name") or "",
|
||||
"author_id" : (url_a[url_a.rfind(".")+1:-1] if url_a else
|
||||
(author.get("name") or "")[15:]),
|
||||
"author_url": url_a,
|
||||
"author": author.get("name") or "",
|
||||
})
|
||||
|
||||
# Fill in the author_url / author_slug / author_id fields of 'media'
# from the JSON-LD author entry.
if url_a := author.get("url"):
    # The last path segment of the profile URL has the form
    # "<slug>.<id>" (e.g. ".../members/eula.54987/"); drop the trailing
    # slash and split on the final dot.
    media["author_url"] = url_a
    media["author_slug"], _, media["author_id"] = \
        url_a[url_a.rfind("/", 0, -1)+1:-1].rpartition(".")
else:
    # No profile URL: fall back to the display name, treating its first
    # 15 characters as the name part and the remainder as the ID
    # (e.g. "Deleted member 166159" -> "deleted-member" / "166159").
    # The slug must come from the [:15] prefix, matching the thread
    # fallback; slicing [15:] here would slugify the ID instead.
    media["author_url"] = ""
    media["author_slug"] = text.slugify(media["author"][:15])
    media["author_id"] = media["author"][15:]
|
||||
|
||||
if ext := main.get("encodingFormat"):
|
||||
media["extension"] = ext
|
||||
|
||||
if isinstance(stats, list):
|
||||
media["likes"] = stats[1]["userInteractionCount"]
|
||||
media["views"] = stats[0]["userInteractionCount"]
|
||||
media["comments"] = stats[0]["userInteractionCount"]
|
||||
media["likes"] = stats[1]["userInteractionCount"]
|
||||
media["comments"] = stats[2]["userInteractionCount"]
|
||||
|
||||
return main["contentUrl"], media
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ __tests__ = (
|
||||
"author" : "djokica",
|
||||
"author_id" : "3471965",
|
||||
"author_url" : "/forum/members/djokica.3471965/",
|
||||
"author_slug": "djokica",
|
||||
"content" : """<div class="bbWrapper"><a href="https://imagetwist.com/bvolb8129fnm/v1.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/bvolb8129fnm.jpg" data-url="https://s10.imagetwist.com/th/73048/bvolb8129fnm.jpg" class="bbImage " style="" alt="" title="" /></a> <a href="https://imagetwist.com/9pddder15iow/v2.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/9pddder15iow.jpg" data-url="https://s10.imagetwist.com/th/73048/9pddder15iow.jpg" class="bbImage " style="" alt="" title="" /></a> <a href="https://imagetwist.com/zzonmk0gqqdv/v3.jpg" target="_blank" class="link link--external" rel="nofollow noopener"><img src="https://s10.imagetwist.com/th/73048/zzonmk0gqqdv.jpg" data-url="https://s10.imagetwist.com/th/73048/zzonmk0gqqdv.jpg" class="bbImage " style="" alt="" title="" /></a></div>""",
|
||||
"count" : 3,
|
||||
"date" : "dt:2025-10-31 21:26:42",
|
||||
|
||||
@@ -22,25 +22,24 @@ __tests__ = (
|
||||
"author" : "Zebrabobinn",
|
||||
"author_id" : "171827",
|
||||
"author_url": "https://simpcity.cr/members/zebrabobinn.171827/",
|
||||
"author_slug": "zebrabobinn",
|
||||
"count" : 1,
|
||||
"date" : "dt:2023-03-08 12:59:10",
|
||||
"id" : "1753131",
|
||||
"content" : """\
|
||||
<div class="bbWrapper"><a href="https://jpg6.su/img/coWRwo" target="_blank" class="link link--external" rel="noopener"><img src="https://simp6.jpg6.su/images/FqsNcNCaIAITBEL.md.jpg" data-url="https://simp6.jpg6.su/images/FqsNcNCaIAITBEL.md.jpg" class="bbImage " loading="lazy"
|
||||
\t\talt="FqsNcNCaIAITBEL.md.jpg" title="FqsNcNCaIAITBEL.md.jpg" style="" width="" height="" /></a></div>\
|
||||
""",
|
||||
"content" : str,
|
||||
},
|
||||
"thread": {
|
||||
"author" : "eula",
|
||||
"author_id" : "54987",
|
||||
"author_url": "https://simpcity.cr/members/eula.54987/",
|
||||
"author_slug": "eula",
|
||||
"date" : "dt:2022-03-11 17:15:59",
|
||||
"id" : "10731",
|
||||
"posts" : range(320, 500),
|
||||
"section" : "Asians",
|
||||
"title" : "Ririkana | RR_loveit",
|
||||
"url" : "https://simpcity.cr/threads/ririkana-rr_loveit.10731/",
|
||||
"views" : range(790_000, 900_000),
|
||||
"views" : range(900_000, 2_000_000),
|
||||
"tags" : [
|
||||
"asian",
|
||||
"big ass",
|
||||
@@ -69,10 +68,10 @@ __tests__ = (
|
||||
"#auth" : True,
|
||||
"#results" : (
|
||||
"https://jpg6.su/img/NNFssUg",
|
||||
"https://saint2.cr/embed/nPy1kG3w55V",
|
||||
"https://saint2.cr/embed/c0KhPjU4-F3",
|
||||
"https://saint2.cr/embed/sZWnVZ_mQsV",
|
||||
"https://saint2.cr/embed/MEBiLx6DETQ",
|
||||
"https://turbovid.cr/embed/nPy1kG3w55V",
|
||||
"https://turbovid.cr/embed/c0KhPjU4-F3",
|
||||
"https://turbovid.cr/embed/sZWnVZ_mQsV",
|
||||
"https://turbovid.cr/embed/MEBiLx6DETQ",
|
||||
),
|
||||
},
|
||||
|
||||
@@ -104,6 +103,7 @@ __tests__ = (
|
||||
"author" : "Hexorium",
|
||||
"author_id" : "3715883",
|
||||
"author_url": "https://simpcity.cr/members/hexorium.3715883/",
|
||||
"author_slug": "hexorium",
|
||||
"count" : 6,
|
||||
"date" : "dt:2024-12-15 21:37:05",
|
||||
"id" : "12065490",
|
||||
@@ -112,6 +112,7 @@ __tests__ = (
|
||||
"author" : "Deleted member 166159",
|
||||
"author_id" : "166159",
|
||||
"author_url": "",
|
||||
"author_slug": "deleted-member",
|
||||
"date" : "dt:2022-04-05 14:48:14",
|
||||
"id" : "36572",
|
||||
"section" : "Premium Asians",
|
||||
@@ -244,7 +245,7 @@ __tests__ = (
|
||||
"#category": ("xenforo", "simpcity", "thread"),
|
||||
"#class" : xenforo.XenforoThreadExtractor,
|
||||
"#auth" : True,
|
||||
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|saint2.cr/embed",
|
||||
"#pattern" : r"https://(jpg6\.su/img/\w+|bunkr\.\w+/[fiv]/\w+|pixeldrain.com/l/\w+|alua.com/tatakai)|turbovid.cr/embed",
|
||||
"#count" : range(100, 300),
|
||||
|
||||
"count" : int,
|
||||
|
||||
Reference in New Issue
Block a user