[motherless] fix 'gallery_title' extraction (#8605)

* Update motherless.py for title selector

Updated the selector for the title property of Motherless galleries to be an h2 instead of an h1, reflecting changes on the site.

* fix 'gallery_title' extraction

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
spicybiguy
2025-11-28 04:53:16 -05:00
committed by GitHub
parent acf281a46a
commit 11151033f6

View File

@@ -41,6 +41,8 @@ class MotherlessExtractor(Extractor):
path, _, media_id = path.rpartition("/") path, _, media_id = path.rpartition("/")
data = { data = {
"id" : media_id, "id" : media_id,
"title": text.unescape(
(t := extr("<title>", "<")) and t[:t.rfind(" | ")]),
"type" : extr("__mediatype = '", "'"), "type" : extr("__mediatype = '", "'"),
"group": extr("__group = '", "'"), "group": extr("__group = '", "'"),
"url" : extr("__fileurl = '", "'"), "url" : extr("__fileurl = '", "'"),
@@ -49,7 +51,6 @@ class MotherlessExtractor(Extractor):
for tag in text.extract_iter( for tag in text.extract_iter(
extr('class="media-meta-tags">', "</div>"), ">#", "<") extr('class="media-meta-tags">', "</div>"), ">#", "<")
], ],
"title": text.unescape(extr("<h1>", "<")),
"views": text.parse_int(extr( "views": text.parse_int(extr(
'class="count">', " ").replace(",", "")), 'class="count">', " ").replace(",", "")),
"favorites": text.parse_int(extr( "favorites": text.parse_int(extr(
@@ -131,10 +132,9 @@ class MotherlessExtractor(Extractor):
if title: if title:
return text.unescape(title.strip()) return text.unescape(title.strip())
pos = page.find(f' href="/G{gallery_id}"') if f' href="/G{gallery_id}"' in page:
if pos >= 0: return text.unescape(
return text.unescape(text.extract( (t := text.extr(page, "<title>", "<")) and t[:t.rfind(" | ")])
page, ' title="', '"', pos)[0])
return "" return ""