[motherless] fix 'gallery_title' extraction (#8605)

* Update motherless.py for title selector

Updated the title selector for Motherless galleries to use an h2 instead of an h1, reflecting changes on the site.

* fix 'gallery_title' extraction

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
spicybiguy
2025-11-28 04:53:16 -05:00
committed by GitHub
parent acf281a46a
commit 11151033f6

View File

@@ -41,6 +41,8 @@ class MotherlessExtractor(Extractor):
path, _, media_id = path.rpartition("/")
data = {
"id" : media_id,
+ "title": text.unescape(
+ (t := extr("<title>", "<")) and t[:t.rfind(" | ")]),
"type" : extr("__mediatype = '", "'"),
"group": extr("__group = '", "'"),
"url" : extr("__fileurl = '", "'"),
@@ -49,7 +51,6 @@ class MotherlessExtractor(Extractor):
for tag in text.extract_iter(
extr('class="media-meta-tags">', "</div>"), ">#", "<")
],
- "title": text.unescape(extr("<h1>", "<")),
"views": text.parse_int(extr(
'class="count">', " ").replace(",", "")),
"favorites": text.parse_int(extr(
@@ -131,10 +132,9 @@ class MotherlessExtractor(Extractor):
if title:
return text.unescape(title.strip())
- pos = page.find(f' href="/G{gallery_id}"')
- if pos >= 0:
- return text.unescape(text.extract(
- page, ' title="', '"', pos)[0])
+ if f' href="/G{gallery_id}"' in page:
+ return text.unescape(
+ (t := text.extr(page, "<title>", "<")) and t[:t.rfind(" | ")])
return ""