[motherless] fix 'gallery_title' extraction (#8605)
* Update motherless.py for title selector

  Updated the selector for the title property for Motherless galleries to be an h2 instead of an h1, to reflect changes on the site.

* fix 'gallery_title' extraction

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -41,6 +41,8 @@ class MotherlessExtractor(Extractor):
|
|||||||
path, _, media_id = path.rpartition("/")
|
path, _, media_id = path.rpartition("/")
|
||||||
data = {
|
data = {
|
||||||
"id" : media_id,
|
"id" : media_id,
|
||||||
|
"title": text.unescape(
|
||||||
|
(t := extr("<title>", "<")) and t[:t.rfind(" | ")]),
|
||||||
"type" : extr("__mediatype = '", "'"),
|
"type" : extr("__mediatype = '", "'"),
|
||||||
"group": extr("__group = '", "'"),
|
"group": extr("__group = '", "'"),
|
||||||
"url" : extr("__fileurl = '", "'"),
|
"url" : extr("__fileurl = '", "'"),
|
||||||
@@ -49,7 +51,6 @@ class MotherlessExtractor(Extractor):
|
|||||||
for tag in text.extract_iter(
|
for tag in text.extract_iter(
|
||||||
extr('class="media-meta-tags">', "</div>"), ">#", "<")
|
extr('class="media-meta-tags">', "</div>"), ">#", "<")
|
||||||
],
|
],
|
||||||
"title": text.unescape(extr("<h1>", "<")),
|
|
||||||
"views": text.parse_int(extr(
|
"views": text.parse_int(extr(
|
||||||
'class="count">', " ").replace(",", "")),
|
'class="count">', " ").replace(",", "")),
|
||||||
"favorites": text.parse_int(extr(
|
"favorites": text.parse_int(extr(
|
||||||
@@ -131,10 +132,9 @@ class MotherlessExtractor(Extractor):
|
|||||||
if title:
|
if title:
|
||||||
return text.unescape(title.strip())
|
return text.unescape(title.strip())
|
||||||
|
|
||||||
pos = page.find(f' href="/G{gallery_id}"')
|
if f' href="/G{gallery_id}"' in page:
|
||||||
if pos >= 0:
|
return text.unescape(
|
||||||
return text.unescape(text.extract(
|
(t := text.extr(page, "<title>", "<")) and t[:t.rfind(" | ")])
|
||||||
page, ' title="', '"', pos)[0])
|
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user