[myportfolio] fix extraction of galleries without title

This commit is contained in:
Mike Fährmann
2020-04-08 21:08:05 +02:00
parent 3b50c4f49d
commit 9e7dfc0cfc
2 changed files with 17 additions and 9 deletions

View File

@@ -23,18 +23,24 @@ class MyportfolioGalleryExtractor(Extractor):
r"(?:https?://)?([^.]+\.myportfolio\.com))" r"(?:https?://)?([^.]+\.myportfolio\.com))"
r"(/[^/?&#]+)?") r"(/[^/?&#]+)?")
test = ( test = (
("https://hannahcosgrove.myportfolio.com/niamh-1", { ("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", {
"url": "8cbd73a73e5bf3b4f5d1b1d4a1eb114c01a72a66", "url": "acea0690c76db0e5cf267648cefd86e921bc3499",
"keyword": "7a460bb5641e648ae70702ff91c2fb11054b0e0b", "keyword": "6ac6befe2ee0af921d24cf1dd4a4ed71be06db6d",
}), }),
("https://hannahcosgrove.myportfolio.com/lfw", { ("https://andrewling.myportfolio.com/", {
"pattern": r"https://hannahcosgrove\.myportfolio\.com/[^/?&#+]+$", "pattern": r"https://andrewling\.myportfolio\.com/[^/?&#+]+$",
"count": ">= 8", "count": ">= 6",
}), }),
# no explicit title
("https://stevenilousphotography.myportfolio.com/society", {
"keyword": "49e7ff6322645c22b409280656202c2736a380c9",
}),
# custom domain
("myportfolio:https://tooco.com.ar/6-of-diamonds-paradise-bird", { ("myportfolio:https://tooco.com.ar/6-of-diamonds-paradise-bird", {
"count": 3, "count": 3,
}), }),
("myportfolio:https://tooco.com.ar/", { ("myportfolio:https://tooco.com.ar/", {
"pattern": pattern,
"count": ">= 40", "count": ">= 40",
}), }),
) )
@@ -80,8 +86,11 @@ class MyportfolioGalleryExtractor(Extractor):
title, pos = text.extract( title, pos = text.extract(
page, '<h1 ', '</h1>', pos) page, '<h1 ', '</h1>', pos)
title = title.partition(">")[2] if title:
user = user[:-len(title)-3] title = title.partition(">")[2]
user = user[:-len(title)-3]
else:
user, _, title = user.partition(" - ")
return { return {
"user": text.unescape(user), "user": text.unescape(user),

View File

@@ -27,7 +27,6 @@ TRAVIS_SKIP = {
# temporary issues, etc. # temporary issues, etc.
BROKEN = { BROKEN = {
"myportfolio",
"photobucket", "photobucket",
"worldthree", "worldthree",
} }