[urlgalleries] fix extraction (#7858)

This commit is contained in:
Mike Fährmann
2025-07-20 16:30:09 +02:00
parent 008a08ca43
commit fc06243b89
2 changed files with 9 additions and 9 deletions

View File

@@ -37,12 +37,12 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
data = self.metadata(page)
data["count"] = len(imgs)
-root = "https://urlgalleries.net/b/" + blog
+root = self.root
yield Message.Directory, data
for data["num"], img in enumerate(imgs, 1):
page = self.request(root + img).text
url = text.extr(page, "window.location.href = '", "'")
-yield Message.Queue, url, data
+yield Message.Queue, url.partition("?")[0], data
def metadata(self, page):
extr = text.extract_from(page)
@@ -53,7 +53,7 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
"_rprt": extr(' title="', '"'), # report button
"title": text.unescape(extr(' title="', '"').strip()),
"date" : text.parse_datetime(
-extr(" images in gallery | ", "<"), "%B %d, %Y %H:%M"),
+extr(" images in gallery | ", "<"), "%B %d, %Y"),
}
def images(self, page):

View File

@@ -21,10 +21,10 @@ __tests__ = (
"blog" : "photos2q",
"count" : 39,
-"date" : "dt:2023-12-08 13:59:00",
+"date" : "dt:2023-12-08 00:00:00",
"gallery_id": "7851311",
"num" : range(1, 3),
-"title" : "Clarice window 8",
+"title" : "Clarice window 8 g7rn2ebbao",
},
{
@@ -40,10 +40,10 @@ __tests__ = (
"blog" : "photos2q",
"count" : 39,
-"date" : "dt:2023-12-08 13:59:00",
+"date" : "dt:2023-12-08 00:00:00",
"gallery_id": "7851311",
"num" : range(1, 3),
-"title" : "Clarice window 8",
+"title" : "Clarice window 8 g7rn2ebbao",
},
{
@@ -59,10 +59,10 @@ __tests__ = (
"blog" : "Dreamer",
"count" : 105,
-"date" : "dt:2020-03-10 21:17:00",
+"date" : "dt:2020-03-10 00:00:00",
"gallery_id": "7645840",
"num" : range(1, 3),
-"title" : "Angelika - Rustic Charm - AmourAngels 2016-09-27",
+"title" : "Angelika - Rustic Charm - AmourAngels 2016-09-27 7722lhrmys",
},
)