[urlgalleries] fix extraction (#7858)
This commit is contained in:
@@ -37,12 +37,12 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
|
|||||||
data = self.metadata(page)
|
data = self.metadata(page)
|
||||||
data["count"] = len(imgs)
|
data["count"] = len(imgs)
|
||||||
|
|
||||||
root = "https://urlgalleries.net/b/" + blog
|
root = self.root
|
||||||
yield Message.Directory, data
|
yield Message.Directory, data
|
||||||
for data["num"], img in enumerate(imgs, 1):
|
for data["num"], img in enumerate(imgs, 1):
|
||||||
page = self.request(root + img).text
|
page = self.request(root + img).text
|
||||||
url = text.extr(page, "window.location.href = '", "'")
|
url = text.extr(page, "window.location.href = '", "'")
|
||||||
yield Message.Queue, url, data
|
yield Message.Queue, url.partition("?")[0], data
|
||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
extr = text.extract_from(page)
|
extr = text.extract_from(page)
|
||||||
@@ -53,7 +53,7 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
|
|||||||
"_rprt": extr(' title="', '"'), # report button
|
"_rprt": extr(' title="', '"'), # report button
|
||||||
"title": text.unescape(extr(' title="', '"').strip()),
|
"title": text.unescape(extr(' title="', '"').strip()),
|
||||||
"date" : text.parse_datetime(
|
"date" : text.parse_datetime(
|
||||||
extr(" images in gallery | ", "<"), "%B %d, %Y %H:%M"),
|
extr(" images in gallery | ", "<"), "%B %d, %Y"),
|
||||||
}
|
}
|
||||||
|
|
||||||
def images(self, page):
|
def images(self, page):
|
||||||
|
|||||||
@@ -21,10 +21,10 @@ __tests__ = (
|
|||||||
|
|
||||||
"blog" : "photos2q",
|
"blog" : "photos2q",
|
||||||
"count" : 39,
|
"count" : 39,
|
||||||
"date" : "dt:2023-12-08 13:59:00",
|
"date" : "dt:2023-12-08 00:00:00",
|
||||||
"gallery_id": "7851311",
|
"gallery_id": "7851311",
|
||||||
"num" : range(1, 3),
|
"num" : range(1, 3),
|
||||||
"title" : "Clarice window 8",
|
"title" : "Clarice window 8 g7rn2ebbao",
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -40,10 +40,10 @@ __tests__ = (
|
|||||||
|
|
||||||
"blog" : "photos2q",
|
"blog" : "photos2q",
|
||||||
"count" : 39,
|
"count" : 39,
|
||||||
"date" : "dt:2023-12-08 13:59:00",
|
"date" : "dt:2023-12-08 00:00:00",
|
||||||
"gallery_id": "7851311",
|
"gallery_id": "7851311",
|
||||||
"num" : range(1, 3),
|
"num" : range(1, 3),
|
||||||
"title" : "Clarice window 8",
|
"title" : "Clarice window 8 g7rn2ebbao",
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -59,10 +59,10 @@ __tests__ = (
|
|||||||
|
|
||||||
"blog" : "Dreamer",
|
"blog" : "Dreamer",
|
||||||
"count" : 105,
|
"count" : 105,
|
||||||
"date" : "dt:2020-03-10 21:17:00",
|
"date" : "dt:2020-03-10 00:00:00",
|
||||||
"gallery_id": "7645840",
|
"gallery_id": "7645840",
|
||||||
"num" : range(1, 3),
|
"num" : range(1, 3),
|
||||||
"title" : "Angelika - Rustic Charm - AmourAngels 2016-09-27",
|
"title" : "Angelika - Rustic Charm - AmourAngels 2016-09-27 7722lhrmys",
|
||||||
},
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user