[chevereto:album] extract 'album_…' metadata (#8604)

This commit is contained in:
Mike Fährmann
2025-12-08 19:02:41 +01:00
parent e3875127af
commit f5f4122cb6
2 changed files with 34 additions and 4 deletions

View File

@@ -21,10 +21,12 @@ class CheveretoExtractor(BaseExtractor):
def _init(self): def _init(self):
self.path = self.groups[-1] self.path = self.groups[-1]
def _pagination(self, url): def _pagination(self, url, callback=None):
while True: page = self.request(url).text
page = self.request(url).text if callback is not None:
callback(page)
while True:
for item in text.extract_iter( for item in text.extract_iter(
page, '<div class="list-item-image ', 'image-container'): page, '<div class="list-item-image ', 'image-container'):
yield text.urljoin(self.root, text.extr( yield text.urljoin(self.root, text.extr(
@@ -35,6 +37,7 @@ class CheveretoExtractor(BaseExtractor):
return return
if url[0] == "/": if url[0] == "/":
url = self.root + url url = self.root + url
page = self.request(url).text
BASE_PATTERN = CheveretoExtractor.update({ BASE_PATTERN = CheveretoExtractor.update({
@@ -155,10 +158,21 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
albums = (url,) albums = (url,)
for album in albums: for album in albums:
for item_url in self._pagination(album): for item_url in self._pagination(
album, self._extract_metadata_album):
data = data_video if "/video/" in item_url else data_image data = data_video if "/video/" in item_url else data_image
yield Message.Queue, item_url, data yield Message.Queue, item_url, data
def _extract_metadata_album(self, page):
    """Record album metadata from an album page in 'self.kwdict'.

    Reads the 'og:url' and 'og:title' meta properties from 'page' and
    stores the 'album_slug', 'album_id', and 'album' keyword values.
    A property that cannot be found yields empty strings instead of
    raising, so a page without these tags does not abort extraction.
    """
    url, pos = text.extract(
        page, 'property="og:url" content="', '"')
    # Continue searching for the title from where the URL match ended
    title = text.extract(
        page, 'property="og:title" content="', '"', pos)[0]

    # Guard against absent tags: 'url' / 'title' are presumably None
    # when a marker is not found (TODO confirm against text.extract)
    name = (url or "").rpartition("/")[2]

    kwdict = self.kwdict
    # Split the last URL path segment at its final '.':
    # text before it is the slug, text after it is the ID
    kwdict["album_slug"], _, kwdict["album_id"] = name.rpartition(".")
    kwdict["album"] = text.unescape(title or "")
class CheveretoCategoryExtractor(CheveretoExtractor): class CheveretoCategoryExtractor(CheveretoExtractor):
"""Extractor for chevereto galleries""" """Extractor for chevereto galleries"""

View File

@@ -118,6 +118,10 @@ __tests__ = (
"#category": ("chevereto", "jpgfish", "album"), "#category": ("chevereto", "jpgfish", "album"),
"#class" : chevereto.CheveretoAlbumExtractor, "#class" : chevereto.CheveretoAlbumExtractor,
"#count" : 2, "#count" : 2,
"album" : "funny meme album",
"album_id" : "CDilP",
"album_slug": "funny-meme-album",
}, },
{ {
@@ -125,6 +129,10 @@ __tests__ = (
"#category": ("chevereto", "jpgfish", "album"), "#category": ("chevereto", "jpgfish", "album"),
"#class" : chevereto.CheveretoAlbumExtractor, "#class" : chevereto.CheveretoAlbumExtractor,
"#count" : 114, "#count" : 114,
"album" : "Gunggingnsk OF",
"album_id" : "N9OOI",
"album_slug": "gunggingnsk",
}, },
{ {
@@ -132,6 +140,10 @@ __tests__ = (
"#category": ("chevereto", "jpgfish", "album"), "#category": ("chevereto", "jpgfish", "album"),
"#class" : chevereto.CheveretoAlbumExtractor, "#class" : chevereto.CheveretoAlbumExtractor,
"#count" : 100, "#count" : 100,
"album" : "101-200",
"album_id" : "aNJ6A",
"album_slug": "101-200",
}, },
{ {
@@ -139,6 +151,10 @@ __tests__ = (
"#category": ("chevereto", "jpgfish", "album"), "#category": ("chevereto", "jpgfish", "album"),
"#class" : chevereto.CheveretoAlbumExtractor, "#class" : chevereto.CheveretoAlbumExtractor,
"#count" : 606, "#count" : 606,
"album" : "re:([12345]0)?1-[123456]00",
"album_id" : str,
"album_slug": str,
}, },
{ {