[imagechest] fix extraction (#3914)

This commit is contained in:
Mike Fährmann
2023-04-14 20:06:59 +02:00
parent 2cd4411ff8
commit c45f09d2a8

View File

@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Leonid "Bepis" Pavel
# Copyright 2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from galleries at https://imgchest.com/"""
"""Extractors for https://imgchest.com/"""
from .common import GalleryExtractor
from .. import text, exception
@@ -19,7 +20,14 @@ class ImagechestGalleryExtractor(GalleryExtractor):
pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})"
test = (
("https://imgchest.com/p/3na7kr3by8d", {
"url": "f095b4f78c051e5a94e7c663814d1e8d4c93c1f7",
"pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
"keyword": {
"count": 3,
"gallery_id": "3na7kr3by8d",
"num": int,
"title": "Wizardry - Video Game From The Mid 80's",
},
"url": "7328ca4ec2459378d725e3be19f661d2b045feda",
"content": "076959e65be30249a2c651fbe6090dc30ba85193",
"count": 3
}),
@@ -43,6 +51,5 @@ class ImagechestGalleryExtractor(GalleryExtractor):
def images(self, page):
    """Collect the image URLs found in *page*.

    Returns a list of ``(url, None)`` pairs, one for every value that
    ``text.extract_iter`` yields when scanning *page* for the text
    between ``data-url="`` and the next ``"``.
    """
    # Only the 'data-url' scan is kept. The older
    # 'property="og:image" content="' scan is the one this change
    # replaces; keeping both ``for`` clauses in a single comprehension
    # would nest them and emit every 'data-url' match once per
    # og:image match.
    return [
        (url, None)
        for url in text.extract_iter(page, 'data-url="', '"')
    ]