[motherless] detect 404 / 'File not found' pages
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
"""Extractors for https://motherless.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
from .. import text, util, exception
|
||||
from ..cache import memcache
|
||||
from datetime import timedelta
|
||||
|
||||
@@ -23,6 +23,17 @@ class MotherlessExtractor(Extractor):
|
||||
filename_fmt = "{id} {title}.{extension}"
|
||||
archive_fmt = "{id}"
|
||||
|
||||
def request(self, url, **kwargs):
    """Perform a request and reject 'not found' pages.

    Delegates to Extractor.request() and scans the raw response body
    for the known error-page markers. Raises exception.NotFoundError
    if one of them is present.

    Once a response passes this check, the 'request' attribute of this
    instance is rebound to the plain Extractor.request(), so subsequent
    calls on the same instance skip the scan.
    """
    response = Extractor.request(self, url, **kwargs)

    body = response.content
    error_markers = (
        b'<div class="error-page',
        b">The page you're looking for cannot be found.<",
    )
    for marker in error_markers:
        if marker in body:
            raise exception.NotFoundError("page")

    # first response was fine: shadow this method on the instance
    # with the base implementation to avoid re-checking later pages
    self.request = Extractor.request.__get__(self)
    return response
|
||||
|
||||
def _extract_media(self, path):
|
||||
url = f"{self.root}/{path}"
|
||||
page = self.request(url).text
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import motherless
|
||||
from gallery_dl import exception
|
||||
|
||||
|
||||
__tests__ = (
|
||||
@@ -76,6 +77,12 @@ __tests__ = (
|
||||
"views" : range(100, 2000),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://motherless.com/8850983",
|
||||
"#class" : motherless.MotherlessMediaExtractor,
|
||||
"#exception": exception.NotFoundError,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://motherless.com/G444B6FA",
|
||||
"#class": motherless.MotherlessGalleryExtractor,
|
||||
@@ -90,7 +97,7 @@ __tests__ = (
|
||||
"#class": motherless.MotherlessGalleryExtractor,
|
||||
"#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
|
||||
"#range" : "1-100",
|
||||
"#count" : 10,
|
||||
"#count" : range(5, 50),
|
||||
|
||||
"count" : range(5, 50),
|
||||
"extension" : {"jpg", "jpeg", "png", "gif"},
|
||||
@@ -110,7 +117,7 @@ __tests__ = (
|
||||
"#class": motherless.MotherlessGalleryExtractor,
|
||||
"#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?",
|
||||
"#range" : "1-100",
|
||||
"#count" : 29,
|
||||
"#count" : range(20, 40),
|
||||
|
||||
"count" : range(20, 100),
|
||||
"extension" : "mp4",
|
||||
@@ -125,6 +132,12 @@ __tests__ = (
|
||||
"url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://motherless.com/GI466D59F",
|
||||
"#class" : motherless.MotherlessGalleryExtractor,
|
||||
"#exception": exception.NotFoundError,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://motherless.com/g/bump___grind",
|
||||
"#class": motherless.MotherlessGroupExtractor,
|
||||
|
||||
Reference in New Issue
Block a user