* [motherless] Add group support * [motherless] Update old unit tests * [motherless] Add new unit tests * [motherless] Update docs
This commit is contained in:
@@ -622,7 +622,7 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<tr>
|
<tr>
|
||||||
<td>Motherless</td>
|
<td>Motherless</td>
|
||||||
<td>https://motherless.com/</td>
|
<td>https://motherless.com/</td>
|
||||||
<td>Galleries, Media Files</td>
|
<td>Galleries, Groups, Media Files</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ class MotherlessExtractor(Extractor):
|
|||||||
archive_fmt = "{id}"
|
archive_fmt = "{id}"
|
||||||
|
|
||||||
def _extract_media(self, path):
|
def _extract_media(self, path):
|
||||||
url = self.root + "/" + path
|
url = f"{self.root}/{path}"
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
extr = text.extract_from(page)
|
extr = text.extract_from(page)
|
||||||
|
|
||||||
@@ -48,10 +48,59 @@ class MotherlessExtractor(Extractor):
|
|||||||
"uploader": text.unescape(extr('class="username">', "<").strip()),
|
"uploader": text.unescape(extr('class="username">', "<").strip()),
|
||||||
}
|
}
|
||||||
|
|
||||||
if path and path[0] == "G":
|
if not path:
|
||||||
|
pass
|
||||||
|
elif path[0] == "G":
|
||||||
data["gallery_id"] = path[1:]
|
data["gallery_id"] = path[1:]
|
||||||
data["gallery_title"] = self._extract_gallery_title(
|
data["gallery_title"] = self._extract_gallery_title(
|
||||||
page, data["gallery_id"])
|
page, data["gallery_id"])
|
||||||
|
elif path[0] == "g":
|
||||||
|
data["group_id"] = path[2:]
|
||||||
|
data["group_title"] = self._extract_group_title(
|
||||||
|
page, data["group_id"])
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
def _pagination(self, page):
|
||||||
|
while True:
|
||||||
|
for thumb in text.extract_iter(
|
||||||
|
page, 'class="thumb-container', "</div>"):
|
||||||
|
yield thumb
|
||||||
|
|
||||||
|
url = text.extr(page, '<link rel="next" href="', '"')
|
||||||
|
if not url:
|
||||||
|
return
|
||||||
|
page = self.request(text.unescape(url)).text
|
||||||
|
|
||||||
|
def _extract_data(self, page, category):
|
||||||
|
extr = text.extract_from(page)
|
||||||
|
|
||||||
|
gid = self.groups[-1]
|
||||||
|
if category == "gallery":
|
||||||
|
title = self._extract_gallery_title(page, gid)
|
||||||
|
else:
|
||||||
|
title = self._extract_group_title(page, gid)
|
||||||
|
|
||||||
|
return {
|
||||||
|
f"{category}_id": gid,
|
||||||
|
f"{category}_title": title,
|
||||||
|
"uploader": text.remove_html(extr(
|
||||||
|
f'class="{category}-member-username">', "</")),
|
||||||
|
"count": text.parse_int(
|
||||||
|
extr('<span class="active">', ")")
|
||||||
|
.rpartition("(")[2].replace(",", "")),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _parse_thumb_data(self, thumb):
|
||||||
|
extr = text.extract_from(thumb)
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"id" : extr('data-codename="', '"'),
|
||||||
|
"type" : extr('data-mediatype="', '"'),
|
||||||
|
"thumbnail": extr('class="static" src="', '"'),
|
||||||
|
"title" : extr(' alt="', '"'),
|
||||||
|
}
|
||||||
|
data["url"] = data["thumbnail"].replace("thumb", data["type"])
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@@ -72,13 +121,23 @@ class MotherlessExtractor(Extractor):
|
|||||||
if title:
|
if title:
|
||||||
return text.unescape(title.strip())
|
return text.unescape(title.strip())
|
||||||
|
|
||||||
pos = page.find(' href="/G' + gallery_id + '"')
|
pos = page.find(f' href="/G{gallery_id}"')
|
||||||
if pos >= 0:
|
if pos >= 0:
|
||||||
return text.unescape(text.extract(
|
return text.unescape(text.extract(
|
||||||
page, ' title="', '"', pos)[0])
|
page, ' title="', '"', pos)[0])
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
@memcache(keyarg=2)
|
||||||
|
def _extract_group_title(self, page, group_id):
|
||||||
|
title = text.extr(
|
||||||
|
text.extr(page, '<h1 class="group-bio-name">', "</h1>"),
|
||||||
|
">", "<")
|
||||||
|
if title:
|
||||||
|
return text.unescape(title.strip())
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
class MotherlessMediaExtractor(MotherlessExtractor):
|
class MotherlessMediaExtractor(MotherlessExtractor):
|
||||||
"""Extractor for a single image/video from motherless.com"""
|
"""Extractor for a single image/video from motherless.com"""
|
||||||
@@ -109,59 +168,62 @@ class MotherlessGalleryExtractor(MotherlessExtractor):
|
|||||||
|
|
||||||
if not type:
|
if not type:
|
||||||
data = {"_extractor": MotherlessGalleryExtractor}
|
data = {"_extractor": MotherlessGalleryExtractor}
|
||||||
yield Message.Queue, self.root + "/GI" + gid, data
|
yield Message.Queue, f"{self.root}/GI{gid}", data
|
||||||
yield Message.Queue, self.root + "/GV" + gid, data
|
yield Message.Queue, f"{self.root}/GV{gid}", data
|
||||||
return
|
return
|
||||||
|
|
||||||
url = f"{self.root}/G{type}{gid}"
|
url = f"{self.root}/G{type}{gid}"
|
||||||
page = self.request(url).text
|
page = self.request(url).text
|
||||||
data = self._extract_gallery_data(page)
|
data = self._extract_data(page, "gallery")
|
||||||
|
|
||||||
for num, thumb in enumerate(self._pagination(page), 1):
|
for num, thumb in enumerate(self._pagination(page), 1):
|
||||||
file = self._parse_thumb_data(thumb)
|
file = self._parse_thumb_data(thumb)
|
||||||
|
thumbnail = file["thumbnail"]
|
||||||
|
|
||||||
if file["type"] == "video":
|
if file["type"] == "video":
|
||||||
file = self._extract_media(file["id"])
|
file = self._extract_media(file["id"])
|
||||||
|
|
||||||
file.update(data)
|
file.update(data)
|
||||||
file["num"] = num
|
file["num"] = num
|
||||||
|
file["thumbnail"] = thumbnail
|
||||||
url = file["url"]
|
url = file["url"]
|
||||||
yield Message.Directory, file
|
yield Message.Directory, file
|
||||||
yield Message.Url, url, text.nameext_from_url(url, file)
|
yield Message.Url, url, text.nameext_from_url(url, file)
|
||||||
|
|
||||||
def _pagination(self, page):
|
|
||||||
while True:
|
|
||||||
for thumb in text.extract_iter(
|
|
||||||
page, 'class="thumb-container', "</div>"):
|
|
||||||
yield thumb
|
|
||||||
|
|
||||||
url = text.extr(page, '<link rel="next" href="', '"')
|
class MotherlessGroupExtractor(MotherlessExtractor):
|
||||||
if not url:
|
subcategory = "group"
|
||||||
return
|
directory_fmt = ("{category}", "{uploader}",
|
||||||
page = self.request(text.unescape(url)).text
|
"{group_id} {group_title}")
|
||||||
|
archive_fmt = "{group_id}_{id}"
|
||||||
|
pattern = BASE_PATTERN + "/g([iv]?)/?([a-z0-9_]+)/?$"
|
||||||
|
example = "https://motherless.com/g/abc123"
|
||||||
|
|
||||||
def _extract_gallery_data(self, page):
|
def items(self):
|
||||||
extr = text.extract_from(page)
|
type, gid = self.groups
|
||||||
return {
|
|
||||||
"gallery_id": self.groups[-1],
|
|
||||||
"gallery_title": text.unescape(extr(
|
|
||||||
"<title>", "<").rpartition(" | ")[0]),
|
|
||||||
"uploader": text.remove_html(extr(
|
|
||||||
'class="gallery-member-username">', "</")),
|
|
||||||
"count": text.parse_int(
|
|
||||||
extr('<span class="active">', ")")
|
|
||||||
.rpartition("(")[2].replace(",", "")),
|
|
||||||
}
|
|
||||||
|
|
||||||
def _parse_thumb_data(self, thumb):
|
if not type:
|
||||||
extr = text.extract_from(thumb)
|
data = {"_extractor": MotherlessGroupExtractor}
|
||||||
|
yield Message.Queue, f"{self.root}/gi/{gid}", data
|
||||||
|
yield Message.Queue, f"{self.root}/gv/{gid}", data
|
||||||
|
return
|
||||||
|
|
||||||
data = {
|
url = f"{self.root}/g{type}/{gid}"
|
||||||
"id" : extr('data-codename="', '"'),
|
page = self.request(url).text
|
||||||
"type" : extr('data-mediatype="', '"'),
|
data = self._extract_data(page, "group")
|
||||||
"thumbnail": extr('class="static" src="', '"'),
|
|
||||||
"title" : extr(' alt="', '"'),
|
|
||||||
}
|
|
||||||
data["url"] = data["thumbnail"].replace("thumb", data["type"])
|
|
||||||
|
|
||||||
return data
|
for num, thumb in enumerate(self._pagination(page), 1):
|
||||||
|
file = self._parse_thumb_data(thumb)
|
||||||
|
thumbnail = file["thumbnail"]
|
||||||
|
|
||||||
|
file = self._extract_media(file["id"])
|
||||||
|
|
||||||
|
uploader = file.get("uploader")
|
||||||
|
file.update(data)
|
||||||
|
file["num"] = num
|
||||||
|
file["thumbnail"] = thumbnail
|
||||||
|
file["uploader"] = uploader
|
||||||
|
file["group"] = file["group_id"]
|
||||||
|
url = file["url"]
|
||||||
|
yield Message.Directory, file
|
||||||
|
yield Message.Url, url, text.nameext_from_url(url, file)
|
||||||
|
|||||||
@@ -33,27 +33,6 @@ __tests__ = (
|
|||||||
|
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
|
||||||
"#url" : "https://motherless.com/G43D8704/F0C07D3",
|
|
||||||
"#class": motherless.MotherlessMediaExtractor,
|
|
||||||
"#results": "https://cdn5-images.motherlessmedia.com/images/F0C07D3.jpg",
|
|
||||||
|
|
||||||
"date" : "dt:2014-08-13 00:00:00",
|
|
||||||
"extension" : "jpg",
|
|
||||||
"favorites" : range(100, 200),
|
|
||||||
"filename" : "F0C07D3",
|
|
||||||
"gallery_id": "43D8704",
|
|
||||||
"gallery_title": "SpeechLess",
|
|
||||||
"group" : "",
|
|
||||||
"id" : "F0C07D3",
|
|
||||||
"tags" : [],
|
|
||||||
"title" : "Spunky Angels Amy Black Dress",
|
|
||||||
"type" : "image",
|
|
||||||
"uploader" : "jonesyjonesy",
|
|
||||||
"url" : "https://cdn5-images.motherlessmedia.com/images/F0C07D3.jpg",
|
|
||||||
"views" : range(14000, 20000),
|
|
||||||
},
|
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://motherless.com/g/classic_porn/19D6C80",
|
"#url" : "https://motherless.com/g/classic_porn/19D6C80",
|
||||||
"#class": motherless.MotherlessMediaExtractor,
|
"#class": motherless.MotherlessMediaExtractor,
|
||||||
@@ -74,54 +53,125 @@ __tests__ = (
|
|||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://motherless.com/G43D8704",
|
"#url" : "https://motherless.com/G444B6FA/46ABC1A",
|
||||||
|
"#class": motherless.MotherlessMediaExtractor,
|
||||||
|
"#results": "https://cdn5-images.motherlessmedia.com/images/46ABC1A.jpg",
|
||||||
|
|
||||||
|
"date" : "dt:2017-11-24 00:00:00",
|
||||||
|
"extension" : "jpg",
|
||||||
|
"favorites" : range(0, 100),
|
||||||
|
"filename" : "46ABC1A",
|
||||||
|
"gallery_id": "444B6FA",
|
||||||
|
"group" : "",
|
||||||
|
"id" : "46ABC1A",
|
||||||
|
"tags" : [
|
||||||
|
"rope",
|
||||||
|
"bondage",
|
||||||
|
"bdsm"
|
||||||
|
],
|
||||||
|
"title" : "Some More Pix",
|
||||||
|
"type" : "image",
|
||||||
|
"uploader" : "FATBOY114",
|
||||||
|
"url" : "https://cdn5-images.motherlessmedia.com/images/46ABC1A.jpg",
|
||||||
|
"views" : range(100, 2000),
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://motherless.com/G444B6FA",
|
||||||
"#class": motherless.MotherlessGalleryExtractor,
|
"#class": motherless.MotherlessGalleryExtractor,
|
||||||
"#results": (
|
"#results": (
|
||||||
"https://motherless.com/GI43D8704",
|
"https://motherless.com/GI444B6FA",
|
||||||
"https://motherless.com/GV43D8704",
|
"https://motherless.com/GV444B6FA",
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://motherless.com/GI43D8704",
|
"#url" : "https://motherless.com/GI444B6FA",
|
||||||
"#class": motherless.MotherlessGalleryExtractor,
|
"#class": motherless.MotherlessGalleryExtractor,
|
||||||
"#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/\w+\.(jpg|png|gif)",
|
"#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
|
||||||
"#range" : "1-100",
|
"#range" : "1-100",
|
||||||
"#count" : 100,
|
"#count" : 10,
|
||||||
|
|
||||||
"count" : range(5000, 8000),
|
"count" : range(5, 50),
|
||||||
"extension" : {"jpg", "png", "gif"},
|
"extension" : {"jpg", "jpeg", "png", "gif"},
|
||||||
"filename" : str,
|
"filename" : str,
|
||||||
"gallery_id" : "43D8704",
|
"gallery_id" : "444B6FA",
|
||||||
"gallery_title": "SpeechLess",
|
|
||||||
"id" : str,
|
"id" : str,
|
||||||
"num" : int,
|
"num" : int,
|
||||||
"thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/\w+\.\w+",
|
"thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
|
||||||
"title" : str,
|
"title" : str,
|
||||||
"type" : "image",
|
"type" : "image",
|
||||||
"uploader" : "gaylobe",
|
"uploader" : "WawaWeWa",
|
||||||
"url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/\w+\.(jpg|png|gif)",
|
"url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"#url" : "https://motherless.com/GV43D8704",
|
"#url" : "https://motherless.com/GV444B6FA",
|
||||||
"#class": motherless.MotherlessGalleryExtractor,
|
"#class": motherless.MotherlessGalleryExtractor,
|
||||||
"#pattern": r"https://cdn5-videos.motherlessmedia.com/videos/\w+\.mp4",
|
"#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?",
|
||||||
"#range" : "1-100",
|
"#range" : "1-100",
|
||||||
"#count" : 100,
|
"#count" : 29,
|
||||||
|
|
||||||
"count" : range(500, 900),
|
"count" : range(20, 100),
|
||||||
"extension" : "mp4",
|
"extension" : "mp4",
|
||||||
"filename" : str,
|
"filename" : str,
|
||||||
"gallery_id" : "43D8704",
|
"gallery_id" : "444B6FA",
|
||||||
"gallery_title": "SpeechLess",
|
|
||||||
"id" : str,
|
"id" : str,
|
||||||
"num" : int,
|
"num" : int,
|
||||||
"thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[\w-]+\.\w+",
|
"thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
|
||||||
"title" : str,
|
"title" : str,
|
||||||
"type" : "video",
|
"type" : "video",
|
||||||
"uploader" : "gaylobe",
|
"uploader" : "WawaWeWa",
|
||||||
"url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/\w+\.mp4",
|
"url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?",
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://motherless.com/g/bump___grind",
|
||||||
|
"#class": motherless.MotherlessGroupExtractor,
|
||||||
|
"#results": (
|
||||||
|
"https://motherless.com/gi/bump___grind",
|
||||||
|
"https://motherless.com/gv/bump___grind",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://motherless.com/gi/bump___grind",
|
||||||
|
"#class": motherless.MotherlessGroupExtractor,
|
||||||
|
"#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
|
||||||
|
"#range" : "1-100",
|
||||||
|
"#count" : 18,
|
||||||
|
|
||||||
|
"count" : range(5, 50),
|
||||||
|
"extension" : {"jpg", "jpeg", "png", "gif"},
|
||||||
|
"filename" : str,
|
||||||
|
"group_id" : "bump___grind",
|
||||||
|
"group" : "bump___grind",
|
||||||
|
"id" : str,
|
||||||
|
"num" : int,
|
||||||
|
"thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
|
||||||
|
"title" : str,
|
||||||
|
"type" : "image",
|
||||||
|
"url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"#url" : "https://motherless.com/gv/bump___grind",
|
||||||
|
"#class": motherless.MotherlessGroupExtractor,
|
||||||
|
"#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?",
|
||||||
|
"#range" : "1-100",
|
||||||
|
"#count" : 25,
|
||||||
|
|
||||||
|
"count" : range(20, 100),
|
||||||
|
"extension" : "mp4",
|
||||||
|
"filename" : str,
|
||||||
|
"group_id" : "bump___grind",
|
||||||
|
"group" : "bump___grind",
|
||||||
|
"id" : str,
|
||||||
|
"num" : int,
|
||||||
|
"thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
|
||||||
|
"title" : str,
|
||||||
|
"type" : "video",
|
||||||
|
"url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?",
|
||||||
},
|
},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user