diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 51d6fe68..d4b0ba7b 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -622,7 +622,7 @@ Consider all listed sites to potentially be NSFW.
| Motherless |
https://motherless.com/ |
- Galleries, Media Files |
+ Galleries, Groups, Media Files |
|
diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py
index 35454807..c81a4d15 100644
--- a/gallery_dl/extractor/motherless.py
+++ b/gallery_dl/extractor/motherless.py
@@ -24,7 +24,7 @@ class MotherlessExtractor(Extractor):
archive_fmt = "{id}"
def _extract_media(self, path):
- url = self.root + "/" + path
+ url = f"{self.root}/{path}"
page = self.request(url).text
extr = text.extract_from(page)
@@ -48,10 +48,59 @@ class MotherlessExtractor(Extractor):
"uploader": text.unescape(extr('class="username">', "<").strip()),
}
- if path and path[0] == "G":
+ if not path:
+ pass
+ elif path[0] == "G":
data["gallery_id"] = path[1:]
data["gallery_title"] = self._extract_gallery_title(
page, data["gallery_id"])
+ elif path[0] == "g":
+ data["group_id"] = path[2:]
+ data["group_title"] = self._extract_group_title(
+ page, data["group_id"])
+
+ return data
+
+ def _pagination(self, page):
+ while True:
+ for thumb in text.extract_iter(
+ page, 'class="thumb-container', ""):
+ yield thumb
+
+ url = text.extr(page, '', "")),
+ "count": text.parse_int(
+ extr('', ")")
+ .rpartition("(")[2].replace(",", "")),
+ }
+
+ def _parse_thumb_data(self, thumb):
+ extr = text.extract_from(thumb)
+
+ data = {
+ "id" : extr('data-codename="', '"'),
+ "type" : extr('data-mediatype="', '"'),
+ "thumbnail": extr('class="static" src="', '"'),
+ "title" : extr(' alt="', '"'),
+ }
+ data["url"] = data["thumbnail"].replace("thumb", data["type"])
return data
@@ -72,13 +121,23 @@ class MotherlessExtractor(Extractor):
if title:
return text.unescape(title.strip())
- pos = page.find(' href="/G' + gallery_id + '"')
+ pos = page.find(f' href="/G{gallery_id}"')
if pos >= 0:
return text.unescape(text.extract(
page, ' title="', '"', pos)[0])
return ""
+ @memcache(keyarg=2)
+ def _extract_group_title(self, page, group_id):
+ title = text.extr(
+ text.extr(page, '', "
"),
+ ">", "<")
+ if title:
+ return text.unescape(title.strip())
+
+ return ""
+
class MotherlessMediaExtractor(MotherlessExtractor):
"""Extractor for a single image/video from motherless.com"""
@@ -109,59 +168,62 @@ class MotherlessGalleryExtractor(MotherlessExtractor):
if not type:
data = {"_extractor": MotherlessGalleryExtractor}
- yield Message.Queue, self.root + "/GI" + gid, data
- yield Message.Queue, self.root + "/GV" + gid, data
+ yield Message.Queue, f"{self.root}/GI{gid}", data
+ yield Message.Queue, f"{self.root}/GV{gid}", data
return
url = f"{self.root}/G{type}{gid}"
page = self.request(url).text
- data = self._extract_gallery_data(page)
+ data = self._extract_data(page, "gallery")
for num, thumb in enumerate(self._pagination(page), 1):
file = self._parse_thumb_data(thumb)
+ thumbnail = file["thumbnail"]
if file["type"] == "video":
file = self._extract_media(file["id"])
file.update(data)
file["num"] = num
+ file["thumbnail"] = thumbnail
url = file["url"]
yield Message.Directory, file
yield Message.Url, url, text.nameext_from_url(url, file)
- def _pagination(self, page):
- while True:
- for thumb in text.extract_iter(
- page, 'class="thumb-container', ""):
- yield thumb
- url = text.extr(page, '", "<").rpartition(" | ")[0]),
- "uploader": text.remove_html(extr(
- 'class="gallery-member-username">', "")),
- "count": text.parse_int(
- extr('', ")")
- .rpartition("(")[2].replace(",", "")),
- }
+ def items(self):
+ type, gid = self.groups
- def _parse_thumb_data(self, thumb):
- extr = text.extract_from(thumb)
+ if not type:
+ data = {"_extractor": MotherlessGroupExtractor}
+ yield Message.Queue, f"{self.root}/gi/{gid}", data
+ yield Message.Queue, f"{self.root}/gv/{gid}", data
+ return
- data = {
- "id" : extr('data-codename="', '"'),
- "type" : extr('data-mediatype="', '"'),
- "thumbnail": extr('class="static" src="', '"'),
- "title" : extr(' alt="', '"'),
- }
- data["url"] = data["thumbnail"].replace("thumb", data["type"])
+ url = f"{self.root}/g{type}/{gid}"
+ page = self.request(url).text
+ data = self._extract_data(page, "group")
- return data
+ for num, thumb in enumerate(self._pagination(page), 1):
+ file = self._parse_thumb_data(thumb)
+ thumbnail = file["thumbnail"]
+
+ file = self._extract_media(file["id"])
+
+ uploader = file.get("uploader")
+ file.update(data)
+ file["num"] = num
+ file["thumbnail"] = thumbnail
+ file["uploader"] = uploader
+ file["group"] = file["group_id"]
+ url = file["url"]
+ yield Message.Directory, file
+ yield Message.Url, url, text.nameext_from_url(url, file)
diff --git a/test/results/motherless.py b/test/results/motherless.py
index f0d469d7..b4742ad3 100644
--- a/test/results/motherless.py
+++ b/test/results/motherless.py
@@ -33,27 +33,6 @@ __tests__ = (
},
-{
- "#url" : "https://motherless.com/G43D8704/F0C07D3",
- "#class": motherless.MotherlessMediaExtractor,
- "#results": "https://cdn5-images.motherlessmedia.com/images/F0C07D3.jpg",
-
- "date" : "dt:2014-08-13 00:00:00",
- "extension" : "jpg",
- "favorites" : range(100, 200),
- "filename" : "F0C07D3",
- "gallery_id": "43D8704",
- "gallery_title": "SpeechLess",
- "group" : "",
- "id" : "F0C07D3",
- "tags" : [],
- "title" : "Spunky Angels Amy Black Dress",
- "type" : "image",
- "uploader" : "jonesyjonesy",
- "url" : "https://cdn5-images.motherlessmedia.com/images/F0C07D3.jpg",
- "views" : range(14000, 20000),
-},
-
{
"#url" : "https://motherless.com/g/classic_porn/19D6C80",
"#class": motherless.MotherlessMediaExtractor,
@@ -74,54 +53,125 @@ __tests__ = (
},
{
- "#url" : "https://motherless.com/G43D8704",
+ "#url" : "https://motherless.com/G444B6FA/46ABC1A",
+ "#class": motherless.MotherlessMediaExtractor,
+ "#results": "https://cdn5-images.motherlessmedia.com/images/46ABC1A.jpg",
+
+ "date" : "dt:2017-11-24 00:00:00",
+ "extension" : "jpg",
+ "favorites" : range(0, 100),
+ "filename" : "46ABC1A",
+ "gallery_id": "444B6FA",
+ "group" : "",
+ "id" : "46ABC1A",
+ "tags" : [
+ "rope",
+ "bondage",
+ "bdsm"
+ ],
+ "title" : "Some More Pix",
+ "type" : "image",
+ "uploader" : "FATBOY114",
+ "url" : "https://cdn5-images.motherlessmedia.com/images/46ABC1A.jpg",
+ "views" : range(100, 2000),
+},
+
+{
+ "#url" : "https://motherless.com/G444B6FA",
"#class": motherless.MotherlessGalleryExtractor,
"#results": (
- "https://motherless.com/GI43D8704",
- "https://motherless.com/GV43D8704",
+ "https://motherless.com/GI444B6FA",
+ "https://motherless.com/GV444B6FA",
),
},
{
- "#url" : "https://motherless.com/GI43D8704",
+ "#url" : "https://motherless.com/GI444B6FA",
"#class": motherless.MotherlessGalleryExtractor,
- "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/\w+\.(jpg|png|gif)",
+ "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
"#range" : "1-100",
- "#count" : 100,
+ "#count" : 10,
- "count" : range(5000, 8000),
- "extension" : {"jpg", "png", "gif"},
+ "count" : range(5, 50),
+ "extension" : {"jpg", "jpeg", "png", "gif"},
"filename" : str,
- "gallery_id" : "43D8704",
- "gallery_title": "SpeechLess",
+ "gallery_id" : "444B6FA",
"id" : str,
"num" : int,
- "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/\w+\.\w+",
+ "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
"title" : str,
"type" : "image",
- "uploader" : "gaylobe",
- "url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/\w+\.(jpg|png|gif)",
+ "uploader" : "WawaWeWa",
+ "url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
},
{
- "#url" : "https://motherless.com/GV43D8704",
+ "#url" : "https://motherless.com/GV444B6FA",
"#class": motherless.MotherlessGalleryExtractor,
- "#pattern": r"https://cdn5-videos.motherlessmedia.com/videos/\w+\.mp4",
+ "#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?",
"#range" : "1-100",
- "#count" : 100,
+ "#count" : 29,
- "count" : range(500, 900),
+ "count" : range(20, 100),
"extension" : "mp4",
"filename" : str,
- "gallery_id" : "43D8704",
- "gallery_title": "SpeechLess",
+ "gallery_id" : "444B6FA",
"id" : str,
"num" : int,
- "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[\w-]+\.\w+",
+ "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
"title" : str,
"type" : "video",
- "uploader" : "gaylobe",
- "url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/\w+\.mp4",
+ "uploader" : "WawaWeWa",
+ "url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?",
+},
+
+{
+ "#url" : "https://motherless.com/g/bump___grind",
+ "#class": motherless.MotherlessGroupExtractor,
+ "#results": (
+ "https://motherless.com/gi/bump___grind",
+ "https://motherless.com/gv/bump___grind",
+ ),
+},
+
+{
+ "#url" : "https://motherless.com/gi/bump___grind",
+ "#class": motherless.MotherlessGroupExtractor,
+ "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
+ "#range" : "1-100",
+ "#count" : 18,
+
+ "count" : range(5, 50),
+ "extension" : {"jpg", "jpeg", "png", "gif"},
+ "filename" : str,
+ "group_id" : "bump___grind",
+ "group" : "bump___grind",
+ "id" : str,
+ "num" : int,
+ "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
+ "title" : str,
+ "type" : "image",
+ "url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
+},
+
+{
+ "#url" : "https://motherless.com/gv/bump___grind",
+ "#class": motherless.MotherlessGroupExtractor,
+ "#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?",
+ "#range" : "1-100",
+ "#count" : 25,
+
+ "count" : range(20, 100),
+ "extension" : "mp4",
+ "filename" : str,
+ "group_id" : "bump___grind",
+ "group" : "bump___grind",
+ "id" : str,
+ "num" : int,
+ "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
+ "title" : str,
+ "type" : "video",
+ "url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?",
},
)