From 089f815560949b642eae5ff4f11420ff1e23fe6e Mon Sep 17 00:00:00 2001 From: NecRaul Date: Mon, 14 Jul 2025 18:50:07 +0400 Subject: [PATCH] [motherless] Add group support (#7774 #7787) * [motherless] Add group support * [motherless] Update old unit tests * [motherless] Add new unit tests * [motherless] Update docs --- docs/supportedsites.md | 2 +- gallery_dl/extractor/motherless.py | 136 +++++++++++++++++++++-------- test/results/motherless.py | 136 ++++++++++++++++++++--------- 3 files changed, 193 insertions(+), 81 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 51d6fe68..d4b0ba7b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -622,7 +622,7 @@ Consider all listed sites to potentially be NSFW. Motherless https://motherless.com/ - Galleries, Media Files + Galleries, Groups, Media Files diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py index 35454807..c81a4d15 100644 --- a/gallery_dl/extractor/motherless.py +++ b/gallery_dl/extractor/motherless.py @@ -24,7 +24,7 @@ class MotherlessExtractor(Extractor): archive_fmt = "{id}" def _extract_media(self, path): - url = self.root + "/" + path + url = f"{self.root}/{path}" page = self.request(url).text extr = text.extract_from(page) @@ -48,10 +48,59 @@ class MotherlessExtractor(Extractor): "uploader": text.unescape(extr('class="username">', "<").strip()), } - if path and path[0] == "G": + if not path: + pass + elif path[0] == "G": data["gallery_id"] = path[1:] data["gallery_title"] = self._extract_gallery_title( page, data["gallery_id"]) + elif path[0] == "g": + data["group_id"] = path[2:] + data["group_title"] = self._extract_group_title( + page, data["group_id"]) + + return data + + def _pagination(self, page): + while True: + for thumb in text.extract_iter( + page, 'class="thumb-container', ""): + yield thumb + + url = text.extr(page, '', "', ")") + .rpartition("(")[2].replace(",", "")), + } + + def _parse_thumb_data(self, thumb): + extr = text.extract_from(thumb) + + data = { + "id" : extr('data-codename="', '"'), + "type" : extr('data-mediatype="', '"'), + "thumbnail": extr('class="static" src="', '"'), + "title" : extr(' alt="', '"'), + } + data["url"] = data["thumbnail"].replace("thumb", data["type"]) return data @@ -72,13 +121,23 @@ class MotherlessExtractor(Extractor): if title: return text.unescape(title.strip()) - pos = page.find(' href="/G' + gallery_id + '"') + pos = page.find(f' href="/G{gallery_id}"') if pos >= 0: return text.unescape(text.extract( page, ' title="', '"', pos)[0]) return "" + @memcache(keyarg=2) + def _extract_group_title(self, page, group_id): + title = text.extr( + text.extr(page, '

', "

"), + ">", "<") + if title: + return text.unescape(title.strip()) + + return "" + class MotherlessMediaExtractor(MotherlessExtractor): """Extractor for a single image/video from motherless.com""" @@ -109,59 +168,62 @@ class MotherlessGalleryExtractor(MotherlessExtractor): if not type: data = {"_extractor": MotherlessGalleryExtractor} - yield Message.Queue, self.root + "/GI" + gid, data - yield Message.Queue, self.root + "/GV" + gid, data + yield Message.Queue, f"{self.root}/GI{gid}", data + yield Message.Queue, f"{self.root}/GV{gid}", data return url = f"{self.root}/G{type}{gid}" page = self.request(url).text - data = self._extract_gallery_data(page) + data = self._extract_data(page, "gallery") for num, thumb in enumerate(self._pagination(page), 1): file = self._parse_thumb_data(thumb) + thumbnail = file["thumbnail"] if file["type"] == "video": file = self._extract_media(file["id"]) file.update(data) file["num"] = num + file["thumbnail"] = thumbnail url = file["url"] yield Message.Directory, file yield Message.Url, url, text.nameext_from_url(url, file) - def _pagination(self, page): - while True: - for thumb in text.extract_iter( - page, 'class="thumb-container', ""): - yield thumb - url = text.extr(page, '", "<").rpartition(" | ")[0]), - "uploader": text.remove_html(extr( - 'class="gallery-member-username">', "', ")") - .rpartition("(")[2].replace(",", "")), - } + def items(self): + type, gid = self.groups - def _parse_thumb_data(self, thumb): - extr = text.extract_from(thumb) + if not type: + data = {"_extractor": MotherlessGroupExtractor} + yield Message.Queue, f"{self.root}/gi/{gid}", data + yield Message.Queue, f"{self.root}/gv/{gid}", data + return - data = { - "id" : extr('data-codename="', '"'), - "type" : extr('data-mediatype="', '"'), - "thumbnail": extr('class="static" src="', '"'), - "title" : extr(' alt="', '"'), - } - data["url"] = data["thumbnail"].replace("thumb", data["type"]) + url = f"{self.root}/g{type}/{gid}" + page = self.request(url).text + data = self._extract_data(page, "group") - return data + for num, thumb in enumerate(self._pagination(page), 1): + file = self._parse_thumb_data(thumb) + thumbnail = file["thumbnail"] + + file = self._extract_media(file["id"]) + + uploader = file.get("uploader") + file.update(data) + file["num"] = num + file["thumbnail"] = thumbnail + file["uploader"] = uploader + file["group"] = file["group_id"] + url = file["url"] + yield Message.Directory, file + yield Message.Url, url, text.nameext_from_url(url, file) diff --git a/test/results/motherless.py b/test/results/motherless.py index f0d469d7..b4742ad3 100644 --- a/test/results/motherless.py +++ b/test/results/motherless.py @@ -33,27 +33,6 @@ __tests__ = ( }, -{ - "#url" : "https://motherless.com/G43D8704/F0C07D3", - "#class": motherless.MotherlessMediaExtractor, - "#results": "https://cdn5-images.motherlessmedia.com/images/F0C07D3.jpg", - - "date" : "dt:2014-08-13 00:00:00", - "extension" : "jpg", - "favorites" : range(100, 200), - "filename" : "F0C07D3", - "gallery_id": "43D8704", - "gallery_title": "SpeechLess", - "group" : "", - "id" : "F0C07D3", - "tags" : [], - "title" : "Spunky Angels Amy Black Dress", - "type" : "image", - "uploader" : "jonesyjonesy", - "url" : "https://cdn5-images.motherlessmedia.com/images/F0C07D3.jpg", - "views" : range(14000, 20000), -}, - { "#url" : "https://motherless.com/g/classic_porn/19D6C80", "#class": motherless.MotherlessMediaExtractor, @@ -74,54 +53,125 @@ __tests__ = ( }, { - "#url" : "https://motherless.com/G43D8704", + "#url" : "https://motherless.com/G444B6FA/46ABC1A", + "#class": motherless.MotherlessMediaExtractor, + "#results": "https://cdn5-images.motherlessmedia.com/images/46ABC1A.jpg", + + "date" : "dt:2017-11-24 00:00:00", + "extension" : "jpg", + "favorites" : range(0, 100), + "filename" : "46ABC1A", + "gallery_id": "444B6FA", + "group" : "", + "id" : "46ABC1A", + "tags" : [ + "rope", + "bondage", + "bdsm" + ], + "title" : "Some More Pix", + "type" : "image", + "uploader" : "FATBOY114", + "url" : "https://cdn5-images.motherlessmedia.com/images/46ABC1A.jpg", + "views" : range(100, 2000), +}, + +{ + "#url" : "https://motherless.com/G444B6FA", "#class": motherless.MotherlessGalleryExtractor, "#results": ( - "https://motherless.com/GI43D8704", - "https://motherless.com/GV43D8704", + "https://motherless.com/GI444B6FA", + "https://motherless.com/GV444B6FA", ), }, { - "#url" : "https://motherless.com/GI43D8704", + "#url" : "https://motherless.com/GI444B6FA", "#class": motherless.MotherlessGalleryExtractor, - "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/\w+\.(jpg|png|gif)", + "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)", "#range" : "1-100", - "#count" : 100, + "#count" : 10, - "count" : range(5000, 8000), - "extension" : {"jpg", "png", "gif"}, + "count" : range(5, 50), + "extension" : {"jpg", "jpeg", "png", "gif"}, "filename" : str, - "gallery_id" : "43D8704", - "gallery_title": "SpeechLess", + "gallery_id" : "444B6FA", "id" : str, "num" : int, - "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/\w+\.\w+", + "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+", "title" : str, "type" : "image", - "uploader" : "gaylobe", - "url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/\w+\.(jpg|png|gif)", + "uploader" : "WawaWeWa", + "url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)", }, { - "#url" : "https://motherless.com/GV43D8704", + "#url" : "https://motherless.com/GV444B6FA", "#class": motherless.MotherlessGalleryExtractor, - "#pattern": r"https://cdn5-videos.motherlessmedia.com/videos/\w+\.mp4", + "#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?", "#range" : "1-100", - "#count" : 100, + "#count" : 29, - "count" : range(500, 900), + "count" : range(20, 100), "extension" : "mp4", "filename" : str, - "gallery_id" : "43D8704", - "gallery_title": "SpeechLess", + "gallery_id" : "444B6FA", "id" : str, "num" : int, - "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[\w-]+\.\w+", + "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+", "title" : str, "type" : "video", - "uploader" : "gaylobe", - "url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/\w+\.mp4", + "uploader" : "WawaWeWa", + "url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?", +}, + +{ + "#url" : "https://motherless.com/g/bump___grind", + "#class": motherless.MotherlessGroupExtractor, + "#results": ( + "https://motherless.com/gi/bump___grind", + "https://motherless.com/gv/bump___grind", + ), +}, + +{ + "#url" : "https://motherless.com/gi/bump___grind", + "#class": motherless.MotherlessGroupExtractor, + "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)", + "#range" : "1-100", + "#count" : 18, + + "count" : range(5, 50), + "extension" : {"jpg", "jpeg", "png", "gif"}, + "filename" : str, + "group_id" : "bump___grind", + "group" : "bump___grind", + "id" : str, + "num" : int, + "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+", + "title" : str, + "type" : "image", + "url" : r"re:https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)", +}, + +{ + "#url" : "https://motherless.com/gv/bump___grind", + "#class": motherless.MotherlessGroupExtractor, + "#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?", + "#range" : "1-100", + "#count" : 25, + + "count" : range(20, 100), + "extension" : "mp4", + "filename" : str, + "group_id" : "bump___grind", + "group" : "bump___grind", + "id" : str, + "num" : int, + "thumbnail" : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+", + "title" : str, + "type" : "video", + "url" : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?", }, )