[motherless] Add group support (#7774 #7787)

* [motherless] Add group support
* [motherless] Update old unit tests
* [motherless] Add new unit tests
* [motherless] Update docs
2025-07-14 18:50:07 +04:00
parent 75582e38e9
commit 089f815560
3 changed files with 193 additions and 81 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -622,7 +622,7 @@ Consider all listed sites to potentially be NSFW.
 <tr>
    <td>Motherless</td>
    <td>https://motherless.com/</td>
-    <td>Galleries, Media Files</td>
+    <td>Galleries, Groups, Media Files</td>
    <td></td>
 </tr>
 <tr>
--- a/gallery_dl/extractor/motherless.py
+++ b/gallery_dl/extractor/motherless.py
@@ -24,7 +24,7 @@ class MotherlessExtractor(Extractor):
    archive_fmt = "{id}"

    def _extract_media(self, path):
-        url = self.root + "/" + path
+        url = f"{self.root}/{path}"
        page = self.request(url).text
        extr = text.extract_from(page)

@@ -48,10 +48,59 @@ class MotherlessExtractor(Extractor):
            "uploader": text.unescape(extr('class="username">', "<").strip()),
        }

-        if path and path[0] == "G":
+        if not path:
+            pass
+        elif path[0] == "G":
            data["gallery_id"] = path[1:]
            data["gallery_title"] = self._extract_gallery_title(
                page, data["gallery_id"])
+        elif path[0] == "g":
+            data["group_id"] = path[2:]
+            data["group_title"] = self._extract_group_title(
+                page, data["group_id"])
+
+        return data
+
+    def _pagination(self, page):
+        while True:
+            for thumb in text.extract_iter(
+                    page, 'class="thumb-container', "</div>"):
+                yield thumb
+
+            url = text.extr(page, '<link rel="next" href="', '"')
+            if not url:
+                return
+            page = self.request(text.unescape(url)).text
+
+    def _extract_data(self, page, category):
+        extr = text.extract_from(page)
+
+        gid = self.groups[-1]
+        if category == "gallery":
+            title = self._extract_gallery_title(page, gid)
+        else:
+            title = self._extract_group_title(page, gid)
+
+        return {
+            f"{category}_id": gid,
+            f"{category}_title": title,
+            "uploader": text.remove_html(extr(
+                f'class="{category}-member-username">', "</")),
+            "count": text.parse_int(
+                extr('<span class="active">', ")")
+                .rpartition("(")[2].replace(",", "")),
+        }
+
+    def _parse_thumb_data(self, thumb):
+        extr = text.extract_from(thumb)
+
+        data = {
+            "id"       : extr('data-codename="', '"'),
+            "type"     : extr('data-mediatype="', '"'),
+            "thumbnail": extr('class="static" src="', '"'),
+            "title"    : extr(' alt="', '"'),
+        }
+        data["url"] = data["thumbnail"].replace("thumb", data["type"])

        return data

@@ -72,13 +121,23 @@ class MotherlessExtractor(Extractor):
        if title:
            return text.unescape(title.strip())

-        pos = page.find(' href="/G' + gallery_id + '"')
+        pos = page.find(f' href="/G{gallery_id}"')
        if pos >= 0:
            return text.unescape(text.extract(
                page, ' title="', '"', pos)[0])

        return ""

+    @memcache(keyarg=2)
+    def _extract_group_title(self, page, group_id):
+        title = text.extr(
+            text.extr(page, '<h1 class="group-bio-name">', "</h1>"),
+            ">", "<")
+        if title:
+            return text.unescape(title.strip())
+
+        return ""
+

 class MotherlessMediaExtractor(MotherlessExtractor):
    """Extractor for a single image/video from motherless.com"""
@@ -109,59 +168,62 @@ class MotherlessGalleryExtractor(MotherlessExtractor):

        if not type:
            data = {"_extractor": MotherlessGalleryExtractor}
-            yield Message.Queue, self.root + "/GI" + gid, data
-            yield Message.Queue, self.root + "/GV" + gid, data
+            yield Message.Queue, f"{self.root}/GI{gid}", data
+            yield Message.Queue, f"{self.root}/GV{gid}", data
            return

        url = f"{self.root}/G{type}{gid}"
        page = self.request(url).text
-        data = self._extract_gallery_data(page)
+        data = self._extract_data(page, "gallery")

        for num, thumb in enumerate(self._pagination(page), 1):
            file = self._parse_thumb_data(thumb)
+            thumbnail = file["thumbnail"]

            if file["type"] == "video":
                file = self._extract_media(file["id"])

            file.update(data)
            file["num"] = num
+            file["thumbnail"] = thumbnail
            url = file["url"]
            yield Message.Directory, file
            yield Message.Url, url, text.nameext_from_url(url, file)

-    def _pagination(self, page):
-        while True:
-            for thumb in text.extract_iter(
-                    page, 'class="thumb-container', "</div>"):
-                yield thumb

-            url = text.extr(page, '<link rel="next" href="', '"')
-            if not url:
-                return
-            page = self.request(text.unescape(url)).text
+class MotherlessGroupExtractor(MotherlessExtractor):
+    subcategory = "group"
+    directory_fmt = ("{category}", "{uploader}",
+                     "{group_id} {group_title}")
+    archive_fmt = "{group_id}_{id}"
+    pattern = BASE_PATTERN + "/g([iv]?)/?([a-z0-9_]+)/?$"
+    example = "https://motherless.com/g/abc123"

-    def _extract_gallery_data(self, page):
-        extr = text.extract_from(page)
-        return {
-            "gallery_id": self.groups[-1],
-            "gallery_title": text.unescape(extr(
-                "<title>", "<").rpartition(" | ")[0]),
-            "uploader": text.remove_html(extr(
-                'class="gallery-member-username">', "</")),
-            "count": text.parse_int(
-                extr('<span class="active">', ")")
-                .rpartition("(")[2].replace(",", "")),
-        }
+    def items(self):
+        type, gid = self.groups

-    def _parse_thumb_data(self, thumb):
-        extr = text.extract_from(thumb)
+        if not type:
+            data = {"_extractor": MotherlessGroupExtractor}
+            yield Message.Queue, f"{self.root}/gi/{gid}", data
+            yield Message.Queue, f"{self.root}/gv/{gid}", data
+            return

-        data = {
-            "id"       : extr('data-codename="', '"'),
-            "type"     : extr('data-mediatype="', '"'),
-            "thumbnail": extr('class="static" src="', '"'),
-            "title"    : extr(' alt="', '"'),
-        }
-        data["url"] = data["thumbnail"].replace("thumb", data["type"])
+        url = f"{self.root}/g{type}/{gid}"
+        page = self.request(url).text
+        data = self._extract_data(page, "group")

-        return data
+        for num, thumb in enumerate(self._pagination(page), 1):
+            file = self._parse_thumb_data(thumb)
+            thumbnail = file["thumbnail"]
+
+            file = self._extract_media(file["id"])
+
+            uploader = file.get("uploader")
+            file.update(data)
+            file["num"] = num
+            file["thumbnail"] = thumbnail
+            file["uploader"] = uploader
+            file["group"] = file["group_id"]
+            url = file["url"]
+            yield Message.Directory, file
+            yield Message.Url, url, text.nameext_from_url(url, file)
--- a/test/results/motherless.py
+++ b/test/results/motherless.py
@@ -33,27 +33,6 @@ __tests__ = (

 },

-{
-    "#url"  : "https://motherless.com/G43D8704/F0C07D3",
-    "#class": motherless.MotherlessMediaExtractor,
-    "#results": "https://cdn5-images.motherlessmedia.com/images/F0C07D3.jpg",
-
-    "date"      : "dt:2014-08-13 00:00:00",
-    "extension" : "jpg",
-    "favorites" : range(100, 200),
-    "filename"  : "F0C07D3",
-    "gallery_id": "43D8704",
-    "gallery_title": "SpeechLess",
-    "group"     : "",
-    "id"        : "F0C07D3",
-    "tags"      : [],
-    "title"     : "Spunky Angels Amy Black Dress",
-    "type"      : "image",
-    "uploader"  : "jonesyjonesy",
-    "url"       : "https://cdn5-images.motherlessmedia.com/images/F0C07D3.jpg",
-    "views"     : range(14000, 20000),
-},
-
 {
    "#url"  : "https://motherless.com/g/classic_porn/19D6C80",
    "#class": motherless.MotherlessMediaExtractor,
@@ -74,54 +53,125 @@ __tests__ = (
 },

 {
-    "#url"  : "https://motherless.com/G43D8704",
+    "#url"  : "https://motherless.com/G444B6FA/46ABC1A",
+    "#class": motherless.MotherlessMediaExtractor,
+    "#results": "https://cdn5-images.motherlessmedia.com/images/46ABC1A.jpg",
+
+    "date"      : "dt:2017-11-24 00:00:00",
+    "extension" : "jpg",
+    "favorites" : range(0, 100),
+    "filename"  : "46ABC1A",
+    "gallery_id": "444B6FA",
+    "group"     : "",
+    "id"        : "46ABC1A",
+    "tags"      : [
+        "rope",
+        "bondage",
+        "bdsm"
+    ],
+    "title"     : "Some More Pix",
+    "type"      : "image",
+    "uploader"  : "FATBOY114",
+    "url"       : "https://cdn5-images.motherlessmedia.com/images/46ABC1A.jpg",
+    "views"     : range(100, 2000),
+},
+
+{
+    "#url"  : "https://motherless.com/G444B6FA",
    "#class": motherless.MotherlessGalleryExtractor,
    "#results": (
-        "https://motherless.com/GI43D8704",
-        "https://motherless.com/GV43D8704",
+        "https://motherless.com/GI444B6FA",
+        "https://motherless.com/GV444B6FA",
    ),
 },

 {
-    "#url"  : "https://motherless.com/GI43D8704",
+    "#url"  : "https://motherless.com/GI444B6FA",
    "#class": motherless.MotherlessGalleryExtractor,
-    "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/\w+\.(jpg|png|gif)",
+    "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
    "#range"  : "1-100",
-    "#count"  : 100,
+    "#count"  : 10,

-    "count"        : range(5000, 8000),
-    "extension"    : {"jpg", "png", "gif"},
+    "count"        : range(5, 50),
+    "extension"    : {"jpg", "jpeg", "png", "gif"},
    "filename"     : str,
-    "gallery_id"   : "43D8704",
-    "gallery_title": "SpeechLess",
+    "gallery_id"   : "444B6FA",
    "id"           : str,
    "num"          : int,
-    "thumbnail"    : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/\w+\.\w+",
+    "thumbnail"    : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
    "title"        : str,
    "type"         : "image",
-    "uploader"     : "gaylobe",
-    "url"          : r"re:https://cdn5-images\.motherlessmedia\.com/images/\w+\.(jpg|png|gif)",
+    "uploader"     : "WawaWeWa",
+    "url"          : r"re:https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
 },

 {
-    "#url"  : "https://motherless.com/GV43D8704",
+    "#url"  : "https://motherless.com/GV444B6FA",
    "#class": motherless.MotherlessGalleryExtractor,
-    "#pattern": r"https://cdn5-videos.motherlessmedia.com/videos/\w+\.mp4",
+    "#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?",
    "#range"  : "1-100",
-    "#count"  : 100,
+    "#count"  : 29,

-    "count"        : range(500, 900),
+    "count"        : range(20, 100),
    "extension"    : "mp4",
    "filename"     : str,
-    "gallery_id"   : "43D8704",
-    "gallery_title": "SpeechLess",
+    "gallery_id"   : "444B6FA",
    "id"           : str,
    "num"          : int,
-    "thumbnail"    : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[\w-]+\.\w+",
+    "thumbnail"    : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
    "title"        : str,
    "type"         : "video",
-    "uploader"     : "gaylobe",
-    "url"          : r"re:https://cdn5-videos.motherlessmedia.com/videos/\w+\.mp4",
+    "uploader"     : "WawaWeWa",
+    "url"          : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?",
+},
+
+{
+    "#url"  : "https://motherless.com/g/bump___grind",
+    "#class": motherless.MotherlessGroupExtractor,
+    "#results": (
+        "https://motherless.com/gi/bump___grind",
+        "https://motherless.com/gv/bump___grind",
+    ),
+},
+
+{
+    "#url"  : "https://motherless.com/gi/bump___grind",
+    "#class": motherless.MotherlessGroupExtractor,
+    "#pattern": r"https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
+    "#range"  : "1-100",
+    "#count"  : 18,
+
+    "count"        : range(5, 50),
+    "extension"    : {"jpg", "jpeg", "png", "gif"},
+    "filename"     : str,
+    "group_id"     : "bump___grind",
+    "group"        : "bump___grind",
+    "id"           : str,
+    "num"          : int,
+    "thumbnail"    : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
+    "title"        : str,
+    "type"         : "image",
+    "url"          : r"re:https://cdn5-images\.motherlessmedia\.com/images/[^/]+\.(jpg|jpeg|png|gif)",
+},
+
+{
+    "#url"  : "https://motherless.com/gv/bump___grind",
+    "#class": motherless.MotherlessGroupExtractor,
+    "#pattern": r"https://cdn5-videos\.motherlessmedia\.com/videos/[^/]+\.mp4(?:\?.*)?",
+    "#range"  : "1-100",
+    "#count"  : 25,
+
+    "count"        : range(20, 100),
+    "extension"    : "mp4",
+    "filename"     : str,
+    "group_id"     : "bump___grind",
+    "group"        : "bump___grind",
+    "id"           : str,
+    "num"          : int,
+    "thumbnail"    : r"re:https://cdn5-thumbs\.motherlessmedia\.com/thumbs/[^/]+\.\w+",
+    "title"        : str,
+    "type"         : "video",
+    "url"          : r"re:https://cdn5-videos.motherlessmedia.com/videos/[^/]+\.mp4(?:\?.*)?",
 },

 )