[girlswithmuscle] add support (#4493 #6016)

* [girlswithmuscle] init
* [girlswithmuscle]: fix metadata extraction (site layout change)
* [girlswithmuscle]: fix tags extraction (site layout change)
* update login code
* update 'post' extractor
* update 'gallery' extractor, rename to 'search' extractor
* update docs
* add test cases

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
2025-06-15 00:05:49 +03:00
parent b583891df6
commit 96f5cfb305
7 changed files with 249 additions and 0 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -461,6 +461,7 @@ Description
    * ``e6ai`` (*)
    * ``e926`` (*)
    * ``exhentai``
+    * ``girlswithmuscle``
    * ``horne`` (R)
    * ``idolcomplex``
    * ``imgbb``
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -328,6 +328,11 @@
        {
            "enabled": false
        },
+        "girlswithmuscle":
+        {
+            "username": "",
+            "password": ""
+        },
        "gofile":
        {
            "api-token": null,
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -319,6 +319,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Favorites, Pools, Posts, Redirects, Tag Searches</td>
    <td></td>
 </tr>
+<tr>
+    <td>Girls with Muscle</td>
+    <td>https://www.girlswithmuscle.com/</td>
+    <td>Posts, Search Results</td>
+    <td>Supported</td>
+</tr>
 <tr>
    <td>Gofile</td>
    <td>https://gofile.io/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -63,6 +63,7 @@ modules = [
    "gelbooru",
    "gelbooru_v01",
    "gelbooru_v02",
+    "girlswithmuscle",
    "gofile",
    "hatenablog",
    "hentai2read",
--- a/gallery_dl/extractor/girlswithmuscle.py
+++ b/gallery_dl/extractor/girlswithmuscle.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache
+
+# URL prefix shared by the extractor patterns below;
+# the scheme and the "www." subdomain are both optional
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
+
+
+class GirlswithmuscleExtractor(Extractor):
+    """Base class for girlswithmuscle extractors"""
+    category = "girlswithmuscle"
+    root = "https://www.girlswithmuscle.com"
+    directory_fmt = ("{category}", "{model}")
+    filename_fmt = "{model}_{id}.{extension}"
+    archive_fmt = "{type}_{model}_{id}"
+
+    def login(self):
+        username, password = self._get_auth_info()
+        if username:
+            self.cookies_update(self._login_impl(username, password))
+
+    @cache(maxage=14*86400, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        url = self.root + "/login/"
+        page = self.request(url).text
+        csrf_token = text.extr(page, 'name="csrfmiddlewaretoken" value="', '"')
+
+        headers = {
+            "Origin" : self.root,
+            "Referer": url,
+        }
+        data = {
+            "csrfmiddlewaretoken": csrf_token,
+            "username": username,
+            "password": password,
+            "next": "/",
+        }
+        response = self.request(
+            url, method="POST", headers=headers, data=data)
+
+        if not response.history:
+            raise exception.AuthenticationError()
+
+        page = response.text
+        if ">Wrong username or password" in page:
+            raise exception.AuthenticationError()
+        if ">Log in<" in page:
+            raise exception.AuthenticationError("Account data is missing")
+
+        return {c.name: c.value for c in response.history[0].cookies}
+
+
+class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
+    """Extractor for individual posts on girlswithmuscle.com"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/(\d+)"
+    example = "https://www.girlswithmuscle.com/12345/"
+
+    def items(self):
+        self.login()
+
+        url = "{}/{}/".format(self.root, self.groups[0])
+        page = self.request(url).text
+        if not page:
+            raise exception.NotFoundError("post")
+
+        metadata = self.metadata(page)
+
+        url = text.extr(page, 'class="main-image" src="', '"')
+        if url:
+            metadata["type"] = "picture"
+        else:
+            url = text.extr(page, '<source src="', '"')
+            metadata["type"] = "video"
+
+        text.nameext_from_url(url, metadata)
+        yield Message.Directory, metadata
+        yield Message.Url, url, metadata
+
+    def metadata(self, page):
+        source = text.remove_html(text.extr(
+            page, '<div id="info-source" style="display: none">', "</div>"))
+        image_info = text.extr(
+            page, '<div class="image-info">', "</div>")
+        uploader = text.remove_html(text.extr(
+            image_info, '<span class="username-html">', "</a>"))
+
+        tags = text.extr(page, 'id="tags-text">', "</div>")
+        score = text.parse_int(text.remove_html(text.extr(
+            page, "Score: <b>", "</span")))
+        model = self._extract_model(page)
+
+        return {
+            "id": self.groups[0],
+            "model": model,
+            "model_list": self._parse_model_list(model),
+            "tags": text.split_html(tags)[1::2],
+            "date": text.parse_datetime(
+                text.extr(page, 'class="hover-time"  title="', '"')[:19],
+                "%Y-%m-%d %H:%M:%S"),
+            "is_favorite": self._parse_is_favorite(page),
+            "source_filename": source,
+            "uploader": uploader,
+            "score": score,
+            "comments": self._extract_comments(page),
+        }
+
+    def _extract_model(self, page):
+        model = text.extr(page, "<title>", "</title>")
+        return "unknown" if model.startswith("Picture #") else model
+
+    def _parse_model_list(self, model):
+        if model == "unknown":
+            return []
+        else:
+            return [name.strip() for name in model.split(",")]
+
+    def _parse_is_favorite(self, page):
+        fav_button = text.extr(
+            page, 'id="favorite-button">', "</span>")
+        unfav_button = text.extr(
+            page, 'class="actionbutton unfavorite-button">', "</span>")
+
+        is_favorite = None
+        if unfav_button == "Unfavorite":
+            is_favorite = True
+        if fav_button == "Favorite":
+            is_favorite = False
+
+        return is_favorite
+
+    def _extract_comments(self, page):
+        comments = text.extract_iter(
+            page, '<div class="comment-body-inner">', "</div>")
+        return [comment.strip() for comment in comments]
+
+
+class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor):
+    """Extractor for search results on girlswithmuscle.com"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"/images/(.*)"
+    example = "https://www.girlswithmuscle.com/images/?name=MODEL"
+
+    def pages(self):
+        query = self.groups[0]
+        url = "{}/images/{}".format(self.root, query)
+        response = self.request(url)
+        if response.history:
+            msg = 'Request was redirected to "{}", try logging in'.format(
+                response.url)
+            raise exception.AuthorizationError(msg)
+        page = response.text
+
+        match = util.re(r"Page (\d+) of (\d+)").search(page)
+        current, total = match.groups()
+        current, total = text.parse_int(current), text.parse_int(total)
+
+        yield page
+        for i in range(current + 1, total + 1):
+            url = "{}/images/{}/{}".format(self.root, i, query)
+            yield self.request(url).text
+
+    def items(self):
+        self.login()
+        for page in self.pages():
+            data = {
+                "_extractor"  : GirlswithmusclePostExtractor,
+                "gallery_name": text.unescape(text.extr(page, "<title>", "<")),
+            }
+            for imgid in text.extract_iter(page, 'id="imgid-', '"'):
+                url = "{}/{}/".format(self.root, imgid)
+                yield Message.Queue, url, data
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -55,6 +55,7 @@ CATEGORY_MAP = {
    "fashionnova"    : "Fashion Nova",
    "furaffinity"    : "Fur Affinity",
    "furry34"        : "Furry 34 com",
+    "girlswithmuscle": "Girls with Muscle",
    "hatenablog"     : "HatenaBlog",
    "hbrowse"        : "HBrowse",
    "hentai2read"    : "Hentai2Read",
@@ -458,6 +459,7 @@ AUTH_MAP = {
    "flickr"         : _OAUTH,
    "furaffinity"    : _COOKIES,
    "furbooru"       : "API Key",
+    "girlswithmuscle": "Supported",
    "horne"          : "Required",
    "idolcomplex"    : "Supported",
    "imgbb"          : "Supported",
--- a/test/results/girlswithmuscle.py
+++ b/test/results/girlswithmuscle.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import girlswithmuscle
+
+
+# Expected results for the girlswithmuscle extractors, one dict per test URL
+__tests__ = (
+# individual post page containing a picture
+{
+    "#url"     : "https://www.girlswithmuscle.com/2526619/",
+    "#category": ("", "girlswithmuscle", "post"),
+    "#class"   : girlswithmuscle.GirlswithmusclePostExtractor,
+    "#results" : "https://www.girlswithmuscle.com/images/full/2526619.jpg",
+
+    "comments" : [],
+    "date"     : "dt:2025-05-21 20:01:03",
+    "extension": "jpg",
+    "filename" : "2526619",
+    "id"       : "2526619",
+    "is_favorite": None,
+    "model"    : "Vladislava Galagan",
+    "model_list" : [
+        "Vladislava Galagan"
+    ],
+    "score"    : range(190, 250),
+    "source_filename": "",
+    "type"     : "picture",
+    "uploader" : "mrt",
+    "tags": [
+        "delts/shoulders",
+        "abs",
+        "casual",
+        "triceps",
+        "traps",
+        "bikini/competition suit",
+        "white",
+        "figure/fitness",
+        "bodybuilder",
+        "slavic",
+        "women's physique",
+        "russian",
+    ],
+},
+
+# search by model name; every queued URL must match the post pattern
+{
+    "#url"     : "https://www.girlswithmuscle.com/images/?name=Harmony%20Doughty",
+    "#category": ("", "girlswithmuscle", "search"),
+    "#class"   : girlswithmuscle.GirlswithmuscleSearchExtractor,
+    "#pattern" : girlswithmuscle.GirlswithmusclePostExtractor.pattern,
+    "#count"   : range(130, 150),
+},
+
+)