* [girlswithmuscle] init
* [girlswithmuscle]: fix metadata extraction (site layout change)
* [girlswithmuscle]: fix tags extraction (site layout change)
* update login code
* update 'post' extractor
* update 'gallery' extractor, rename to 'search' extractor
* update docs
* add test cases

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
This commit is contained in:
@@ -461,6 +461,7 @@ Description
|
||||
* ``e6ai`` (*)
|
||||
* ``e926`` (*)
|
||||
* ``exhentai``
|
||||
* ``girlswithmuscle``
|
||||
* ``horne`` (R)
|
||||
* ``idolcomplex``
|
||||
* ``imgbb``
|
||||
|
||||
@@ -328,6 +328,11 @@
|
||||
{
|
||||
"enabled": false
|
||||
},
|
||||
"girlswithmuscle":
|
||||
{
|
||||
"username": "",
|
||||
"password": ""
|
||||
},
|
||||
"gofile":
|
||||
{
|
||||
"api-token": null,
|
||||
|
||||
@@ -319,6 +319,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Favorites, Pools, Posts, Redirects, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Girls with Muscle</td>
|
||||
<td>https://www.girlswithmuscle.com/</td>
|
||||
<td>Posts, Search Results</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Gofile</td>
|
||||
<td>https://gofile.io/</td>
|
||||
|
||||
@@ -63,6 +63,7 @@ modules = [
|
||||
"gelbooru",
|
||||
"gelbooru_v01",
|
||||
"gelbooru_v02",
|
||||
"girlswithmuscle",
|
||||
"gofile",
|
||||
"hatenablog",
|
||||
"hentai2read",
|
||||
|
||||
179
gallery_dl/extractor/girlswithmuscle.py
Normal file
179
gallery_dl/extractor/girlswithmuscle.py
Normal file
@@ -0,0 +1,179 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache
|
||||
|
||||
# URL prefix shared by all extractor patterns; scheme and "www." are optional
BASE_PATTERN = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
|
||||
|
||||
|
||||
class GirlswithmuscleExtractor(Extractor):
    """Common base for all girlswithmuscle.com extractors"""
    category = "girlswithmuscle"
    root = "https://www.girlswithmuscle.com"
    directory_fmt = ("{category}", "{model}")
    filename_fmt = "{model}_{id}.{extension}"
    archive_fmt = "{type}_{model}_{id}"

    def login(self):
        """Apply login cookies when a username is configured"""
        username, password = self._get_auth_info()
        if not username:
            return
        session_cookies = self._login_impl(username, password)
        self.cookies_update(session_cookies)

    @cache(maxage=14*86400, keyarg=1)
    def _login_impl(self, username, password):
        """Submit the login form and return the resulting cookies as a dict

        Raises exception.AuthenticationError when the POST response has an
        empty history or when the returned page contains one of the
        failure markers checked below.
        """
        self.log.info("Logging in as %s", username)

        login_url = self.root + "/login/"

        # the login form carries a CSRF token that is sent back with the POST
        form_html = self.request(login_url).text
        token = text.extr(
            form_html, 'name="csrfmiddlewaretoken" value="', '"')

        response = self.request(
            login_url,
            method="POST",
            headers={
                "Origin" : self.root,
                "Referer": login_url,
            },
            data={
                "csrfmiddlewaretoken": token,
                "username": username,
                "password": password,
                "next": "/",
            },
        )

        redirects = response.history
        if not redirects:
            raise exception.AuthenticationError()

        landing = response.text
        if ">Wrong username or password" in landing:
            raise exception.AuthenticationError()
        if ">Log in<" in landing:
            raise exception.AuthenticationError("Account data is missing")

        # cookies are taken from the first response in the history
        first = redirects[0]
        return {cookie.name: cookie.value for cookie in first.cookies}
|
||||
|
||||
|
||||
class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
    """Extractor for individual posts on girlswithmuscle.com"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/(\d+)"
    example = "https://www.girlswithmuscle.com/12345/"

    def items(self):
        """Yield the Directory and Url messages for a single post

        Raises exception.NotFoundError when the post page is empty.
        Logs a warning and yields nothing when the page contains neither
        a main image nor a video source.
        """
        self.login()

        url = "{}/{}/".format(self.root, self.groups[0])
        page = self.request(url).text
        if not page:
            raise exception.NotFoundError("post")

        metadata = self.metadata(page)

        # try the picture markup first, then fall back to a <source> element
        url = text.extr(page, 'class="main-image" src="', '"')
        if url:
            metadata["type"] = "picture"
        else:
            url = text.extr(page, '<source src="', '"')
            if not url:
                # neither markup variant matched; emitting an empty URL
                # labelled "video" would only fail later on
                self.log.warning(
                    "%s: Unable to find an image or video URL",
                    self.groups[0])
                return
            metadata["type"] = "video"

        text.nameext_from_url(url, metadata)
        yield Message.Directory, metadata
        yield Message.Url, url, metadata

    def metadata(self, page):
        """Build the metadata dict for a post from its HTML 'page'"""
        source = text.remove_html(text.extr(
            page, '<div id="info-source" style="display: none">', "</div>"))
        image_info = text.extr(
            page, '<div class="image-info">', "</div>")
        # the uploader name is looked up inside the image-info block only
        uploader = text.remove_html(text.extr(
            image_info, '<span class="username-html">', "</a>"))

        tags = text.extr(page, 'id="tags-text">', "</div>")
        score = text.parse_int(text.remove_html(text.extr(
            page, "Score: <b>", "</span")))
        model = self._extract_model(page)

        return {
            "id": self.groups[0],
            "model": model,
            "model_list": self._parse_model_list(model),
            # keep every second item of the split tag markup
            "tags": text.split_html(tags)[1::2],
            # only the first 19 characters ("YYYY-MM-DD HH:MM:SS") are parsed
            "date": text.parse_datetime(
                text.extr(page, 'class="hover-time" title="', '"')[:19],
                "%Y-%m-%d %H:%M:%S"),
            "is_favorite": self._parse_is_favorite(page),
            "source_filename": source,
            "uploader": uploader,
            "score": score,
            "comments": self._extract_comments(page),
        }

    def _extract_model(self, page):
        """Return the page title as model name, or "unknown"

        The title is HTML-unescaped, like the search extractor does for
        its own page title, so that entities do not end up in file and
        directory names. Titles starting with "Picture #" are mapped to
        "unknown".
        """
        model = text.unescape(text.extr(page, "<title>", "</title>"))
        return "unknown" if model.startswith("Picture #") else model

    def _parse_model_list(self, model):
        """Split a comma-separated model string into a list of names"""
        if model == "unknown":
            return []
        return [name.strip() for name in model.split(",")]

    def _parse_is_favorite(self, page):
        """Return True/False depending on which favorite button is found

        Returns None when neither button text matches.
        """
        fav_button = text.extr(
            page, 'id="favorite-button">', "</span>")
        unfav_button = text.extr(
            page, 'class="actionbutton unfavorite-button">', "</span>")

        is_favorite = None
        if unfav_button == "Unfavorite":
            is_favorite = True
        # checked last on purpose: a "Favorite" button takes precedence
        if fav_button == "Favorite":
            is_favorite = False

        return is_favorite

    def _extract_comments(self, page):
        """Return the stripped inner markup of every comment body"""
        comments = text.extract_iter(
            page, '<div class="comment-body-inner">', "</div>")
        return [comment.strip() for comment in comments]
|
||||
|
||||
|
||||
class GirlswithmuscleSearchExtractor(GirlswithmuscleExtractor):
    """Extractor for search results on girlswithmuscle.com"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/images/(.*)"
    example = "https://www.girlswithmuscle.com/images/?name=MODEL"

    def pages(self):
        """Yield the HTML of the requested result page and all later ones

        Raises exception.AuthorizationError when the initial request has
        a non-empty history, i.e. it was redirected.
        """
        query = self.groups[0]
        url = "{}/images/{}".format(self.root, query)
        response = self.request(url)
        if response.history:
            msg = 'Request was redirected to "{}", try logging in'.format(
                response.url)
            raise exception.AuthorizationError(msg)
        page = response.text
        yield page

        match = util.re(r"Page (\d+) of (\d+)").search(page)
        if match is None:
            # no pagination text on this page: nothing more to fetch
            return
        current, total = match.groups()
        current, total = text.parse_int(current), text.parse_int(total)

        # When the input URL already points at a specific page
        # ("/images/2/?name=..."), drop that page segment so follow-up
        # URLs become "/images/3/?name=..." and not "/images/3/2/?name=...".
        head, sep, tail = query.partition("/")
        if sep and head.isdecimal():
            query = tail

        for i in range(current + 1, total + 1):
            url = "{}/images/{}/{}".format(self.root, i, query)
            yield self.request(url).text

    def items(self):
        """Queue every post found on the result pages"""
        self.login()
        for page in self.pages():
            data = {
                "_extractor" : GirlswithmusclePostExtractor,
                "gallery_name": text.unescape(text.extr(page, "<title>", "<")),
            }
            for imgid in text.extract_iter(page, 'id="imgid-', '"'):
                url = "{}/{}/".format(self.root, imgid)
                yield Message.Queue, url, data
|
||||
@@ -55,6 +55,7 @@ CATEGORY_MAP = {
|
||||
"fashionnova" : "Fashion Nova",
|
||||
"furaffinity" : "Fur Affinity",
|
||||
"furry34" : "Furry 34 com",
|
||||
"girlswithmuscle": "Girls with Muscle",
|
||||
"hatenablog" : "HatenaBlog",
|
||||
"hbrowse" : "HBrowse",
|
||||
"hentai2read" : "Hentai2Read",
|
||||
@@ -458,6 +459,7 @@ AUTH_MAP = {
|
||||
"flickr" : _OAUTH,
|
||||
"furaffinity" : _COOKIES,
|
||||
"furbooru" : "API Key",
|
||||
"girlswithmuscle": "Supported",
|
||||
"horne" : "Required",
|
||||
"idolcomplex" : "Supported",
|
||||
"imgbb" : "Supported",
|
||||
|
||||
55
test/results/girlswithmuscle.py
Normal file
55
test/results/girlswithmuscle.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import girlswithmuscle
|
||||
|
||||
|
||||
__tests__ = (
    # post extractor: one picture URL plus its metadata
    {
        "#url": "https://www.girlswithmuscle.com/2526619/",
        "#category": ("", "girlswithmuscle", "post"),
        "#class": girlswithmuscle.GirlswithmusclePostExtractor,
        "#results": "https://www.girlswithmuscle.com/images/full/2526619.jpg",

        "comments": [],
        "date": "dt:2025-05-21 20:01:03",
        "extension": "jpg",
        "filename": "2526619",
        "id": "2526619",
        "is_favorite": None,
        "model": "Vladislava Galagan",
        "model_list": ["Vladislava Galagan"],
        # the score is given as a range rather than one fixed value
        "score": range(190, 250),
        "source_filename": "",
        "type": "picture",
        "uploader": "mrt",
        "tags": [
            "delts/shoulders",
            "abs",
            "casual",
            "triceps",
            "traps",
            "bikini/competition suit",
            "white",
            "figure/fitness",
            "bodybuilder",
            "slavic",
            "women's physique",
            "russian",
        ],
    },

    # search extractor: every result must match the post extractor pattern
    {
        "#url": "https://www.girlswithmuscle.com/images/?name=Harmony%20Doughty",
        "#category": ("", "girlswithmuscle", "search"),
        "#class": girlswithmuscle.GirlswithmuscleSearchExtractor,
        "#pattern": girlswithmuscle.GirlswithmusclePostExtractor.pattern,
        "#count": range(130, 150),
    },
)
|
||||
Reference in New Issue
Block a user