[hdoujin] add support (#6810)

This commit is contained in:
Mike Fährmann
2025-09-17 18:54:39 +02:00
parent 1b9e1ff9ff
commit db208ca1bd
8 changed files with 233 additions and 65 deletions

View File

@@ -3092,6 +3092,70 @@ Description
Recursively download files from subfolders.
extractor.hdoujin.crt
---------------------
Type
``string``
Example
* ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"``
* ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"``
Description
The ``crt`` query parameter value
sent when fetching gallery data.
To get this value:
* Open your browser's Developer Tools (F12)
* Select `Network` -> `XHR`
* Open a gallery page
* Select the last `Network` entry and copy its ``crt`` value
Note: You will also need to set your browser's
`user-agent <extractor.*.user-agent_>`__
extractor.hdoujin.format
------------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``["0", "1600", "1280", "980", "780"]``
Description
Name(s) of the image format to download.
When more than one format is given, the first available one is selected.
| Possible formats are
| ``"780"``, ``"980"``, ``"1280"``, ``"1600"``, ``"0"`` (original)
extractor.hdoujin.tags
----------------------
Type
``bool``
Default
``false``
Description
Group ``tags`` by type and
provide them as ``tags_<type>`` metadata fields,
for example ``tags_artist`` or ``tags_character``.
extractor.hdoujin.token
-----------------------
Type
``string``
Example
* ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
Description
``Authorization`` header value
used for requests to ``https://api.hdoujin.org``
to access ``favorite`` galleries.
extractor.hentaifoundry.descriptions
------------------------------------
Type
@@ -3586,70 +3650,6 @@ Description
the first in the list gets chosen (usually `mp3`).
extractor.schalenetwork.crt
---------------------------
Type
``string``
Example
* ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"``
* ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"``
Description
The ``crt`` query parameter value
sent when fetching gallery data.
To get this value:
* Open your browser's Developer Tools (F12)
* Select `Network` -> `XHR`
* Open a gallery page
* Select the last `Network` entry and copy its ``crt`` value
Note: You will also need your browser's
`user-agent <extractor.*.user-agent_>`__
extractor.schalenetwork.format
------------------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``["0", "1600", "1280", "980", "780"]``
Description
Name(s) of the image format to download.
When more than one format is given, the first available one is selected.
| Possible formats are
| ``"780"``, ``"980"``, ``"1280"``, ``"1600"``, ``"0"`` (original)
extractor.schalenetwork.tags
----------------------------
Type
``bool``
Default
``false``
Description
Group ``tags`` by type and
provide them as ``tags_<type>`` metadata fields,
for example ``tags_artist`` or ``tags_character``.
extractor.schalenetwork.token
-----------------------------
Type
``string``
Example
* ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
Description
``Authorization`` header value
used for requests to ``https://api.schale.network``
to access ``favorite`` galleries.
extractor.lolisafe.domain
-------------------------
Type
@@ -4880,6 +4880,70 @@ Description
Download videos.
extractor.schalenetwork.crt
---------------------------
Type
``string``
Example
* ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"``
* ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"``
Description
The ``crt`` query parameter value
sent when fetching gallery data.
To get this value:
* Open your browser's Developer Tools (F12)
* Select `Network` -> `XHR`
* Open a gallery page
* Select the last `Network` entry and copy its ``crt`` value
Note: You will also need to set your browser's
`user-agent <extractor.*.user-agent_>`__
extractor.schalenetwork.format
------------------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``["0", "1600", "1280", "980", "780"]``
Description
Name(s) of the image format to download.
When more than one format is given, the first available one is selected.
| Possible formats are
| ``"780"``, ``"980"``, ``"1280"``, ``"1600"``, ``"0"`` (original)
extractor.schalenetwork.tags
----------------------------
Type
``bool``
Default
``false``
Description
Group ``tags`` by type and
provide them as ``tags_<type>`` metadata fields,
for example ``tags_artist`` or ``tags_character``.
extractor.schalenetwork.token
-----------------------------
Type
``string``
Example
* ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
* ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
Description
``Authorization`` header value
used for requests to ``https://api.schale.network``
to access ``favorite`` galleries.
extractor.sexcom.gifs
---------------------
Type

View File

@@ -386,6 +386,15 @@
"website-token": null,
"recursive": false
},
"hdoujin":
{
"crt" : "",
"token": "",
"sleep-request": "0.5-1.5",
"format": ["0", "1600", "1280", "980", "780"],
"tags" : false
},
"hentaifoundry":
{
"descriptions": "text",

View File

@@ -373,6 +373,12 @@ Consider all listed sites to potentially be NSFW.
<td>Archive, Individual Posts, Home Feed, Search Results</td>
<td></td>
</tr>
<tr id="hdoujin" title="hdoujin">
<td>HDoujin Galleries</td>
<td>https://hdoujin.org/</td>
<td>Favorites, Galleries, Search Results</td>
<td></td>
</tr>
<tr id="hentaifoundry" title="hentaifoundry">
<td>Hentai Foundry</td>
<td>https://www.hentai-foundry.com/</td>

View File

@@ -73,6 +73,7 @@ modules = [
"girlswithmuscle",
"gofile",
"hatenablog",
"hdoujin",
"hentai2read",
"hentaicosplays",
"hentaifoundry",

View File

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://hdoujin.org/"""
from . import schalenetwork
# URL prefix shared by every hdoujin pattern below: optional scheme,
# optional "www.", then the hdoujin.org / hdoujin.net domain,
# which is captured as group 1.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?(hdoujin\.(?:org|net))"
class HdoujinBase:
    """Base class for hdoujin extractors"""
    # Mixin holding the site-specific values. The extractor classes in this
    # module list it before their schalenetwork parent, so these attributes
    # take precedence over the schalenetwork defaults of the same name.
    category = "hdoujin"

    # Website root; the shared pagination code builds gallery URLs from it.
    root = "https://hdoujin.org"
    # Prefix for API endpoints requested by the shared pagination code.
    root_api = "https://api.hdoujin.org"
    # hdoujin counterpart of the schalenetwork `root_auth` host.
    root_auth = "https://auth.hdoujin.org"
class HdoujinGalleryExtractor(
        HdoujinBase, schalenetwork.SchalenetworkGalleryExtractor):
    # Single gallery: reuses the schalenetwork gallery extractor unchanged,
    # with only the URL pattern and the HdoujinBase values swapped in.
    # Matches /g/<digits>/<word> and /reader/<digits>/<word>.
    pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)"
    example = "https://hdoujin.org/g/12345/67890abcdef/"
class HdoujinSearchExtractor(
        HdoujinBase, schalenetwork.SchalenetworkSearchExtractor):
    # Search results: reuses the schalenetwork search extractor unchanged.
    # Matches the bare site root, /browse, or /tag/<name>, each optionally
    # followed by a query string; the tag name and the query string are
    # the two capture groups after the domain.
    pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
    example = "https://hdoujin.org/browse?s=QUERY"
class HdoujinFavoriteExtractor(
        HdoujinBase, schalenetwork.SchalenetworkFavoriteExtractor):
    # Favorites listing: reuses the schalenetwork favorite extractor
    # unchanged. Matches /favorites with an optional query string,
    # which is captured after the domain group.
    pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
    example = "https://hdoujin.org/favorites"
# Gallery entries queued by the inherited pagination code are tagged with
# `extr_class`; point it at the hdoujin gallery extractor. This is assigned
# here, after the class definitions, because HdoujinGalleryExtractor itself
# derives from HdoujinBase and does not exist yet inside that class body.
HdoujinBase.extr_class = HdoujinGalleryExtractor

View File

@@ -27,6 +27,7 @@ class SchalenetworkExtractor(Extractor):
root = "https://niyaniya.moe"
root_api = "https://api.schale.network"
root_auth = "https://auth.schale.network"
extr_class = None
request_interval = (0.5, 1.5)
def _init(self):
@@ -38,6 +39,7 @@ class SchalenetworkExtractor(Extractor):
def _pagination(self, endpoint, params):
url_api = self.root_api + endpoint
cls = self.extr_class
while True:
data = self.request_json(
@@ -50,7 +52,7 @@ class SchalenetworkExtractor(Extractor):
for entry in entries:
url = f"{self.root}/g/{entry['id']}/{entry['key']}"
entry["_extractor"] = SchalenetworkGalleryExtractor
entry["_extractor"] = cls
yield Message.Queue, url, entry
try:
@@ -236,3 +238,6 @@ class SchalenetworkFavoriteExtractor(SchalenetworkExtractor):
params["page"] = text.parse_int(params.get("page"), 1)
self.headers["Authorization"] = self._token()
return self._pagination(f"/books/favorites?crt={self._crt()}", params)
SchalenetworkExtractor.extr_class = SchalenetworkGalleryExtractor

View File

@@ -63,6 +63,7 @@ CATEGORY_MAP = {
"girlswithmuscle": "Girls with Muscle",
"hatenablog" : "HatenaBlog",
"hbrowse" : "HBrowse",
"hdoujin" : "HDoujin Galleries",
"hentai2read" : "Hentai2Read",
"hentaicosplay" : "Hentai Cosplay",
"hentaienvy" : "HentaiEnvy",

40
test/results/hdoujin.py Normal file
View File

@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import hdoujin
# URL test cases for the hdoujin extractors; each entry maps a sample URL
# ("#url") to the extractor class expected to handle it ("#class").
__tests__ = (
# Gallery URL on the .org domain.
{
"#url" : "https://hdoujin.org/g/119874/bd0a5217dfc6",
"#class" : hdoujin.HdoujinGalleryExtractor,
},
# Same gallery path on the .net domain, which BASE_PATTERN also accepts.
{
"#url" : "https://hdoujin.net/g/119874/bd0a5217dfc6",
"#class" : hdoujin.HdoujinGalleryExtractor,
},
# Search through the /browse query string.
{
"#url" : "https://hdoujin.org/browse?s=beach",
"#class" : hdoujin.HdoujinSearchExtractor,
},
# Tag listing handled by the search extractor.
# NOTE(review): presumably "#pattern"/"#range"/"#count" require the first
# 80 results to match the gallery pattern - confirm against the test runner.
{
"#url" : "https://hdoujin.org/tag/female:maid",
"#class" : hdoujin.HdoujinSearchExtractor,
"#pattern" : hdoujin.HdoujinGalleryExtractor.pattern,
"#range" : "1-80",
"#count" : 80,
},
# Favorites listing; flagged with "#auth".
{
"#url" : "https://hdoujin.org/favorites",
"#class" : hdoujin.HdoujinFavoriteExtractor,
"#auth" : True,
},
)