[hdoujin] add support (#6810)
This commit is contained in:
@@ -3092,6 +3092,70 @@ Description
|
||||
Recursively download files from subfolders.
|
||||
|
||||
|
||||
extractor.hdoujin.crt
|
||||
---------------------
|
||||
Type
|
||||
``string``
|
||||
Example
|
||||
* ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"``
|
||||
* ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"``
|
||||
Description
|
||||
The ``crt`` query parameter value
|
||||
sent when fetching gallery data.
|
||||
|
||||
To get this value:
|
||||
|
||||
* Open your browser's Developer Tools (F12)
|
||||
* Select `Network` -> `XHR`
|
||||
* Open a gallery page
|
||||
* Select the last `Network` entry and copy its ``crt`` value
|
||||
|
||||
Note: You will also need your browser's
|
||||
`user-agent <extractor.*.user-agent_>`__
|
||||
|
||||
|
||||
extractor.hdoujin.format
|
||||
------------------------
|
||||
Type
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``["0", "1600", "1280", "980", "780"]``
|
||||
Description
|
||||
Name(s) of the image format to download.
|
||||
|
||||
When more than one format is given, the first available one is selected.
|
||||
|
||||
| Possible formats are
|
||||
| ``"780"``, ``"980"``, ``"1280"``, ``"1600"``, ``"0"`` (original)
|
||||
|
||||
|
||||
extractor.hdoujin.tags
|
||||
----------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Group ``tags`` by type and
|
||||
provide them as ``tags_<type>`` metadata fields,
|
||||
for example ``tags_artist`` or ``tags_character``.
|
||||
|
||||
|
||||
extractor.hdoujin.token
|
||||
-----------------------
|
||||
Type
|
||||
``string``
|
||||
Example
|
||||
* ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
* ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
* ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
Description
|
||||
``Authorization`` header value
|
||||
used for requests to ``https://api.hdoujin.org``
|
||||
to access ``favorite`` galleries.
|
||||
|
||||
|
||||
extractor.hentaifoundry.descriptions
|
||||
------------------------------------
|
||||
Type
|
||||
@@ -3586,70 +3650,6 @@ Description
|
||||
the first in the list gets chosen (usually `mp3`).
|
||||
|
||||
|
||||
extractor.schalenetwork.crt
|
||||
---------------------------
|
||||
Type
|
||||
``string``
|
||||
Example
|
||||
* ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"``
|
||||
* ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"``
|
||||
Description
|
||||
The ``crt`` query parameter value
|
||||
sent when fetching gallery data.
|
||||
|
||||
To get this value:
|
||||
|
||||
* Open your browser's Developer Tools (F12)
|
||||
* Select `Network` -> `XHR`
|
||||
* Open a gallery page
|
||||
* Select the last `Network` entry and copy its ``crt`` value
|
||||
|
||||
Note: You will also need your browser's
|
||||
`user-agent <extractor.*.user-agent_>`__
|
||||
|
||||
|
||||
extractor.schalenetwork.format
|
||||
------------------------------
|
||||
Type
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``["0", "1600", "1280", "980", "780"]``
|
||||
Description
|
||||
Name(s) of the image format to download.
|
||||
|
||||
When more than one format is given, the first available one is selected.
|
||||
|
||||
| Possible formats are
|
||||
| ``"780"``, ``"980"``, ``"1280"``, ``"1600"``, ``"0"`` (original)
|
||||
|
||||
|
||||
extractor.schalenetwork.tags
|
||||
----------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Group ``tags`` by type and
|
||||
provide them as ``tags_<type>`` metadata fields,
|
||||
for example ``tags_artist`` or ``tags_character``.
|
||||
|
||||
|
||||
extractor.schalenetwork.token
|
||||
-----------------------------
|
||||
Type
|
||||
``string``
|
||||
Example
|
||||
* ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
* ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
* ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
Description
|
||||
``Authorization`` header value
|
||||
used for requests to ``https://api.schale.network``
|
||||
to access ``favorite`` galleries.
|
||||
|
||||
|
||||
extractor.lolisafe.domain
|
||||
-------------------------
|
||||
Type
|
||||
@@ -4880,6 +4880,70 @@ Description
|
||||
Download videos.
|
||||
|
||||
|
||||
extractor.schalenetwork.crt
|
||||
---------------------------
|
||||
Type
|
||||
``string``
|
||||
Example
|
||||
* ``"0542daa9-352c-4fd5-a497-6c6d5cf07423"``
|
||||
* ``"/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"``
|
||||
Description
|
||||
The ``crt`` query parameter value
|
||||
sent when fetching gallery data.
|
||||
|
||||
To get this value:
|
||||
|
||||
* Open your browser's Developer Tools (F12)
|
||||
* Select `Network` -> `XHR`
|
||||
* Open a gallery page
|
||||
* Select the last `Network` entry and copy its ``crt`` value
|
||||
|
||||
Note: You will also need your browser's
|
||||
`user-agent <extractor.*.user-agent_>`__
|
||||
|
||||
|
||||
extractor.schalenetwork.format
|
||||
------------------------------
|
||||
Type
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``["0", "1600", "1280", "980", "780"]``
|
||||
Description
|
||||
Name(s) of the image format to download.
|
||||
|
||||
When more than one format is given, the first available one is selected.
|
||||
|
||||
| Possible formats are
|
||||
| ``"780"``, ``"980"``, ``"1280"``, ``"1600"``, ``"0"`` (original)
|
||||
|
||||
|
||||
extractor.schalenetwork.tags
|
||||
----------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Group ``tags`` by type and
|
||||
provide them as ``tags_<type>`` metadata fields,
|
||||
for example ``tags_artist`` or ``tags_character``.
|
||||
|
||||
|
||||
extractor.schalenetwork.token
|
||||
-----------------------------
|
||||
Type
|
||||
``string``
|
||||
Example
|
||||
* ``"3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
* ``"Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
* ``"Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"``
|
||||
Description
|
||||
``Authorization`` header value
|
||||
used for requests to ``https://api.schale.network``
|
||||
to access ``favorite`` galleries.
|
||||
|
||||
|
||||
extractor.sexcom.gifs
|
||||
---------------------
|
||||
Type
|
||||
|
||||
@@ -386,6 +386,15 @@
|
||||
"website-token": null,
|
||||
"recursive": false
|
||||
},
|
||||
"hdoujin":
|
||||
{
|
||||
"crt" : "",
|
||||
"token": "",
|
||||
"sleep-request": "0.5-1.5",
|
||||
|
||||
"format": ["0", "1600", "1280", "980", "780"],
|
||||
"tags" : false
|
||||
},
|
||||
"hentaifoundry":
|
||||
{
|
||||
"descriptions": "text",
|
||||
|
||||
@@ -373,6 +373,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Archive, Individual Posts, Home Feed, Search Results</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="hdoujin" title="hdoujin">
|
||||
<td>HDoujin Galleries</td>
|
||||
<td>https://hdoujin.org/</td>
|
||||
<td>Favorites, Galleries, Search Results</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr id="hentaifoundry" title="hentaifoundry">
|
||||
<td>Hentai Foundry</td>
|
||||
<td>https://www.hentai-foundry.com/</td>
|
||||
|
||||
@@ -73,6 +73,7 @@ modules = [
|
||||
"girlswithmuscle",
|
||||
"gofile",
|
||||
"hatenablog",
|
||||
"hdoujin",
|
||||
"hentai2read",
|
||||
"hentaicosplays",
|
||||
"hentaifoundry",
|
||||
|
||||
42
gallery_dl/extractor/hdoujin.py
Normal file
42
gallery_dl/extractor/hdoujin.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://hdoujin.org/"""
|
||||
|
||||
from . import schalenetwork
|
||||
|
||||
# URL prefix shared by all hdoujin extractor patterns: optional scheme and
# optional "www." followed by the host. Group 1 captures the domain
# ("hdoujin.org" or "hdoujin.net").
BASE_PATTERN = r"(?:https?://)?(?:www\.)?(hdoujin\.(?:org|net))"
|
||||
|
||||
|
||||
class HdoujinBase:
    """Base class for hdoujin extractors"""
    # Mixin holding the site-specific constants. The concrete extractors
    # list it ahead of a Schalenetwork extractor class, so these values are
    # found first during attribute lookup.

    # Category name; the documented options use the matching
    # ``extractor.hdoujin.*`` prefix.
    category = "hdoujin"

    # Web front end used to build gallery URLs.
    root = "https://hdoujin.org"
    # API host; the ``token`` option is sent as Authorization header here.
    root_api = "https://api.hdoujin.org"
    # Auth host (usage not visible from this module).
    root_auth = "https://auth.hdoujin.org"
|
||||
|
||||
|
||||
class HdoujinGalleryExtractor(
        HdoujinBase, schalenetwork.SchalenetworkGalleryExtractor):
    # Gallery extractor for hdoujin; all logic comes from the Schalenetwork
    # gallery extractor, only the site constants and URL pattern differ.
    #
    # Accepts both /g/<id>/<key> and /reader/<id>/<key> paths.
    # Groups: 1 = domain, 2 = numeric gallery id, 3 = gallery key.
    pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
    example = "https://hdoujin.org/g/12345/67890abcdef/"
|
||||
|
||||
|
||||
class HdoujinSearchExtractor(
        HdoujinBase, schalenetwork.SchalenetworkSearchExtractor):
    # Search/tag/browse listing extractor for hdoujin, reusing the
    # Schalenetwork search implementation.
    #
    # Matches "/", "/browse" or "/tag/<name>", each optionally followed by a
    # query string; anchored with "$" so longer paths are left to the other
    # extractors. Groups: 1 = domain, 2 = tag name, 3 = query string.
    pattern = BASE_PATTERN + r"/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
    example = "https://hdoujin.org/browse?s=QUERY"
|
||||
|
||||
|
||||
class HdoujinFavoriteExtractor(
        HdoujinBase, schalenetwork.SchalenetworkFavoriteExtractor):
    # Favorites extractor for hdoujin, reusing the Schalenetwork favorites
    # implementation (which sends an Authorization header, so the ``token``
    # option is needed).
    #
    # Groups: 1 = domain, 2 = optional query string.
    pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
    example = "https://hdoujin.org/favorites"
|
||||
|
||||
|
||||
# Set after the class definitions because HdoujinGalleryExtractor does not
# exist yet while HdoujinBase's body is evaluated. The Schalenetwork
# pagination code reads `extr_class` to tag each queued gallery entry with
# the extractor that should handle it.
HdoujinBase.extr_class = HdoujinGalleryExtractor
|
||||
@@ -27,6 +27,7 @@ class SchalenetworkExtractor(Extractor):
|
||||
root = "https://niyaniya.moe"
|
||||
root_api = "https://api.schale.network"
|
||||
root_auth = "https://auth.schale.network"
|
||||
extr_class = None
|
||||
request_interval = (0.5, 1.5)
|
||||
|
||||
def _init(self):
|
||||
@@ -38,6 +39,7 @@ class SchalenetworkExtractor(Extractor):
|
||||
|
||||
def _pagination(self, endpoint, params):
|
||||
url_api = self.root_api + endpoint
|
||||
cls = self.extr_class
|
||||
|
||||
while True:
|
||||
data = self.request_json(
|
||||
@@ -50,7 +52,7 @@ class SchalenetworkExtractor(Extractor):
|
||||
|
||||
for entry in entries:
|
||||
url = f"{self.root}/g/{entry['id']}/{entry['key']}"
|
||||
entry["_extractor"] = SchalenetworkGalleryExtractor
|
||||
entry["_extractor"] = cls
|
||||
yield Message.Queue, url, entry
|
||||
|
||||
try:
|
||||
@@ -236,3 +238,6 @@ class SchalenetworkFavoriteExtractor(SchalenetworkExtractor):
|
||||
params["page"] = text.parse_int(params.get("page"), 1)
|
||||
self.headers["Authorization"] = self._token()
|
||||
return self._pagination(f"/books/favorites?crt={self._crt()}", params)
|
||||
|
||||
|
||||
SchalenetworkExtractor.extr_class = SchalenetworkGalleryExtractor
|
||||
|
||||
@@ -63,6 +63,7 @@ CATEGORY_MAP = {
|
||||
"girlswithmuscle": "Girls with Muscle",
|
||||
"hatenablog" : "HatenaBlog",
|
||||
"hbrowse" : "HBrowse",
|
||||
"hdoujin" : "HDoujin Galleries",
|
||||
"hentai2read" : "Hentai2Read",
|
||||
"hentaicosplay" : "Hentai Cosplay",
|
||||
"hentaienvy" : "HentaiEnvy",
|
||||
|
||||
40
test/results/hdoujin.py
Normal file
40
test/results/hdoujin.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import hdoujin
|
||||
|
||||
|
||||
__tests__ = (
    # Gallery URL on the .org domain
    {
        "#url"  : "https://hdoujin.org/g/119874/bd0a5217dfc6",
        "#class": hdoujin.HdoujinGalleryExtractor,
    },

    # Same gallery via the alternative .net domain
    {
        "#url"  : "https://hdoujin.net/g/119874/bd0a5217dfc6",
        "#class": hdoujin.HdoujinGalleryExtractor,
    },

    # Free-text search through /browse
    {
        "#url"  : "https://hdoujin.org/browse?s=beach",
        "#class": hdoujin.HdoujinSearchExtractor,
    },

    # Tag listing; every queued result must be a gallery URL
    {
        "#url"    : "https://hdoujin.org/tag/female:maid",
        "#class"  : hdoujin.HdoujinSearchExtractor,
        "#pattern": hdoujin.HdoujinGalleryExtractor.pattern,
        "#range"  : "1-80",
        "#count"  : 80,
    },

    # Favorites listing (needs authentication)
    {
        "#url"  : "https://hdoujin.org/favorites",
        "#class": hdoujin.HdoujinFavoriteExtractor,
        "#auth" : True,
    },

)
|
||||
Reference in New Issue
Block a user