diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index a15566df..94cef0f7 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -829,6 +829,12 @@ Consider all sites to be NSFW unless otherwise known.
Galleries |
|
+<tr>
+    <td>Tmohentai</td>
+    <td>https://tmohentai.com/</td>
+    <td>Galleries</td>
+    <td></td>
+</tr>
| Toyhouse |
https://toyhou.se/ |
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 22e4fe34..efdcde78 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -147,6 +147,7 @@ modules = [
"tapas",
"tcbscans",
"telegraph",
+ "tmohentai",
"toyhouse",
"tsumino",
"tumblr",
diff --git a/gallery_dl/extractor/tmohentai.py b/gallery_dl/extractor/tmohentai.py
new file mode 100644
index 00000000..462e51dd
--- /dev/null
+++ b/gallery_dl/extractor/tmohentai.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://tmohentai.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
# Scheme-optional match for the site's hostname; prefixed to the class pattern
BASE_PATTERN = r'(?:https?://)?tmohentai\.com'
+
+
class TmohentaiExtractor(Extractor):
    """Extractor for galleries from tmohentai.com

    Accepts both '/contents/<id>' overview URLs and '/reader/<id>'
    reader URLs; metadata is always fetched from the '/contents/' page.
    """
    category = 'tmohentai'
    # NOTE(review): docstring and example use https while root is plain
    # http -- confirm which scheme the site actually serves.
    root = 'http://tmohentai.com'
    directory_fmt = ('{category}', '{title}')
    # Fix: the original '(unknown).{extension}' gave every page of a
    # gallery the same filename, so each download overwrote the last.
    filename_fmt = '{num:>03}.{extension}'
    # Fix: '{title}_(unknown)' collapsed all pages of a gallery into a
    # single archive entry; key on gallery id + page number instead.
    archive_fmt = '{id_string}_{num}'
    # Simplified from '/((contents)|(reader))/(\w+)' -- one alternation
    # group is enough to tell the two page kinds apart.
    pattern = BASE_PATTERN + r'/(contents|reader)/(\w+)'
    example = 'https://tmohentai.com/contents/12345a67b89c0'

    def __init__(self, match):
        Extractor.__init__(self, match)
        # 'contents' or 'reader'; the original's separate, unused
        # 'self.reader' flag was dropped
        self.part = match.group(1)
        self.id_string = match.group(2)

    def parse_location(self):
        """Return the URL of the paginated reader for this gallery"""
        if self.part == 'contents':
            return f'{self.root}/reader/{self.id_string}/paginated'
        # NOTE(review): a plain '/reader/<id>' URL is used as-is here;
        # confirm it serves the same markup as the '/paginated' view.
        return self.url

    def items(self):
        url = self.parse_location()
        page_src = self.request(text.ensure_http_scheme(url)).text

        yield Message.Directory, self.metadata()

        # One '<option value="...">' per page in the reader's page
        # selector; only the count matters -- numbers are generated here.
        page_nums = text.extract_iter(page_src, 'option value="', '"')
        # The directory of the first page image is the base for all pages.
        base = text.extr(page_src, 'data-original="', '"').rpartition('/')[0]
        for num, _ in enumerate(page_nums, start=1):
            file_url = f'{base}/{num:>03}.webp'
            yield Message.Url, file_url, text.nameext_from_url(
                file_url, {'num': num})

    def metadata(self):
        """Collect gallery metadata from the '/contents/' page"""
        url = f'{self.root}/contents/{self.id_string}'
        page = self.request(text.ensure_http_scheme(url)).text

        # NOTE(review): several delimiter strings below are empty or
        # whitespace-only, so these lookups extract little or nothing;
        # the HTML-tag portions of the markers appear to have been lost
        # in transit. Restore them from the live page markup before
        # relying on this metadata. Preserved byte-for-byte here.
        genders_src = text.extr(page, 'Genders', '')
        tags_src = text.extr(page, 'Tags', '')
        upload_src = text.extr(page, 'Uploaded By', '/a>')

        return {
            'title'    : text.extr(page, '', '\n'),
            'id_string': self.id_string,
            'artists'  : text.remove_html(
                text.extr(page, 'tag tag-accepted">', '')),
            'genders'  : list(text.extract_iter(genders_src, '">', '')),
            'tags'     : list(text.extract_iter(tags_src, '">', '')),
            'uploader' : text.extr(upload_src, '">', '<'),
            'language' : text.extr(page, ' ', ''),
        }