diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 94cef0f7..8aadcde5 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -830,7 +830,7 @@ Consider all sites to be NSFW unless otherwise known.
- | Tmohentai |
+ TMOHentai |
https://tmohentai.com/ |
Galleries |
|
diff --git a/gallery_dl/extractor/tmohentai.py b/gallery_dl/extractor/tmohentai.py
index d4e16086..be45702a 100644
--- a/gallery_dl/extractor/tmohentai.py
+++ b/gallery_dl/extractor/tmohentai.py
@@ -6,59 +6,43 @@
"""Extractors for https://tmohentai.com/"""
-from .common import GalleryExtractor, Message
+from .common import GalleryExtractor
from .. import text
-BASE_PATTERN = r'(?:https?://)?tmohentai\.com'
+BASE_PATTERN = r"(?:https?://)?tmohentai\.com"
class TmohentaiGalleryExtractor(GalleryExtractor):
- category = 'tmohentai'
- subcategory = 'gallery'
- root = 'http://tmohentai.com'
- directory_fmt = ('{category}', '{title}')
- filename_fmt = '{title}_{filename}.{extension}'
- archive_fmt = '{id_string}_{filename}'
- pattern = BASE_PATTERN + r'/(contents)|(reader)/(\w+)'
- example = 'https://tmohentai.com/contents/12345a67b89c0'
+ category = "tmohentai"
+ root = "http://tmohentai.com"
+ directory_fmt = ("{category}", "{title} ({gallery_id})")
+ pattern = BASE_PATTERN + r"/(?:contents|reader)/(\w+)"
+ example = "https://tmohentai.com/contents/12345a67b89c0"
def __init__(self, match):
- self.id_string = match.group(2)
- url = '{}/contents/{}'.format(self.root, self.id_string)
+ self.gallery_id = match.group(1)
+ url = "{}/contents/{}".format(self.root, self.gallery_id)
GalleryExtractor.__init__(self, match, url)
- def items(self):
- page = self.request(
- text.ensure_http_scheme(self.url)).text
- data = self.metadata(page)
-
- yield Message.Directory, data
- imgs = self.images(page)
-
- cdn = 'https://imgrojo.tmohentai.com/contents'
- for num, _ in enumerate(imgs, start=0):
- url = ('{}/{}/{:>03}.webp'.format(cdn, self.id_string, num))
- img = text.nameext_from_url(url, {
- 'num' : num + 1,
- 'title' : data['title'],
- 'id_string': self.id_string,
- })
- yield Message.Url, url, img
-
def images(self, page):
- pages = text.extract_iter(
- page, 'class="lanzador', '>')
- return pages
+ fmt = "https://imgrojo.tmohentai.com/contents/{}/{{:>03}}.webp".format(
+ self.gallery_id).format
+ cnt = page.count('class="lanzador')
+ return [(fmt(i), None) for i in range(0, cnt)]
def metadata(self, page):
- extr = text.extract_from(page, page.index('tag tag-accepted">'))
+ extr = text.extract_from(page)
return {
- 'title' : text.extr(page, '', '
').strip(),
- 'id_string': self.id_string,
- 'artists' : text.remove_html(extr('">', '')),
- 'genders' : text.split_html(extr('Genders', '', '')),
- 'uploader' : text.remove_html(extr('Uploaded By', '')),
- 'language' : extr(' ', '\n'),
+ "gallery_id": self.gallery_id,
+ "title" : text.unescape(extr("
", "<").strip()),
+ "artists" : text.split_html(extr(
+ "", "")),
+ "categories": text.split_html(extr(
+ "", "")),
+ "tags" : text.split_html(extr(
+ "", "")),
+ "uploader" : text.remove_html(extr(
+ "", "")),
+ "language" : extr(" ", "\n"),
}
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 470b629d..695108e0 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -121,6 +121,7 @@ CATEGORY_MAP = {
"tbib" : "The Big ImageBoard",
"tcbscans" : "TCB Scans",
"tco" : "Twitter t.co",
+ "tmohentai" : "TMOHentai",
"thatpervert" : "ThatPervert",
"thebarchive" : "The /b/ Archive",
"thecollection" : "The /co/llection",
diff --git a/test/results/tmohentai.py b/test/results/tmohentai.py
new file mode 100644
index 00000000..2bae050a
--- /dev/null
+++ b/test/results/tmohentai.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import tmohentai
+
+
+__tests__ = (
+{
+ "#url" : "https://tmohentai.com/contents/653c2aeaa693c",
+ "#category": ("", "tmohentai", "gallery"),
+ "#class" : tmohentai.TmohentaiGalleryExtractor,
+ "#pattern" : r"https://imgrojo\.tmohentai\.com/contents/653c2aeaa693c/\d\d\d\.webp",
+ "#count" : 46,
+
+ "artists" : ["Andoryu"],
+ "categories": [
+ "Big Breasts",
+ "BlowJob",
+ "Cheating",
+ "Mature",
+ "Milf",
+ "Student",
+ ],
+ "count" : 46,
+ "extension" : "webp",
+ "gallery_id": "653c2aeaa693c",
+ "language" : "Español",
+ "num" : int,
+ "tags" : [
+ "milf",
+ "Madre",
+ "enormes pechos",
+ "Peluda",
+ "nakadashi",
+ "cheating",
+ "madura",
+ "sexo a escondidas",
+ "Ama de casa",
+ "mamada",
+ ],
+ "title" : "La Mama de mi Novia es tan Pervertida que no Pude Soportarlo mas",
+ "uploader" : "NekoCreme Fansub",
+},
+
+{
+ "#url" : "https://tmohentai.com/reader/653c2aeaa693c/paginated/1",
+ "#category": ("", "tmohentai", "gallery"),
+ "#class" : tmohentai.TmohentaiGalleryExtractor,
+},
+
+)