[35photo] add 'tag' extractor
This commit is contained in:
@@ -5,7 +5,7 @@ Unless otherwise known, assume all sites to be NSFW
|
||||
==================== =================================== ================================================== ================
|
||||
Site URL Capabilities Authentication
|
||||
==================== =================================== ================================================== ================
|
||||
35PHOTO https://35photo.pro/ Genres, individual Images, User Profiles
|
||||
35PHOTO https://35photo.pro/ |35photo-C|
|
||||
3dbooru http://behoimi.org/ Pools, Popular Images, Posts, Tag-Searches
|
||||
4chan https://www.4chan.org/ Boards, Threads
|
||||
4plebs https://archive.4plebs.org/ Threads
|
||||
@@ -141,6 +141,7 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
|
||||
半次元 https://bcy.net/ Posts, User Profiles
|
||||
==================== =================================== ================================================== ================
|
||||
|
||||
.. |35photo-C| replace:: Genres, individual Images, Tag-Searches, User Profiles
|
||||
.. |artstation-C| replace:: Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles
|
||||
.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh, User Profiles
|
||||
.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles
|
||||
|
||||
@@ -101,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
|
||||
"""Extractor for all images of a user on 35photo.pro"""
|
||||
subcategory = "user"
|
||||
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
|
||||
r"/(?!photo_|genre_|rating/)([^/?&#]+)")
|
||||
r"/(?!photo_|genre_|tags/|rating/)([^/?&#]+)")
|
||||
test = (
|
||||
("https://35photo.pro/liya", {
|
||||
"pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
|
||||
@@ -137,6 +137,42 @@ class _35photoUserExtractor(_35photoExtractor):
|
||||
})
|
||||
|
||||
|
||||
class _35photoTagExtractor(_35photoExtractor):
    """Extractor for the photos listed under a tag on 35photo.pro"""
    subcategory = "tag"
    directory_fmt = ("{category}", "Tags", "{search_tag}")
    archive_fmt = "t{search_tag}_{id}_{num}"
    pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?&#]+)"
    test = ("https://35photo.pro/tags/landscape/", {
        "range": "1-25",
        "count": 25,
    })

    def __init__(self, match):
        """Store the tag captured by the first group of 'pattern'"""
        _35photoExtractor.__init__(self, match)
        self.tag = match.group(1)

    def metadata(self):
        """Return the unquoted, lower-cased tag as 'search_tag'"""
        tag_name = text.unquote(self.tag)
        return {"search_tag": tag_name.lower()}

    def photos(self):
        """Yield photo ids from consecutive tag listing pages

        Pages are requested as '<root>/tags/<tag>/list_<n>/' with n
        starting at 1; iteration ends with the first page that
        produces no photo id.
        """
        url_fmt = "{}/tags/{}/list_{}/"
        page_num = 0

        while True:
            page_num += 1
            listing_url = url_fmt.format(self.root, self.tag, page_num)
            html = self.request(listing_url).text

            # Only an id equal to the one directly before it is skipped;
            # presumably each photo is linked several times in a row.
            last_id = None
            for current_id in text.extract_iter(
                    html, "35photo.pro/photo_", "/"):
                if current_id == last_id:
                    continue
                last_id = current_id
                yield current_id

            # nothing (or only empty ids) found on this page: stop paging
            if not last_id:
                break
|
||||
|
||||
|
||||
class _35photoGenreExtractor(_35photoExtractor):
|
||||
"""Extractor for images of a specific genre on 35photo.pro"""
|
||||
subcategory = "genre"
|
||||
|
||||
Reference in New Issue
Block a user