[35photo] fix extraction

This commit is contained in:
Mike Fährmann
2019-07-09 20:28:04 +02:00
parent 04b8d0894a
commit a7b42b37a2
2 changed files with 14 additions and 5 deletions

View File

@@ -72,7 +72,6 @@ class _35photoExtractor(Extractor):
"user" : data["user_login"], "user" : data["user_login"],
"user_id" : data["user_id"], "user_id" : data["user_id"],
"user_name" : data["user_name"], "user_name" : data["user_name"],
"other" : data["otherData"],
} }
if "series" in data: if "series" in data:
@@ -89,6 +88,8 @@ class _35photoExtractor(Extractor):
def _photo_ids(page): def _photo_ids(page):
"""Extract unique photo IDs and return them as sorted list""" """Extract unique photo IDs and return them as sorted list"""
# searching for photo-id="..." doesn't always work (see unit tests) # searching for photo-id="..." doesn't always work (see unit tests)
if not page:
return ()
return sorted( return sorted(
set(text.extract_iter(page, "/photo_", "/")), set(text.extract_iter(page, "/photo_", "/")),
key=text.parse_int, key=text.parse_int,
@@ -100,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
"""Extractor for all images of a user on 35photo.pro""" """Extractor for all images of a user on 35photo.pro"""
subcategory = "user" subcategory = "user"
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro" pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
r"/(?!photo_|genre_)([^/?&#]+)") r"/(?!photo_|genre_|rating/)([^/?&#]+)")
test = ( test = (
("https://35photo.pro/liya", { ("https://35photo.pro/liya", {
"pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg", "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
@@ -146,7 +147,14 @@ class _35photoGenreExtractor(_35photoExtractor):
("https://35photo.pro/genre_109/", { ("https://35photo.pro/genre_109/", {
"range": "1-30", "range": "1-30",
}), }),
("https://35photo.pro/genre_109/new/"), ("https://35photo.pro/genre_103/", {
"range": "1-30",
"count": 30,
}),
("https://35photo.pro/genre_103/new/", {
"range": "1-30",
"count": 30,
}),
) )
def __init__(self, match): def __init__(self, match):
@@ -165,6 +173,8 @@ class _35photoGenreExtractor(_35photoExtractor):
} }
def photos(self): def photos(self):
if not self.photo_ids:
return ()
return self._pagination({ return self._pagination({
"page": "genre", "page": "genre",
"community_id": self.genre_id, "community_id": self.genre_id,
@@ -193,7 +203,6 @@ class _35photoImageExtractor(_35photoExtractor):
"user" : "liya", "user" : "liya",
"user_id" : 20415, "user_id" : 20415,
"user_name" : "Liya Mirzaeva", "user_name" : "Liya Mirzaeva",
"other" : str,
}, },
}) })

View File

@@ -31,7 +31,7 @@ class XvideosGalleryExtractor(XvideosExtractor):
(("https://www.xvideos.com/profiles" (("https://www.xvideos.com/profiles"
"/pervertedcouple/photos/751031/random_stuff"), { "/pervertedcouple/photos/751031/random_stuff"), {
"url": "4f0d992e5dc39def2c3ac8e099d17bf09e76e3c7", "url": "4f0d992e5dc39def2c3ac8e099d17bf09e76e3c7",
"keyword": "8d637b372c6231cc4ada92dd5918db5fdbd06520", "keyword": "65979d63a69576cf692b41d5fbbd995cc40a51b9",
}), }),
("https://www.xvideos.com/profiles/pervertedcouple/photos/751032/", { ("https://www.xvideos.com/profiles/pervertedcouple/photos/751032/", {
"exception": exception.NotFoundError, "exception": exception.NotFoundError,