[artstation] fix search result pagination (closes #537)
This commit is contained in:
@@ -83,14 +83,20 @@ class ArtstationExtractor(Extractor):
|
|||||||
response = self.request(url, notfound="user")
|
response = self.request(url, notfound="user")
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
def _pagination(self, url, params=None):
|
def _pagination(self, url, params=None, json=None):
|
||||||
if not params:
|
if json:
|
||||||
params = {}
|
params = json
|
||||||
|
kwargs = {"json": json}
|
||||||
|
else:
|
||||||
|
if not params:
|
||||||
|
params = {}
|
||||||
|
kwargs = {"params": params}
|
||||||
|
|
||||||
params["page"] = 1
|
params["page"] = 1
|
||||||
total = 0
|
total = 0
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = self.request(url, params=params).json()
|
data = self.request(url, **kwargs).json()
|
||||||
yield from data["data"]
|
yield from data["data"]
|
||||||
|
|
||||||
total += len(data["data"])
|
total += len(data["data"])
|
||||||
@@ -268,34 +274,38 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
|
|||||||
class ArtstationSearchExtractor(ArtstationExtractor):
|
class ArtstationSearchExtractor(ArtstationExtractor):
|
||||||
"""Extractor for artstation search results"""
|
"""Extractor for artstation search results"""
|
||||||
subcategory = "search"
|
subcategory = "search"
|
||||||
directory_fmt = ("{category}", "Searches", "{search[searchterm]}")
|
directory_fmt = ("{category}", "Searches", "{search[query]}")
|
||||||
archive_fmt = "s_{search[searchterm]}_{asset[id]}"
|
archive_fmt = "s_{search[query]}_{asset[id]}"
|
||||||
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
|
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
|
||||||
r"/search/?\?([^#]+)")
|
r"/search/?\?([^#]+)")
|
||||||
test = ("https://www.artstation.com/search?sorting=recent&q=ancient",)
|
test = ("https://www.artstation.com/search?q=ancient&sort_by=rank", {
|
||||||
|
"range": "1-20",
|
||||||
|
"count": 20,
|
||||||
|
})
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self, match)
|
ArtstationExtractor.__init__(self, match)
|
||||||
query = text.parse_query(match.group(1))
|
query = text.parse_query(match.group(1))
|
||||||
self.searchterm = query.get("q", "")
|
self.query = query.get("q", "")
|
||||||
self.order = query.get("sorting", "recent").lower()
|
self.sorting = query.get("sort_by", "rank").lower()
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
return {"search": {
|
return {"search": {
|
||||||
"searchterm": self.searchterm,
|
"query" : self.query,
|
||||||
"order": self.order,
|
"sorting": self.sorting,
|
||||||
}}
|
}}
|
||||||
|
|
||||||
def projects(self):
|
def projects(self):
|
||||||
order = "likes_count" if self.order == "likes" else "published_at"
|
url = "{}/api/v2/search/projects.json".format(self.root)
|
||||||
url = "{}/search/projects.json".format(self.root)
|
return self._pagination(url, json={
|
||||||
params = {
|
"additional_fields": "[]",
|
||||||
"direction": "desc",
|
"filters" : "[]",
|
||||||
"order": order,
|
"page" : None,
|
||||||
"q": self.searchterm,
|
"per_page" : "50",
|
||||||
# "show_pro_first": "true",
|
"pro_first" : "1",
|
||||||
}
|
"query" : self.query,
|
||||||
return self._pagination(url, params)
|
"sorting" : self.sorting,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
class ArtstationArtworkExtractor(ArtstationExtractor):
|
class ArtstationArtworkExtractor(ArtstationExtractor):
|
||||||
@@ -305,7 +315,10 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
|
|||||||
archive_fmt = "A_{asset[id]}"
|
archive_fmt = "A_{asset[id]}"
|
||||||
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
|
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
|
||||||
r"/artwork/?\?([^#]+)")
|
r"/artwork/?\?([^#]+)")
|
||||||
test = ("https://www.artstation.com/artwork?sorting=latest",)
|
test = ("https://www.artstation.com/artwork?sorting=latest", {
|
||||||
|
"range": "1-20",
|
||||||
|
"count": 20,
|
||||||
|
})
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
ArtstationExtractor.__init__(self, match)
|
ArtstationExtractor.__init__(self, match)
|
||||||
@@ -316,9 +329,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
|
|||||||
|
|
||||||
def projects(self):
|
def projects(self):
|
||||||
url = "{}/projects.json".format(self.root)
|
url = "{}/projects.json".format(self.root)
|
||||||
params = self.query.copy()
|
return self._pagination(url, self.query.copy())
|
||||||
params["page"] = 1
|
|
||||||
return self._pagination(url, params)
|
|
||||||
|
|
||||||
|
|
||||||
class ArtstationImageExtractor(ArtstationExtractor):
|
class ArtstationImageExtractor(ArtstationExtractor):
|
||||||
|
|||||||
Reference in New Issue
Block a user