[skeb] add 'search_tags' metadata to search results (#2945)
This commit is contained in:
@@ -26,8 +26,11 @@ class SkebExtractor(Extractor):
|
|||||||
self.article = self.config("article", False)
|
self.article = self.config("article", False)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
|
metadata = self.metadata()
|
||||||
for user_name, post_num in self.posts():
|
for user_name, post_num in self.posts():
|
||||||
response, post = self._get_post_data(user_name, post_num)
|
response, post = self._get_post_data(user_name, post_num)
|
||||||
|
if metadata:
|
||||||
|
post.update(metadata)
|
||||||
yield Message.Directory, post
|
yield Message.Directory, post
|
||||||
for data in self._get_urls_from_post(response, post):
|
for data in self._get_urls_from_post(response, post):
|
||||||
url = data["file_url"]
|
url = data["file_url"]
|
||||||
@@ -36,6 +39,9 @@ class SkebExtractor(Extractor):
|
|||||||
def posts(self):
|
def posts(self):
|
||||||
"""Return post number"""
|
"""Return post number"""
|
||||||
|
|
||||||
|
def metadata(self):
|
||||||
|
"""Return additional metadata"""
|
||||||
|
|
||||||
def _pagination(self, url, params):
|
def _pagination(self, url, params):
|
||||||
headers = {"Referer": self.root, "Authorization": "Bearer null"}
|
headers = {"Referer": self.root, "Authorization": "Bearer null"}
|
||||||
params["offset"] = 0
|
params["offset"] = 0
|
||||||
@@ -229,8 +235,12 @@ class SkebSearchExtractor(SkebExtractor):
|
|||||||
pattern = r"(?:https?://)?skeb\.jp/search\?q=([^&#]+)"
|
pattern = r"(?:https?://)?skeb\.jp/search\?q=([^&#]+)"
|
||||||
test = ("https://skeb.jp/search?q=bunny%20tree&t=works", {
|
test = ("https://skeb.jp/search?q=bunny%20tree&t=works", {
|
||||||
"count": ">= 18",
|
"count": ">= 18",
|
||||||
|
"keyword": {"search_tags": "bunny tree"},
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def metadata(self):
|
||||||
|
return {"search_tags": text.unquote(self.user_name)}
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = "https://hb1jt3kre9-2.algolianet.com/1/indexes/*/queries"
|
url = "https://hb1jt3kre9-2.algolianet.com/1/indexes/*/queries"
|
||||||
params = {
|
params = {
|
||||||
@@ -243,10 +253,10 @@ class SkebSearchExtractor(SkebExtractor):
|
|||||||
"x-algolia-application-id": "HB1JT3KRE9",
|
"x-algolia-application-id": "HB1JT3KRE9",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
page = 0
|
||||||
pams = ("hitsPerPage=40&filters=genre%3Aart%20OR%20genre%3Avoice%20OR"
|
pams = ("hitsPerPage=40&filters=genre%3Aart%20OR%20genre%3Avoice%20OR"
|
||||||
"%20genre%3Anovel%20OR%20genre%3Avideo%20OR%20genre%3Amusic%2"
|
"%20genre%3Anovel%20OR%20genre%3Avideo%20OR%20genre%3Amusic%2"
|
||||||
"0OR%20genre%3Acorrection&page=")
|
"0OR%20genre%3Acorrection&page=")
|
||||||
page = 0
|
|
||||||
|
|
||||||
request = {
|
request = {
|
||||||
"indexName": "Request",
|
"indexName": "Request",
|
||||||
@@ -262,9 +272,7 @@ class SkebSearchExtractor(SkebExtractor):
|
|||||||
|
|
||||||
for post in result["hits"]:
|
for post in result["hits"]:
|
||||||
parts = post["path"].split("/")
|
parts = post["path"].split("/")
|
||||||
user_name = parts[1][1:]
|
yield parts[1][1:], parts[3]
|
||||||
post_num = parts[3]
|
|
||||||
yield user_name, post_num
|
|
||||||
|
|
||||||
if page >= result["nbPages"]:
|
if page >= result["nbPages"]:
|
||||||
return
|
return
|
||||||
@@ -287,8 +295,8 @@ class SkebFollowingExtractor(SkebExtractor):
|
|||||||
def users(self):
|
def users(self):
|
||||||
url = "{}/api/users/{}/following_creators".format(
|
url = "{}/api/users/{}/following_creators".format(
|
||||||
self.root, self.user_name)
|
self.root, self.user_name)
|
||||||
headers = {"Referer": self.root, "Authorization": "Bearer null"}
|
|
||||||
params = {"sort": "date", "offset": 0, "limit": 90}
|
params = {"sort": "date", "offset": 0, "limit": 90}
|
||||||
|
headers = {"Referer": self.root, "Authorization": "Bearer null"}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = self.request(url, params=params, headers=headers).json()
|
data = self.request(url, params=params, headers=headers).json()
|
||||||
|
|||||||
Reference in New Issue
Block a user