[deviantart:search] fix regex (#8083)
Use a custom regex to extract search results from the HTML instead of the 'deviation' pattern.
This commit is contained in:
@@ -1356,7 +1356,8 @@ class DeviantartSearchExtractor(DeviantartExtractor):
|
|||||||
|
|
||||||
def _search_html(self, params):
|
def _search_html(self, params):
|
||||||
url = self.root + "/search"
|
url = self.root + "/search"
|
||||||
|
find = text.re(r'''href="https://www.deviantart.com/([^/?#]+)'''
|
||||||
|
r'''/(art|journal)/(?:[^"]+-)?(\d+)''').findall
|
||||||
while True:
|
while True:
|
||||||
response = self.request(url, params=params)
|
response = self.request(url, params=params)
|
||||||
|
|
||||||
@@ -1364,12 +1365,11 @@ class DeviantartSearchExtractor(DeviantartExtractor):
|
|||||||
raise exception.AbortExtraction("HTTP redirect to login page")
|
raise exception.AbortExtraction("HTTP redirect to login page")
|
||||||
page = response.text
|
page = response.text
|
||||||
|
|
||||||
for dev in DeviantartDeviationExtractor.pattern.findall(
|
for user, type, did in find(page)[:-3:3]:
|
||||||
page)[2::3]:
|
|
||||||
yield {
|
yield {
|
||||||
"deviationId": dev[3],
|
"deviationId": did,
|
||||||
"author": {"username": dev[0]},
|
"author": {"username": user},
|
||||||
"isJournal": dev[2] == "journal",
|
"isJournal": type == "journal",
|
||||||
}
|
}
|
||||||
|
|
||||||
cursor = text.extr(page, r'\"cursor\":\"', '\\',)
|
cursor = text.extr(page, r'\"cursor\":\"', '\\',)
|
||||||
|
|||||||
Reference in New Issue
Block a user