[mangoxo] fix login and extraction
This commit is contained in:
@@ -36,12 +36,16 @@ class MangoxoExtractor(Extractor):
|
|||||||
def _login_impl(self, username, password):
|
def _login_impl(self, username, password):
|
||||||
self.log.info("Logging in as %s", username)
|
self.log.info("Logging in as %s", username)
|
||||||
|
|
||||||
|
url = self.root + "/login"
|
||||||
|
page = self.request(url).text
|
||||||
|
token = text.extract(page, 'id="loginToken" value="', '"')[0]
|
||||||
|
|
||||||
url = self.root + "/api/login"
|
url = self.root + "/api/login"
|
||||||
headers = {
|
headers = {
|
||||||
"X-Requested-With": "XMLHttpRequest",
|
"X-Requested-With": "XMLHttpRequest",
|
||||||
"Referer": self.root + "/login",
|
"Referer": self.root + "/login",
|
||||||
}
|
}
|
||||||
data = self._sign_by_md5(username, password)
|
data = self._sign_by_md5(username, password, token)
|
||||||
response = self.request(url, method="POST", headers=headers, data=data)
|
response = self.request(url, method="POST", headers=headers, data=data)
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
@@ -50,11 +54,12 @@ class MangoxoExtractor(Extractor):
|
|||||||
return {"SESSION": self.session.cookies.get("SESSION")}
|
return {"SESSION": self.session.cookies.get("SESSION")}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _sign_by_md5(username, password):
|
def _sign_by_md5(username, password, token):
|
||||||
# https://dns.mangoxo.com/libs/plugins/phoenix-ui/js/phoenix-ui.js
|
# https://dns.mangoxo.com/libs/plugins/phoenix-ui/js/phoenix-ui.js
|
||||||
params = [
|
params = [
|
||||||
("username" , username),
|
("username" , username),
|
||||||
("password" , password),
|
("password" , password),
|
||||||
|
("token" , token),
|
||||||
("timestamp", str(int(time.time()))),
|
("timestamp", str(int(time.time()))),
|
||||||
]
|
]
|
||||||
query = "&".join("=".join(item) for item in sorted(params))
|
query = "&".join("=".join(item) for item in sorted(params))
|
||||||
@@ -79,8 +84,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
|
|||||||
"url": "ad921fe62663b06e7d73997f7d00646cab7bdd0d",
|
"url": "ad921fe62663b06e7d73997f7d00646cab7bdd0d",
|
||||||
"keyword": {
|
"keyword": {
|
||||||
"channel": {
|
"channel": {
|
||||||
"id": "Jpw9ywQ4",
|
"id": "gaxO16d8",
|
||||||
"name": "绘画艺术赏析",
|
"name": "Phoenix",
|
||||||
"cover": str,
|
"cover": str,
|
||||||
},
|
},
|
||||||
"album": {
|
"album": {
|
||||||
@@ -116,14 +121,14 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
|
|||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
"""Return general metadata"""
|
"""Return general metadata"""
|
||||||
title, pos = text.extract(page, '<title>', '</title>')
|
extr = text.extract_from(page)
|
||||||
_ , pos = text.extract(page, 'class="desc"', '', pos)
|
title = extr('<title>', '</title>')
|
||||||
cid , pos = text.extract(page, '//www.mangoxo.com/channel/', '"', pos)
|
count = extr('id="pic-count">', '<')
|
||||||
cname, pos = text.extract(page, '>', '<', pos)
|
cid = extr('<img alt="', '"')
|
||||||
count, pos = text.extract(page, 'id="pic-count">', '<', pos)
|
cover = extr(' src="', '"')
|
||||||
cover, pos = text.extract(page, ' src="', '"', pos)
|
cname = extr('target="_blank">', '<')
|
||||||
date , pos = text.extract(page, '</i>', '<', pos)
|
date = extr('</i>', '<')
|
||||||
descr, pos = text.extract(page, '<pre>', '</pre>', pos)
|
descr = extr('<pre>', '</pre>')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"channel": {
|
"channel": {
|
||||||
@@ -157,8 +162,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
|
|||||||
class MangoxoChannelExtractor(MangoxoExtractor):
|
class MangoxoChannelExtractor(MangoxoExtractor):
|
||||||
"""Extractor for all albums on a mangoxo channel"""
|
"""Extractor for all albums on a mangoxo channel"""
|
||||||
subcategory = "channel"
|
subcategory = "channel"
|
||||||
pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/channel/(\w+)"
|
pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/(\w+)/album"
|
||||||
test = ("https://www.mangoxo.com/channel/QeYKRkO0", {
|
test = ("https://www.mangoxo.com/phoenix/album", {
|
||||||
"pattern": MangoxoAlbumExtractor.pattern,
|
"pattern": MangoxoAlbumExtractor.pattern,
|
||||||
"range": "1-30",
|
"range": "1-30",
|
||||||
"count": "> 20",
|
"count": "> 20",
|
||||||
@@ -166,12 +171,12 @@ class MangoxoChannelExtractor(MangoxoExtractor):
|
|||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
MangoxoExtractor.__init__(self, match)
|
MangoxoExtractor.__init__(self, match)
|
||||||
self.channel_id = match.group(1)
|
self.user = match.group(1)
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
self.login()
|
self.login()
|
||||||
num = total = 1
|
num = total = 1
|
||||||
url = "{}/channel/{}/album/".format(self.root, self.channel_id)
|
url = "{}/{}/album/".format(self.root, self.user)
|
||||||
data = {"_extractor": MangoxoAlbumExtractor}
|
data = {"_extractor": MangoxoAlbumExtractor}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|||||||
Reference in New Issue
Block a user