[mangoxo] fix login and extraction

This commit is contained in:
Mike Fährmann
2021-10-31 02:11:06 +01:00
parent 4c49174579
commit fa5646eadc

View File

@@ -36,12 +36,16 @@ class MangoxoExtractor(Extractor):
def _login_impl(self, username, password): def _login_impl(self, username, password):
self.log.info("Logging in as %s", username) self.log.info("Logging in as %s", username)
url = self.root + "/login"
page = self.request(url).text
token = text.extract(page, 'id="loginToken" value="', '"')[0]
url = self.root + "/api/login" url = self.root + "/api/login"
headers = { headers = {
"X-Requested-With": "XMLHttpRequest", "X-Requested-With": "XMLHttpRequest",
"Referer": self.root + "/login", "Referer": self.root + "/login",
} }
data = self._sign_by_md5(username, password) data = self._sign_by_md5(username, password, token)
response = self.request(url, method="POST", headers=headers, data=data) response = self.request(url, method="POST", headers=headers, data=data)
data = response.json() data = response.json()
@@ -50,11 +54,12 @@ class MangoxoExtractor(Extractor):
return {"SESSION": self.session.cookies.get("SESSION")} return {"SESSION": self.session.cookies.get("SESSION")}
@staticmethod @staticmethod
def _sign_by_md5(username, password): def _sign_by_md5(username, password, token):
# https://dns.mangoxo.com/libs/plugins/phoenix-ui/js/phoenix-ui.js # https://dns.mangoxo.com/libs/plugins/phoenix-ui/js/phoenix-ui.js
params = [ params = [
("username" , username), ("username" , username),
("password" , password), ("password" , password),
("token" , token),
("timestamp", str(int(time.time()))), ("timestamp", str(int(time.time()))),
] ]
query = "&".join("=".join(item) for item in sorted(params)) query = "&".join("=".join(item) for item in sorted(params))
@@ -79,8 +84,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
"url": "ad921fe62663b06e7d73997f7d00646cab7bdd0d", "url": "ad921fe62663b06e7d73997f7d00646cab7bdd0d",
"keyword": { "keyword": {
"channel": { "channel": {
"id": "Jpw9ywQ4", "id": "gaxO16d8",
"name": "绘画艺术赏析", "name": "Phoenix",
"cover": str, "cover": str,
}, },
"album": { "album": {
@@ -116,14 +121,14 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
def metadata(self, page): def metadata(self, page):
"""Return general metadata""" """Return general metadata"""
title, pos = text.extract(page, '<title>', '</title>') extr = text.extract_from(page)
_ , pos = text.extract(page, 'class="desc"', '', pos) title = extr('<title>', '</title>')
cid , pos = text.extract(page, '//www.mangoxo.com/channel/', '"', pos) count = extr('id="pic-count">', '<')
cname, pos = text.extract(page, '>', '<', pos) cid = extr('<img alt="', '"')
count, pos = text.extract(page, 'id="pic-count">', '<', pos) cover = extr(' src="', '"')
cover, pos = text.extract(page, ' src="', '"', pos) cname = extr('target="_blank">', '<')
date , pos = text.extract(page, '</i>', '<', pos) date = extr('</i>', '<')
descr, pos = text.extract(page, '<pre>', '</pre>', pos) descr = extr('<pre>', '</pre>')
return { return {
"channel": { "channel": {
@@ -157,8 +162,8 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
class MangoxoChannelExtractor(MangoxoExtractor): class MangoxoChannelExtractor(MangoxoExtractor):
"""Extractor for all albums on a mangoxo channel""" """Extractor for all albums on a mangoxo channel"""
subcategory = "channel" subcategory = "channel"
pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/channel/(\w+)" pattern = r"(?:https?://)?(?:www\.)?mangoxo\.com/(\w+)/album"
test = ("https://www.mangoxo.com/channel/QeYKRkO0", { test = ("https://www.mangoxo.com/phoenix/album", {
"pattern": MangoxoAlbumExtractor.pattern, "pattern": MangoxoAlbumExtractor.pattern,
"range": "1-30", "range": "1-30",
"count": "> 20", "count": "> 20",
@@ -166,12 +171,12 @@ class MangoxoChannelExtractor(MangoxoExtractor):
def __init__(self, match): def __init__(self, match):
MangoxoExtractor.__init__(self, match) MangoxoExtractor.__init__(self, match)
self.channel_id = match.group(1) self.user = match.group(1)
def items(self): def items(self):
self.login() self.login()
num = total = 1 num = total = 1
url = "{}/channel/{}/album/".format(self.root, self.channel_id) url = "{}/{}/album/".format(self.root, self.user)
data = {"_extractor": MangoxoAlbumExtractor} data = {"_extractor": MangoxoAlbumExtractor}
while True: while True: