From 7f34f99a2604026497a545afb0ef93d557889326 Mon Sep 17 00:00:00 2001 From: nifnat Date: Fri, 27 Jan 2023 21:30:06 +0000 Subject: [PATCH 1/5] Reverse engineered obfuscated JS function and reimplemented in python. --- gallery_dl/extractor/hotleak.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py index eb64db0d..7bfec568 100644 --- a/gallery_dl/extractor/hotleak.py +++ b/gallery_dl/extractor/hotleak.py @@ -8,6 +8,7 @@ from .common import Extractor, Message from .. import text, exception +from base64 import b64decode BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip" @@ -83,6 +84,12 @@ class HotleakPostExtractor(HotleakExtractor): HotleakExtractor.__init__(self, match) self.creator, self.type, self.id = match.groups() + def decode_video_url(self, encoded_url): + encoded_url = encoded_url[16:] + encoded_url = encoded_url[:-16] + encoded_url = encoded_url[::-1] + return b64decode(encoded_url).decode('utf-8') + def posts(self): url = "{}/{}/{}/{}".format( self.root, self.creator, self.type, self.id) @@ -100,8 +107,8 @@ class HotleakPostExtractor(HotleakExtractor): text.nameext_from_url(data["url"], data) elif self.type == "video": - data["url"] = "ytdl:" + text.extr( - text.unescape(page), '"src":"', '"') + data["url"] = "ytdl:" + self.decode_video_url(text.extr( + text.unescape(page), '"src":"', '"')) text.nameext_from_url(data["url"], data) data["extension"] = "mp4" @@ -133,6 +140,12 @@ class HotleakCreatorExtractor(HotleakExtractor): url = "{}/{}".format(self.root, self.creator) return self._pagination(url) + def decode_video_url(self, encoded_url): + encoded_url = encoded_url[16:] + encoded_url = encoded_url[:-16] + encoded_url = encoded_url[::-1] + return b64decode(encoded_url).decode('utf-8') + def _pagination(self, url): headers = {"X-Requested-With": "XMLHttpRequest"} params = {"page": 1} @@ -163,7 +176,7 @@ class HotleakCreatorExtractor(HotleakExtractor): elif post["type"] == 1: data["type"] = "video" - data["url"] = "ytdl:" + post["stream_url_play"] + data["url"] = "ytdl:" + self.decode_video_url(post["stream_url_play"]) text.nameext_from_url(data["url"], data) data["extension"] = "mp4" From 224098dd2815baa4a647e909107a2deaa88479f7 Mon Sep 17 00:00:00 2001 From: nifnat Date: Fri, 27 Jan 2023 21:52:47 +0000 Subject: [PATCH 2/5] Tidy up code. --- gallery_dl/extractor/hotleak.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py index 7bfec568..729ab628 100644 --- a/gallery_dl/extractor/hotleak.py +++ b/gallery_dl/extractor/hotleak.py @@ -8,7 +8,7 @@ from .common import Extractor, Message from .. import text, exception -from base64 import b64decode +import base64 BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip" @@ -85,10 +85,9 @@ class HotleakPostExtractor(HotleakExtractor): self.creator, self.type, self.id = match.groups() def decode_video_url(self, encoded_url): - encoded_url = encoded_url[16:] - encoded_url = encoded_url[:-16] - encoded_url = encoded_url[::-1] - return b64decode(encoded_url).decode('utf-8') + sliced_url = encoded_url[16:-16] + reversed_url = sliced_url[::-1] + return base64.b64decode(reversed_url).decode('utf-8') def posts(self): url = "{}/{}/{}/{}".format( @@ -141,10 +140,9 @@ class HotleakCreatorExtractor(HotleakExtractor): return self._pagination(url) def decode_video_url(self, encoded_url): - encoded_url = encoded_url[16:] - encoded_url = encoded_url[:-16] - encoded_url = encoded_url[::-1] - return b64decode(encoded_url).decode('utf-8') + sliced_url = encoded_url[16:-16] + reversed_url = sliced_url[::-1] + return base64.b64decode(reversed_url).decode('utf-8') def _pagination(self, url): headers = {"X-Requested-With": "XMLHttpRequest"} From bd23a701f33e12b08bc4139820025eec5540e591 Mon Sep 17 00:00:00 2001 From: nifnat Date: Fri, 27 Jan 2023 21:52:47 +0000 Subject: [PATCH 3/5] Tidy up code. --- gallery_dl/extractor/hotleak.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py index 7bfec568..27bd3e30 100644 --- a/gallery_dl/extractor/hotleak.py +++ b/gallery_dl/extractor/hotleak.py @@ -8,7 +8,7 @@ from .common import Extractor, Message from .. import text, exception -from base64 import b64decode +import base64 BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip" @@ -85,10 +85,9 @@ class HotleakPostExtractor(HotleakExtractor): self.creator, self.type, self.id = match.groups() def decode_video_url(self, encoded_url): - encoded_url = encoded_url[16:] - encoded_url = encoded_url[:-16] - encoded_url = encoded_url[::-1] - return b64decode(encoded_url).decode('utf-8') + sliced_url = encoded_url[16:-16] + reversed_url = sliced_url[::-1] + return base64.b64decode(reversed_url).decode('utf-8') def posts(self): url = "{}/{}/{}/{}".format( @@ -141,10 +140,9 @@ class HotleakCreatorExtractor(HotleakExtractor): return self._pagination(url) def decode_video_url(self, encoded_url): - encoded_url = encoded_url[16:] - encoded_url = encoded_url[:-16] - encoded_url = encoded_url[::-1] - return b64decode(encoded_url).decode('utf-8') + sliced_url = encoded_url[16:-16] + reversed_url = sliced_url[::-1] + return base64.b64decode(reversed_url).decode('utf-8') def _pagination(self, url): headers = {"X-Requested-With": "XMLHttpRequest"} @@ -176,7 +174,8 @@ class HotleakCreatorExtractor(HotleakExtractor): elif post["type"] == 1: data["type"] = "video" - data["url"] = "ytdl:" + self.decode_video_url(post["stream_url_play"]) + data["url"] = "ytdl:" + self.decode_video_url( + post["stream_url_play"]) text.nameext_from_url(data["url"], data) data["extension"] = "mp4" From f14dbfe0798e39893d357d49b94e28e3720d4ce0 Mon Sep 17 00:00:00 2001 From: nifnat Date: Sat, 28 Jan 2023 14:36:49 +0000 Subject: [PATCH 4/5] Make decode_video_url static (used in both post and creator extractor). --- gallery_dl/extractor/hotleak.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py index 27bd3e30..d12c4125 100644 --- a/gallery_dl/extractor/hotleak.py +++ b/gallery_dl/extractor/hotleak.py @@ -50,6 +50,12 @@ class HotleakExtractor(Extractor): params["page"] += 1 +def decode_video_url(encoded_url): + sliced_url = encoded_url[16:-16] + reversed_url = sliced_url[::-1] + return base64.b64decode(reversed_url).decode('utf-8') + + class HotleakPostExtractor(HotleakExtractor): """Extractor for individual posts on hotleak""" subcategory = "post" @@ -84,11 +90,6 @@ class HotleakPostExtractor(HotleakExtractor): HotleakExtractor.__init__(self, match) self.creator, self.type, self.id = match.groups() - def decode_video_url(self, encoded_url): - sliced_url = encoded_url[16:-16] - reversed_url = sliced_url[::-1] - return base64.b64decode(reversed_url).decode('utf-8') - def posts(self): url = "{}/{}/{}/{}".format( self.root, self.creator, self.type, self.id) @@ -106,7 +107,7 @@ class HotleakPostExtractor(HotleakExtractor): text.nameext_from_url(data["url"], data) elif self.type == "video": - data["url"] = "ytdl:" + self.decode_video_url(text.extr( + data["url"] = "ytdl:" + decode_video_url(text.extr( text.unescape(page), '"src":"', '"')) text.nameext_from_url(data["url"], data) data["extension"] = "mp4" @@ -139,11 +140,6 @@ class HotleakCreatorExtractor(HotleakExtractor): url = "{}/{}".format(self.root, self.creator) return self._pagination(url) - def decode_video_url(self, encoded_url): - sliced_url = encoded_url[16:-16] - reversed_url = sliced_url[::-1] - return base64.b64decode(reversed_url).decode('utf-8') - def _pagination(self, url): headers = {"X-Requested-With": "XMLHttpRequest"} params = {"page": 1} @@ -174,7 +170,7 @@ class HotleakCreatorExtractor(HotleakExtractor): elif post["type"] == 1: data["type"] = "video" - data["url"] = "ytdl:" + self.decode_video_url( + data["url"] = "ytdl:" + decode_video_url( post["stream_url_play"]) text.nameext_from_url(data["url"], data) data["extension"] = "mp4" From 7c9b1ec830cdb002e23df7b272603d7425b8c2b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 28 Jan 2023 15:27:08 +0100 Subject: [PATCH 5/5] [hotleak] optimize decoding video URLs - use binascii module - combine slice and reverse step --- gallery_dl/extractor/hotleak.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py index d12c4125..7c656be0 100644 --- a/gallery_dl/extractor/hotleak.py +++ b/gallery_dl/extractor/hotleak.py @@ -8,7 +8,7 @@ from .common import Extractor, Message from .. import text, exception -import base64 +import binascii BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip" @@ -50,10 +50,9 @@ class HotleakExtractor(Extractor): params["page"] += 1 -def decode_video_url(encoded_url): - sliced_url = encoded_url[16:-16] - reversed_url = sliced_url[::-1] - return base64.b64decode(reversed_url).decode('utf-8') +def decode_video_url(url): + # cut first and last 16 characters, reverse, base64 decode + return binascii.a2b_base64(url[-17:15:-1]).decode() class HotleakPostExtractor(HotleakExtractor):