[readcomiconline] fix chapter extractor (#6070, #6335)

2024-12-02 21:07:11 +01:00
parent 63e042dec7
commit 26163db69d
1 changed file with 26 additions and 13 deletions
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -79,13 +79,22 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):

    def images(self, page):
        results = []
+        referer = {"_http_headers": {"Referer": self.gallery_url}}
+        root = text.extr(page, "return baeu(l, '", "'")
+
+        replacements = re.findall(
+            r"l = l\.replace\(/([^/]+)/g, [\"']([^\"']*)", page)

        for block in page.split("    pth = '")[1:]:
            pth = text.extr(block, "", "'")
+
            for needle, repl in re.findall(
                    r"pth = pth\.replace\(/([^/]+)/g, [\"']([^\"']*)", block):
                pth = pth.replace(needle, repl)
-            results.append((beau(pth), None))
+            for needle, repl in replacements:
+                pth = pth.replace(needle, repl)
+
+            results.append((baeu(pth, root), referer))

        return results

@@ -119,20 +128,24 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
        return results


-def beau(url):
-    """https://readcomiconline.li/Scripts/rguard.min.js?v=1.5.1"""
+def baeu(url, root="", root_blogspot="https://2.bp.blogspot.com"):
+    """https://readcomiconline.li/Scripts/rguard.min.js"""
+    if not root:
+        root = root_blogspot
+
    url = url.replace("pw_.g28x", "b")
    url = url.replace("d2pr.x_27", "h")

    if url.startswith("https"):
-        return url
+        return url.replace(root_blogspot, root, 1)

-    url, sep, rest = url.partition("?")
-    containsS0 = "=s0" in url
-    url = url[:-3 if containsS0 else -6]
-    url = url[15:33] + url[50:]
-    url = url[0:-11] + url[-2:]
-    url = binascii.a2b_base64(url).decode()
-    url = url[0:13] + url[17:]
-    url = url[0:-2] + ("=s0" if containsS0 else "=s1600")
-    return "https://2.bp.blogspot.com/" + url + sep + rest
+    path, sep, query = url.partition("?")
+
+    contains_s0 = "=s0" in path
+    path = path[:-3 if contains_s0 else -6]
+    path = path[15:33] + path[50:]  # step1()
+    path = path[0:-11] + path[-2:]  # step2()
+    path = binascii.a2b_base64(path).decode()  # atob()
+    path = path[0:13] + path[17:]
+    path = path[0:-2] + ("=s0" if contains_s0 else "=s1600")
+    return root + "/" + path + sep + query