From 6d8b191ea7743cbac06b5525d34f399af12c5d2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 13 Apr 2018 19:21:32 +0200 Subject: [PATCH 01/21] improve 'parse_query()' and add tests - another irrelevant micro-optimization ! - use urllib.parse.parse_qsl directly instead of parse_qs, which just packs the results of parse_qsl in a different data structure - reduced memory requirements since no additional dict and lists are created --- gallery_dl/text.py | 6 +++++- test/test_text.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 82c56a8b..7e5cb29f 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -130,7 +130,11 @@ def extract_iter(txt, begin, end, pos=0): def parse_query(qs): """Parse a query string into key-value pairs""" - return {key: vlist[0] for key, vlist in urllib.parse.parse_qs(qs).items()} + result = {} + for key, value in urllib.parse.parse_qsl(qs): + if key not in result: + result[key] = value + return result if os.name == "nt": diff --git a/test/test_text.py b/test/test_text.py index e26dde76..767952fd 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -123,6 +123,37 @@ class TestText(unittest.TestCase): result = ["c", "b", "a", "d"] self.assertEqual(list(text.extract_iter(txt, "[", "]")), result) + def test_parse_query(self): + # standard stuff + self.assertEqual( + text.parse_query(""), {}) + self.assertEqual( + text.parse_query("foo=1"), {"foo": "1"}) + self.assertEqual( + text.parse_query("foo=1&bar=2"), {"foo": "1", "bar": "2"}) + + # missing value + self.assertEqual( + text.parse_query("bar"), {}) + self.assertEqual( + text.parse_query("foo=1&bar"), {"foo": "1"}) + self.assertEqual( + text.parse_query("foo=1&bar&baz=3"), {"foo": "1", "baz": "3"}) + + # keys with identical names + self.assertEqual( + text.parse_query("foo=1&foo=2"), {"foo": "1"}) + self.assertEqual( + text.parse_query("foo=1&bar=2&foo=3&bar=4"), + {"foo": "1", "bar": "2"}, + ) + + # non-string arguments + self.assertEqual(text.parse_query(()), {}) + self.assertEqual(text.parse_query([]), {}) + self.assertEqual(text.parse_query({}), {}) + self.assertEqual(text.parse_query(None), {}) + if __name__ == '__main__': unittest.main() From e3f2bd4087b953a8df8e8e1c925f59225752c4d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 14 Apr 2018 20:56:21 +0200 Subject: [PATCH 02/21] add tests for 'text.clean_xml()' and improve it --- gallery_dl/text.py | 16 ++++++++++++---- test/test_text.py | 28 ++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 7e5cb29f..e439c2b8 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -15,14 +15,22 @@ import html import urllib.parse -INVALID_XML_CHARS = (1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31) +INVALID_XML_CHARS = ( + "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", + "\x08", "\x0b", "\x0c", "\x0e", "\x0f", "\x10", "\x11", "\x12", + "\x13", "\x14", "\x15", "\x16", "\x17", "\x18", "\x19", "\x1a", + "\x1b", "\x1c", "\x1d", "\x1e", "\x1f", +) def clean_xml(xmldata, repl=""): - """Replace/Remove invalid control characters in XML data""" + """Replace/Remove invalid control characters in 'xmldata'""" + if not isinstance(xmldata, str): + try: + xmldata = "".join(xmldata) + except TypeError: + return "" for char in INVALID_XML_CHARS: - char = chr(char) if char in xmldata: xmldata = xmldata.replace(char, repl) return xmldata diff --git a/test/test_text.py b/test/test_text.py index 767952fd..c4b02969 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2015 Mike Fährmann +# Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,11 +9,35 @@ import unittest import sys -import gallery_dl.text as text + +from gallery_dl import text class TestText(unittest.TestCase): + def test_clean_xml(self, f=text.clean_xml): + # standard usage + self.assertEqual(f(""), "") + self.assertEqual(f("foo"), "foo") + self.assertEqual(f("\tfoo\nbar\r"), "\tfoo\nbar\r") + self.assertEqual(f("\ab\ba\fr\v"), "bar") + + # 'repl' argument + repl = "#" + self.assertEqual(f("", repl), "") + self.assertEqual(f("foo", repl), "foo") + self.assertEqual(f("\tfoo\nbar\r", repl), "\tfoo\nbar\r") + self.assertEqual( + f("\ab\ba\fr\v", repl), "#b#a#r#") + + # removal of all illegal control characters + value = "".join(chr(x) for x in range(32)) + self.assertEqual(f(value), "\t\n\r") + + # 'invalid' arguments + for value in ((), [], {}, None, 1, 2.3): + self.assertEqual(f(value), "") + def test_remove_html(self): cases = ( "Hello World.", From 27eab4e4672caf1a2266c85161ff6ef79d7005ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 14 Apr 2018 22:09:42 +0200 Subject: [PATCH 03/21] rewrite text tests and improve functions - test more edge cases - consistently return an empty string for invalid arguments - remove the ungreedy-flag in 'remove_html()' --- gallery_dl/text.py | 26 +++-- test/test_text.py | 249 ++++++++++++++++++++++++++++----------------- 2 files changed, 173 insertions(+), 102 deletions(-) diff --git a/gallery_dl/text.py b/gallery_dl/text.py index e439c2b8..9ce7ef1f 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -36,17 +36,20 @@ def clean_xml(xmldata, repl=""): return xmldata -def remove_html(text): +def remove_html(txt): """Remove html-tags from a string""" - return " ".join(re.sub("<[^>]+?>", " ", text).split()) + try: + return " ".join(re.sub("<[^>]+>", " ", txt).split()) + except TypeError: + return "" def filename_from_url(url): """Extract the last part of an url to use as a filename""" try: return urllib.parse.urlsplit(url).path.rpartition("/")[2] - except ValueError: - return url + except (TypeError, AttributeError): + return "" def nameext_from_url(url, data=None): @@ -64,7 +67,7 @@ def clean_path_windows(path): try: return re.sub(r'[<>:"\\/|?*]', "_", path) except TypeError: - return path + return "" def clean_path_posix(path): @@ -72,7 +75,7 @@ def clean_path_posix(path): try: return path.replace("/", "_") except AttributeError: - return path + return "" def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()): @@ -112,7 +115,7 @@ def extract(txt, begin, end, pos=0): first = txt.index(begin, pos) + len(begin) last = txt.index(end, first) return txt[first:last], last+len(end) - except ValueError: + except (ValueError, TypeError, AttributeError): return None, pos @@ -139,9 +142,12 @@ def extract_iter(txt, begin, end, pos=0): def parse_query(qs): """Parse a query string into key-value pairs""" result = {} - for key, value in urllib.parse.parse_qsl(qs): - if key not in result: - result[key] = value + try: + for key, value in urllib.parse.parse_qsl(qs): + if key not in result: + result[key] = value + except AttributeError: + pass return result diff --git a/test/test_text.py b/test/test_text.py index c4b02969..4afa058d 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -13,6 +13,9 @@ import sys from gallery_dl import text +INVALID = ((), [], {}, None, 1, 2.3) + + class TestText(unittest.TestCase): def test_clean_xml(self, f=text.clean_xml): @@ -35,59 +38,85 @@ class TestText(unittest.TestCase): self.assertEqual(f(value), "\t\n\r") # 'invalid' arguments - for value in ((), [], {}, None, 1, 2.3): + for value in INVALID: self.assertEqual(f(value), "") - def test_remove_html(self): - cases = ( - "Hello World.", - " Hello World. ", - "Hello
World.", - "
HelloWorld.
" - ) + def test_remove_html(self, f=text.remove_html): result = "Hello World." - for case in cases: - self.assertEqual(text.remove_html(case), result) - def test_filename_from_url(self): - cases = ( - "http://example.org/v2/filename.ext", - "http://example.org/v2/filename.ext?param=value#fragment", - "example.org/filename.ext", - "/filename.ext", - "filename.ext", - ) + # standard usage + self.assertEqual(f(""), "") + self.assertEqual(f("Hello World."), result) + self.assertEqual(f(" Hello World. "), result) + self.assertEqual(f("Hello
World."), result) + self.assertEqual( + f("
HelloWorld.
"), result) + + # empty HTML + self.assertEqual(f("
"), "") + self.assertEqual(f("
"), "") + + # malformed HTML + self.assertEqual(f(""), "") + self.assertEqual(f(""), "") + + # invalid arguments + for value in INVALID: + self.assertEqual(f(value), "") + + def test_filename_from_url(self, f=text.filename_from_url): result = "filename.ext" - for case in cases: - self.assertEqual(text.filename_from_url(case), result) - def test_nameext_from_url(self): - cases = ( - "http://example.org/v2/filename.ext", - "http://example.org/v2/filename.ext?param=value#fragment", - "example.org/filename.ext", - "/filename.ext", - "filename.ext", - ) - result = { - "filename" : "filename.ext", - "name" : "filename", - "extension": "ext", - } - for case in cases: - self.assertEqual(text.nameext_from_url(case), result) + # standard usage + self.assertEqual(f(""), "") + self.assertEqual(f("filename.ext"), result) + self.assertEqual(f("/filename.ext"), result) + self.assertEqual(f("example.org/filename.ext"), result) + self.assertEqual(f("http://example.org/v2/filename.ext"), result) + self.assertEqual( + f("http://example.org/v2/filename.ext?param=value#frag"), result) - def test_clean_path(self): - cases = { - "Hello World." : ("Hello World.", "Hello World."), - "Hello/World/.": ("Hello_World_.", "Hello_World_."), - r':|"World\*?': ( - '_Hello____World___', r':|"World\*?' - ), - } - for case, result in cases.items(): - self.assertEqual(text.clean_path_windows(case), result[0]) - self.assertEqual(text.clean_path_posix(case), result[1]) + # invalid arguments + for value in INVALID: + self.assertEqual(f(value), "") + + def test_nameext_from_url(self, f=text.nameext_from_url): + empty = {"filename": "", "name": "", "extension": ""} + result = {"filename": "filename.ext", + "name": "filename", "extension": "ext"} + + # standard usage + self.assertEqual(f(""), empty) + self.assertEqual(f("filename.ext"), result) + self.assertEqual(f("/filename.ext"), result) + self.assertEqual(f("example.org/filename.ext"), result) + self.assertEqual(f("http://example.org/v2/filename.ext"), result) + self.assertEqual( + f("http://example.org/v2/filename.ext?param=value#frag"), result) + + # invalid arguments + for value in INVALID: + self.assertEqual(f(value), empty) + + def test_clean_path_windows(self, f=text.clean_path_windows): + self.assertEqual(f(""), "") + self.assertEqual(f("foo"), "foo") + self.assertEqual(f("foo/bar"), "foo_bar") + self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo_________bar") + + # invalid arguments + for value in INVALID: + self.assertEqual(f(value), "") + + def test_clean_path_posix(self, f=text.clean_path_posix): + self.assertEqual(f(""), "") + self.assertEqual(f("foo"), "foo") + self.assertEqual(f("foo/bar"), "foo_bar") + self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo<>:\"\\_|?*bar") + + # invalid arguments + for value in INVALID: + self.assertEqual(f(value), "") def test_shorten_path(self): cases = { @@ -115,68 +144,104 @@ class TestText(unittest.TestCase): self.assertEqual(fname, result) self.assertTrue(len(fname.encode(enc)) <= 255) - def test_extract(self): - cases = { - ("", "<", ">") : ("a", 3), - ("", "X", ">") : (None, 0), - ("", "<", "X") : (None, 0), - ("", "<", ">", 3): ("b", 6), - ("", "X", ">", 3): (None, 3), - ("", "<", "X", 3): (None, 3), - } - for case, result in cases.items(): - self.assertEqual(text.extract(*case), result) + def test_extract(self, f=text.extract): + txt = "" + self.assertEqual(f(txt, "<", ">"), ("a", 3)) + self.assertEqual(f(txt, "X", ">"), (None, 0)) + self.assertEqual(f(txt, "<", "X"), (None, 0)) - def test_extract_all(self): + # 'pos' argument + for i in range(1, 4): + self.assertEqual(f(txt, "<", ">", i), ("b", 6)) + for i in range(4, 10): + self.assertEqual(f(txt, "<", ">", i), (None, i)) + + # invalid arguments + for value in INVALID: + self.assertEqual(f(value , "<" , ">") , (None, 0)) + self.assertEqual(f(txt, value, ">") , (None, 0)) + self.assertEqual(f(txt, "<" , value), (None, 0)) + + def test_extract_all(self, f=text.extract_all): txt = "[c][b][a]: xyz! [d][e" - result = ({ - "A": "a", - "B": "b", - "X": "xyz", - "E": "xtra", - }, 15) - self.assertEqual(text.extract_all(txt, ( - (None, "[", "]"), - ("B" , "[", "]"), - ("A" , "[", "]"), - ("X" , ": ", "!"), - ), values={"E": "xtra"}), result) - def test_extract_iter(self): + self.assertEqual( + f(txt, ()), ({}, 0)) + self.assertEqual( + f(txt, (("C", "[", "]"), ("B", "[", "]"), ("A", "[", "]"))), + ({"A": "a", "B": "b", "C": "c"}, 9), + ) + + # 'None' as field name + self.assertEqual( + f(txt, ((None, "[", "]"), (None, "[", "]"), ("A", "[", "]"))), + ({"A": "a"}, 9), + ) + self.assertEqual( + f(txt, ((None, "[", "]"), (None, "[", "]"), (None, "[", "]"))), + ({}, 9), + ) + + # failed matches + self.assertEqual( + f(txt, (("C", "[", "]"), ("X", "X", "X"), ("B", "[", "]"))), + ({"B": "b", "C": "c", "X": None}, 6), + ) + + # 'pos' argument + self.assertEqual( + f(txt, (("B", "[", "]"), ("A", "[", "]")), pos=1), + ({"A": "a", "B": "b"}, 9), + ) + + # 'values' argument + self.assertEqual( + f(txt, (("C", "[", "]"),), values={"A": "a", "B": "b"}), + ({"A": "a", "B": "b", "C": "c"}, 3), + ) + + vdict = {} + rdict, pos = f(txt, (), values=vdict) + self.assertIs(vdict, rdict) + + def test_extract_iter(self, f=text.extract_iter): txt = "[c][b][a]: xyz! [d][e" - result = ["c", "b", "a", "d"] - self.assertEqual(list(text.extract_iter(txt, "[", "]")), result) - def test_parse_query(self): - # standard stuff + def g(*args): + return list(f(*args)) + self.assertEqual( - text.parse_query(""), {}) + g("", "[", "]"), []) self.assertEqual( - text.parse_query("foo=1"), {"foo": "1"}) + g("[a]", "[", "]"), ["a"]) self.assertEqual( - text.parse_query("foo=1&bar=2"), {"foo": "1", "bar": "2"}) + g(txt, "[", "]"), ["c", "b", "a", "d"]) + self.assertEqual( + g(txt, "X", "X"), []) + self.assertEqual( + g(txt, "[", "]", 6), ["a", "d"]) + + def test_parse_query(self, f=text.parse_query): + # standard usage + self.assertEqual(f(""), {}) + self.assertEqual(f("foo=1"), {"foo": "1"}) + self.assertEqual(f("foo=1&bar=2"), {"foo": "1", "bar": "2"}) # missing value - self.assertEqual( - text.parse_query("bar"), {}) - self.assertEqual( - text.parse_query("foo=1&bar"), {"foo": "1"}) - self.assertEqual( - text.parse_query("foo=1&bar&baz=3"), {"foo": "1", "baz": "3"}) + self.assertEqual(f("bar"), {}) + self.assertEqual(f("foo=1&bar"), {"foo": "1"}) + self.assertEqual(f("foo=1&bar&baz=3"), {"foo": "1", "baz": "3"}) # keys with identical names + self.assertEqual(f("foo=1&foo=2"), {"foo": "1"}) self.assertEqual( - text.parse_query("foo=1&foo=2"), {"foo": "1"}) - self.assertEqual( - text.parse_query("foo=1&bar=2&foo=3&bar=4"), + f("foo=1&bar=2&foo=3&bar=4"), {"foo": "1", "bar": "2"}, ) - # non-string arguments - self.assertEqual(text.parse_query(()), {}) - self.assertEqual(text.parse_query([]), {}) - self.assertEqual(text.parse_query({}), {}) - self.assertEqual(text.parse_query(None), {}) + # invalid arguments + for value in INVALID: + self.assertEqual(f(value), {}) if __name__ == '__main__': From 4ffa94f634cbdd6d566defb2bcaf97b418e08c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 15 Apr 2018 18:44:13 +0200 Subject: [PATCH 04/21] remove 'shorten_path()' and 'shorten_filename()' --- gallery_dl/text.py | 14 -------------- test/test_text.py | 27 --------------------------- 2 files changed, 41 deletions(-) diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 9ce7ef1f..4bed863a 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -8,7 +8,6 @@ """Collection of functions that work in strings/text""" -import sys import re import os.path import html @@ -78,19 +77,6 @@ def clean_path_posix(path): return "" -def shorten_path(path, limit=255, encoding=sys.getfilesystemencoding()): - """Shorten a path segment to at most 'limit' bytes""" - return (path.encode(encoding)[:limit]).decode(encoding, "ignore") - - -def shorten_filename(fname, limit=255, encoding=sys.getfilesystemencoding()): - """Shorten filename to at most 'limit' bytes while preserving extension""" - name, extension = os.path.splitext(fname) - bext = extension.encode(encoding) - bname = name.encode(encoding)[:limit-len(bext)] - return bname.decode(encoding, "ignore") + extension - - def extract(txt, begin, end, pos=0): """Extract the text between 'begin' and 'end' from 'txt' diff --git a/test/test_text.py b/test/test_text.py index 4afa058d..b07dff10 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -8,7 +8,6 @@ # published by the Free Software Foundation. import unittest -import sys from gallery_dl import text @@ -118,32 +117,6 @@ class TestText(unittest.TestCase): for value in INVALID: self.assertEqual(f(value), "") - def test_shorten_path(self): - cases = { - "dirname": "dirname", - "X"*255: "X"*255, - "X"*256: "X"*255, - "Ä"*255: "Ä"*127, - } - enc = sys.getfilesystemencoding() - for case, result in cases.items(): - self.assertEqual(text.shorten_path(case), result) - self.assertTrue(len(text.shorten_path(case).encode(enc)) <= 255) - - def test_shorten_filename(self): - self.maxDiff = None - cases = { - "filename.ext": "filename.ext", - "X"*251 + ".ext": "X"*251 + ".ext", - "X"*255 + ".ext": "X"*251 + ".ext", - "Ä"*251 + ".ext": "Ä"*125 + ".ext", - } - enc = sys.getfilesystemencoding() - for case, result in cases.items(): - fname = text.shorten_filename(case) - self.assertEqual(fname, result) - self.assertTrue(len(fname.encode(enc)) <= 255) - def test_extract(self, f=text.extract): txt = "" self.assertEqual(f(txt, "<", ">"), ("a", 3)) From 728c64a3fb2fce8e4c75bbc5d39d255852ddf587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 15 Apr 2018 18:58:32 +0200 Subject: [PATCH 05/21] [tumblr] rename 'offset' to 'num and adjust formats Trying to somehow emulate Tumblr filenames is a bad idea ... --- gallery_dl/extractor/tumblr.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index f853d964..bb951856 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -52,8 +52,8 @@ class TumblrExtractor(Extractor): """Base class for tumblr extractors""" category = "tumblr" directory_fmt = ["{category}", "{name}"] - filename_fmt = "{category}_{blog_name}_{id}o{offset}.{extension}" - archive_fmt = "{id}_{offset}" + filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}" + archive_fmt = "{id}_{num}" def __init__(self, match): Extractor.__init__(self) @@ -87,7 +87,7 @@ class TumblrExtractor(Extractor): post["reblogged"] = reblog post["blog"] = blog - post["offset"] = 0 + post["num"] = 0 if "trail" in post: del post["trail"] @@ -148,14 +148,14 @@ class TumblrExtractor(Extractor): @staticmethod def _prepare(url, post): text.nameext_from_url(url, post) - post["offset"] += 1 + post["num"] += 1 post["hash"] = post["name"].partition("_")[2] return Message.Url, url, post @staticmethod def _prepare_image(url, post): text.nameext_from_url(url, post) - post["offset"] += 1 + post["num"] += 1 parts = post["name"].split("_") post["hash"] = parts[1] if parts[1] != "inline" else parts[2] @@ -237,7 +237,7 @@ class TumblrLikesExtractor(TumblrExtractor): """Extractor for images from a tumblr-user by tag""" subcategory = "likes" directory_fmt = ["{category}", "{name}", "likes"] - archive_fmt = "f_{blog[name]}_{id}_{offset}" + archive_fmt = "f_{blog[name]}_{id}_{num}" pattern = [BASE_PATTERN + r"/likes"] test = [("http://mikf123.tumblr.com/likes", { "count": 1, From ff643793bd7a279035689e3a9fecc5ac497ab088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 19 Apr 2018 21:32:10 +0200 Subject: [PATCH 06/21] improve and document cloudflare bypass code --- gallery_dl/cloudflare.py | 60 ++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index 53cc58e8..676e723f 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2017 Mike Fährmann +# Copyright 2015-2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,6 +8,7 @@ """Methods to access sites behind Cloudflare protection""" +import re import time import operator import urllib.parse @@ -30,6 +31,7 @@ def request_func(self, *args, **kwargs): def solve_challenge(session, response): + session.headers["Referer"] = response.url page = response.text params = text.extract_all(page, ( @@ -37,6 +39,7 @@ def solve_challenge(session, response): ('pass' , 'name="pass" value="', '"'), ))[0] params["jschl_answer"] = solve_jschl(response.url, page) + time.sleep(4) url = urllib.parse.urljoin(response.url, "/cdn-cgi/l/chk_jschl") return session.get(url, params=params) @@ -44,51 +47,66 @@ def solve_challenge(session, response): def solve_jschl(url, page): """Solve challenge to get 'jschl_answer' value""" + + # build variable name + # e.g. '...f, wqnVscP={"DERKbJk":+(...' --> wqnVscP.DERKbJk data, pos = text.extract_all(page, ( ('var' , ',f, ', '='), ('key' , '"', '"'), ('expr', ':', '}'), )) - solution = evaluate_expression(data["expr"]) variable = "{}.{}".format(data["var"], data["key"]) vlength = len(variable) + + # evaluate the initial expression + solution = evaluate_expression(data["expr"]) + + # iterator over all remaining expressions + # and combine their values in 'solution' expressions = text.extract( - page, "'challenge-form');", "f.submit();", pos - )[0] + page, "'challenge-form');", "f.submit();", pos)[0] for expr in expressions.split(";")[1:]: + if expr.startswith(variable): + # select arithmetc function based on operator (+, -, *) func = operator_functions[expr[vlength]] + # evaluate the rest of the expression value = evaluate_expression(expr[vlength+2:]) + # combine the expression value with our current solution solution = func(solution, value) + elif expr.startswith("a.value"): + # add length of the hostname, i.e. add 11 for 'example.org' solution += len(urllib.parse.urlsplit(url).netloc) + if ".toFixed(" in expr: + # trim the solution to 10 decimal places + # and strip trailing zeros solution = "{:.10f}".format(solution).rstrip("0") + return solution -def evaluate_expression(expr): +def evaluate_expression(expr, split_re=re.compile(r"\(+([^)]*)\)")): """Evaluate a Javascript expression for the challenge""" + if "/" in expr: + # split the expression in numerator and denominator subexpressions, + # evaluate them separately, + # and return their fraction-result num, _, denom = expr.partition("/") return evaluate_expression(num) / evaluate_expression(denom) - stack = [] - ranges = [] - value = "" - for index, char in enumerate(expr): - if char == "(": - stack.append(index+1) - elif char == ")": - begin = stack.pop() - if stack: - ranges.append((begin, index)) - for subexpr in [expr[begin:end] for begin, end in ranges] or (expr,): - num = 0 - for part in subexpr.split("[]"): - num += expression_values[part] - value += str(num) - return int(value) + # iterate over all subexpressions, + # evaluate them, + # and accumulate their values in 'result' + result = "" + for subexpr in split_re.findall(expr): + result += str(sum( + expression_values[part] + for part in subexpr.split("[]") + )) + return int(result) operator_functions = { From cc36f8858689bec4e00eee0979136fce6b929e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 20 Apr 2018 14:53:21 +0200 Subject: [PATCH 07/21] rename safe_int to parse_int; move parse_* to text module --- gallery_dl/downloader/http.py | 6 ++-- gallery_dl/extractor/artstation.py | 4 +-- gallery_dl/extractor/deviantart.py | 4 +-- gallery_dl/extractor/dynastyscans.py | 4 +-- gallery_dl/extractor/exhentai.py | 18 +++++------ gallery_dl/extractor/fallenangels.py | 4 +-- gallery_dl/extractor/foolslide.py | 8 ++--- gallery_dl/extractor/gelbooru.py | 4 +-- gallery_dl/extractor/hbrowse.py | 12 +++---- gallery_dl/extractor/hentai2read.py | 15 ++++----- gallery_dl/extractor/hentaifoundry.py | 8 ++--- gallery_dl/extractor/hentaihere.py | 14 ++++----- gallery_dl/extractor/hitomi.py | 2 +- gallery_dl/extractor/imagefap.py | 4 +-- gallery_dl/extractor/kissmanga.py | 10 +++--- gallery_dl/extractor/komikcast.py | 8 ++--- gallery_dl/extractor/mangadex.py | 20 ++++++------ gallery_dl/extractor/mangafox.py | 4 +-- gallery_dl/extractor/mangahere.py | 14 ++++----- gallery_dl/extractor/mangapark.py | 12 +++---- gallery_dl/extractor/mangareader.py | 12 +++---- gallery_dl/extractor/mangastream.py | 6 ++-- gallery_dl/extractor/nhentai.py | 4 +-- gallery_dl/extractor/nijie.py | 8 ++--- gallery_dl/extractor/paheal.py | 2 +- gallery_dl/extractor/pinterest.py | 10 +++--- gallery_dl/extractor/readcomiconline.py | 6 ++-- gallery_dl/extractor/sankaku.py | 14 ++++----- gallery_dl/extractor/seiga.py | 8 ++--- gallery_dl/extractor/senmanga.py | 4 +-- gallery_dl/extractor/slideshare.py | 4 +-- gallery_dl/extractor/spectrumnexus.py | 8 ++--- gallery_dl/extractor/xvideos.py | 10 +++--- gallery_dl/text.py | 32 ++++++++++++++++++- gallery_dl/util.py | 28 +---------------- test/test_text.py | 42 +++++++++++++++++++++++++ test/test_util.py | 26 --------------- 37 files changed, 210 insertions(+), 189 deletions(-) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index bf461ae2..b590485f 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -12,7 +12,7 @@ import time import mimetypes from requests.exceptions import ConnectionError, Timeout from .common import DownloaderBase -from .. import util, exception +from .. import text, exception class Downloader(DownloaderBase): @@ -28,7 +28,7 @@ class Downloader(DownloaderBase): self.chunk_size = 16384 if self.rate: - self.rate = util.parse_bytes(self.rate) + self.rate = text.parse_bytes(self.rate) if not self.rate: self.log.warning("Invalid rate limit specified") elif self.rate < self.chunk_size: @@ -61,7 +61,7 @@ class Downloader(DownloaderBase): else: self.response.raise_for_status() - return offset, util.safe_int(size) + return offset, text.parse_int(size) def receive(self, file): if self.rate: diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index fbea9595..6f8dbd53 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -158,7 +158,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor): def __init__(self, match): ArtstationExtractor.__init__(self, match) - self.album_id = util.safe_int(match.group(2)) + self.album_id = text.parse_int(match.group(2)) def metadata(self): userinfo = self.get_user_info(self.user) @@ -256,7 +256,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor): def _id_from_url(url): """Get an image's submission ID from its URL""" parts = url.split("/") - return util.safe_int("".join(parts[7:10])) + return text.parse_int("".join(parts[7:10])) class ArtstationSearchExtractor(ArtstationExtractor): diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 19f7a58d..ee156342 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -9,7 +9,7 @@ """Extract images from https://www.deviantart.com/""" from .common import Extractor, Message -from .. import text, util, exception +from .. import text, exception from ..cache import cache, memcache import itertools import datetime @@ -62,7 +62,7 @@ class DeviantartExtractor(Extractor): if "videos" in deviation: video = max(deviation["videos"], - key=lambda x: util.safe_int(x["quality"][:-1])) + key=lambda x: text.parse_int(x["quality"][:-1])) yield self.commit(deviation, video) if "flash" in deviation: diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index bd9107ac..d63ddc0a 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -9,7 +9,7 @@ """Extract manga-chapters from https://dynasty-scans.com/""" from .common import ChapterExtractor -from .. import text, util +from .. import text import re import json @@ -53,7 +53,7 @@ class DynastyscansChapterExtractor(ChapterExtractor): return { "manga": text.unescape(match.group(1)), - "chapter": util.safe_int(match.group(2)), + "chapter": text.parse_int(match.group(2)), "chapter_minor": match.group(3) or "", "title": text.unescape(match.group(4) or ""), "author": text.remove_html(author), diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 41eaeca1..2af58110 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -120,7 +120,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): self.key = {} self.count = 0 self.version, self.gid, self.token = match.groups() - self.gid = util.safe_int(self.gid) + self.gid = text.parse_int(self.gid) def items(self): self.login() @@ -163,7 +163,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): data["lang"] = util.language_to_code(data["language"]) data["title"] = text.unescape(data["title"]) data["title_jp"] = text.unescape(data["title_jp"]) - data["count"] = util.safe_int(data["count"]) + data["count"] = text.parse_int(data["count"]) data["gallery_size"] = util.parse_bytes( data["gallery_size"].rstrip("Bb")) return data @@ -245,17 +245,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def _parse_image_info(url): parts = url.split("/")[4].split("-") return { - "width": util.safe_int(parts[2]), - "height": util.safe_int(parts[3]), - "size": util.safe_int(parts[1]), + "width": text.parse_int(parts[2]), + "height": text.parse_int(parts[3]), + "size": text.parse_int(parts[1]), } @staticmethod def _parse_original_info(info): parts = info.lstrip().split(" ") return { - "width": util.safe_int(parts[0]), - "height": util.safe_int(parts[2]), + "width": text.parse_int(parts[0]), + "height": text.parse_int(parts[2]), "size": util.parse_bytes(parts[3] + parts[4][0]), } @@ -274,7 +274,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): def __init__(self, match): ExhentaiExtractor.__init__(self) self.params = text.parse_query(match.group(1) or "") - self.params["page"] = util.safe_int(self.params.get("page")) + self.params["page"] = text.parse_int(self.params.get("page")) self.url = self.root def items(self): @@ -308,7 +308,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): return Message.Queue, url, { "type": gtype, "date": date, - "gallery_id": util.safe_int(parts[1]), + "gallery_id": text.parse_int(parts[1]), "gallery_token": parts[2], "title": text.unescape(title), key: last, diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py index a4ea6f58..3cd3f7a2 100644 --- a/gallery_dl/extractor/fallenangels.py +++ b/gallery_dl/extractor/fallenangels.py @@ -98,8 +98,8 @@ class FallenangelsMangaExtractor(MangaExtractor): chapter, dot, minor = chapter.partition(".") results.append((url, { "manga": manga, "title": title, - "volume": util.safe_int(volume), - "chapter": util.safe_int(chapter), + "volume": text.parse_int(volume), + "chapter": text.parse_int(chapter), "chapter_minor": dot + minor, "lang": self.lang, "language": language, })) diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index 117c3bdc..cf92b3cf 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -50,8 +50,8 @@ class FoolslideExtractor(SharedConfigExtractor): lang = info[1].partition("-")[0] data["lang"] = lang data["language"] = util.code_to_language(lang) - data["volume"] = util.safe_int(info[2]) - data["chapter"] = util.safe_int(info[3]) + data["volume"] = text.parse_int(info[2]) + data["chapter"] = text.parse_int(info[3]) data["chapter_minor"] = "." + info[4] if len(info) >= 5 else "" return data @@ -75,7 +75,7 @@ class FoolslideChapterExtractor(FoolslideExtractor): imgs = self.get_images(page) data["count"] = len(imgs) - data["chapter_id"] = util.safe_int(imgs[0]["chapter_id"]) + data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"]) yield Message.Version, 1 yield Message.Directory, data @@ -88,7 +88,7 @@ class FoolslideChapterExtractor(FoolslideExtractor): except KeyError: pass for key in ("height", "id", "size", "width"): - image[key] = util.safe_int(image[key]) + image[key] = text.parse_int(image[key]) data.update(image) text.nameext_from_url(data["filename"], data) yield Message.Url, url, data diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 33abdbd4..110160a6 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -37,7 +37,7 @@ class GelbooruExtractor(SharedConfigExtractor): if isinstance(post, str): post = self.get_post_data(post) for key in ("id", "width", "height", "score", "change"): - post[key] = util.safe_int(post[key]) + post[key] = text.parse_int(post[key]) url = post["file_url"] post.update(data) yield Message.Url, url, text.nameext_from_url(url, post) @@ -174,7 +174,7 @@ class GelbooruPoolExtractor(GelbooruExtractor): raise exception.NotFoundError("pool") return { - "pool": util.safe_int(self.pool_id), + "pool": text.parse_int(self.pool_id), "pool_name": text.unescape(name), "count": len(self.posts), } diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 03232799..fde517ac 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -9,7 +9,7 @@ """Extract images from http://www.hbrowse.com/""" from .common import ChapterExtractor, MangaExtractor -from .. import text, util +from .. import text from urllib.parse import urljoin import json @@ -30,7 +30,7 @@ class HbrowseExtractor(): ), values=data) data["manga"] = text.unescape(data["manga"]) - data["total"] = util.safe_int(data["total"]) + data["total"] = text.parse_int(data["total"]) data["artist"] = text.remove_html(data["artist"]) data["origin"] = text.remove_html(data["origin"]) return data @@ -48,7 +48,7 @@ class HbrowseMangaExtractor(HbrowseExtractor, MangaExtractor): def chapters(self, page): results = [] data = self.parse_page(page, { - "manga_id": util.safe_int( + "manga_id": text.parse_int( self.url.rstrip("/").rpartition("/")[2]) }) @@ -59,7 +59,7 @@ class HbrowseMangaExtractor(HbrowseExtractor, MangaExtractor): if not url: return results title, pos = text.extract(page, '>View ', '<', pos) - data["chapter"] = util.safe_int(url.rpartition("/")[2][1:]) + data["chapter"] = text.parse_int(url.rpartition("/")[2][1:]) data["title"] = title results.append((urljoin(self.root, url), data.copy())) @@ -84,8 +84,8 @@ class HbrowseChapterExtractor(HbrowseExtractor, ChapterExtractor): def get_metadata(self, page): return self.parse_page(page, { - "manga_id": util.safe_int(self.gid), - "chapter": util.safe_int(self.chapter) + "manga_id": text.parse_int(self.gid), + "chapter": text.parse_int(self.chapter) }) def get_images(self, page): diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py index 6d2cd75f..34a7749c 100644 --- a/gallery_dl/extractor/hentai2read.py +++ b/gallery_dl/extractor/hentai2read.py @@ -9,7 +9,7 @@ """Extract hentai-manga from https://hentai2read.com/""" from .common import ChapterExtractor, MangaExtractor -from .. import text, util +from .. import text import re import json @@ -36,7 +36,8 @@ class Hentai2readMangaExtractor(MangaExtractor): page, '', '') mtype, pos = text.extract( page, '[', ']', pos) - manga_id = util.safe_int(text.extract(page, 'data-mid="', '"', pos)[0]) + manga_id = text.parse_int(text.extract( + page, 'data-mid="', '"', pos)[0]) while True: chapter_id, pos = text.extract(page, ' data-cid="', '"', pos) @@ -49,8 +50,8 @@ class Hentai2readMangaExtractor(MangaExtractor): chapter, _, title = text.unescape(chapter).strip().partition(" - ") results.append((url, { "manga_id": manga_id, "manga": manga, "type": mtype, - "chapter_id": util.safe_int(chapter_id), - "chapter": util.safe_int(chapter), + "chapter_id": text.parse_int(chapter_id), + "chapter": text.parse_int(chapter), "title": title, "lang": "en", "language": "English", })) @@ -78,9 +79,9 @@ class Hentai2readChapterExtractor(ChapterExtractor): r"(\d+): (.+) . Page 1 ", title) return { "manga": match.group(1), - "manga_id": util.safe_int(manga_id), - "chapter": util.safe_int(self.chapter), - "chapter_id": util.safe_int(chapter_id), + "manga_id": text.parse_int(manga_id), + "chapter": text.parse_int(self.chapter), + "chapter_id": text.parse_int(chapter_id), "type": match.group(2), "author": match.group(3), "title": match.group(5), diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 2fe4daa7..d3d2e8a4 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -9,7 +9,7 @@ """Extract images from https://www.hentai-foundry.com/""" from .common import Extractor, Message -from .. import text, util, exception +from .. import text, exception class HentaifoundryUserExtractor(Extractor): @@ -69,7 +69,7 @@ class HentaifoundryUserExtractor(Extractor): page = response.text token, pos = text.extract(page, 'hidden" value="', '"') count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos) - return {"artist": self.artist, "count": util.safe_int(count)}, token + return {"artist": self.artist, "count": text.parse_int(count)}, token def get_image_metadata(self, url): """Collect metadata for an image""" @@ -79,7 +79,7 @@ class HentaifoundryUserExtractor(Extractor): page, 'Pictures » ', '<') part, pos = text.extract( page, '//pictures.hentai-foundry.com', '"', pos) - data = {"index": util.safe_int(index), "title": text.unescape(title)} + data = {"index": text.parse_int(index), "title": text.unescape(title)} text.nameext_from_url(part, data) return "https://pictures.hentai-foundry.com" + part, data @@ -161,7 +161,7 @@ class HentaifoundryImageExtractor(Extractor): url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos) data = { "artist": artist, - "index": util.safe_int(self.index), + "index": text.parse_int(self.index), "title": text.unescape(title), } text.nameext_from_url(url, data) diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py index d1c5c932..50150cb8 100644 --- a/gallery_dl/extractor/hentaihere.py +++ b/gallery_dl/extractor/hentaihere.py @@ -9,7 +9,7 @@ """Extract hentai-manga from https://hentaihere.com/""" from .common import ChapterExtractor, MangaExtractor -from .. import text, util +from .. import text import re import json @@ -32,7 +32,7 @@ class HentaihereMangaExtractor(MangaExtractor): def chapters(self, page): results = [] - manga_id = util.safe_int( + manga_id = text.parse_int( self.url.rstrip("/").rpartition("/")[2][1:]) manga, pos = text.extract( page, '', '') @@ -50,8 +50,8 @@ class HentaihereMangaExtractor(MangaExtractor): chapter, _, title = text.unescape(chapter).strip().partition(" - ") results.append((url, { "manga_id": manga_id, "manga": manga, "type": mtype, - "chapter_id": util.safe_int(chapter_id), - "chapter": util.safe_int(chapter), + "chapter_id": text.parse_int(chapter_id), + "chapter": text.parse_int(chapter), "title": title, "lang": "en", "language": "English", })) @@ -79,9 +79,9 @@ class HentaihereChapterExtractor(ChapterExtractor): match = re.match(pattern, title) return { "manga": match.group(1), - "manga_id": util.safe_int(self.manga_id), - "chapter": util.safe_int(self.chapter), - "chapter_id": util.safe_int(chapter_id), + "manga_id": text.parse_int(self.manga_id), + "chapter": text.parse_int(self.chapter), + "chapter_id": text.parse_int(chapter_id), "type": match.group(2), "title": match.group(3), "author": match.group(4), diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 64ae1e12..60d91a3f 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -30,7 +30,7 @@ class HitomiGalleryExtractor(ChapterExtractor): ] def __init__(self, match): - self.gid = util.safe_int(match.group(1)) + self.gid = text.parse_int(match.group(1)) url = "https://hitomi.la/galleries/{}.html".format(self.gid) ChapterExtractor.__init__(self, url) diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index 97d8cb68..75f2e623 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -9,7 +9,7 @@ """Extract images from http://imagefap.com/""" from .common import Extractor, Message -from .. import text, util +from .. import text import json @@ -159,7 +159,7 @@ class ImagefapUserExtractor(ImagefapExtractor): yield Message.Version, 1 for gid, name in self.get_gallery_data(): url = "http://www.imagefap.com/gallery/" + gid - data = {"gallery_id": util.safe_int(gid), "title": name} + data = {"gallery_id": text.parse_int(gid), "title": name} yield Message.Queue, url, data def get_gallery_data(self): diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index 36e1f02f..534c36bf 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -9,7 +9,7 @@ """Extract manga-chapters and entire manga from http://kissmanga.com/""" from .common import ChapterExtractor, MangaExtractor -from .. import text, util, cloudflare, aes, exception +from .. import text, cloudflare, aes, exception from ..cache import cache import re import hashlib @@ -56,8 +56,8 @@ class KissmangaBase(): ), data["chapter_string"]) volume, chapter, minor, title = match.groups() - data["volume"] = util.safe_int(volume) - data["chapter"] = util.safe_int(chapter) + data["volume"] = text.parse_int(volume) + data["chapter"] = text.parse_int(chapter) data["chapter_minor"] = "." + minor if minor else "" data["title"] = title if title and title != "Read Online" else "" return data @@ -89,7 +89,7 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor): url, _, chapter = item.partition(needle) data = { "manga": manga, "chapter_string": chapter, - "chapter_id": util.safe_int(url.rpartition("=")[2]), + "chapter_id": text.parse_int(url.rpartition("=")[2]), "lang": "en", "language": "English", } self.parse_chapter_string(data) @@ -128,7 +128,7 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor): data = { "manga": manga.strip(), "chapter_string": cinfo.strip(), - "chapter_id": util.safe_int(self.chapter_id), + "chapter_id": text.parse_int(self.chapter_id), "lang": "en", "language": "English", } diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py index 04805001..9270fdef 100644 --- a/gallery_dl/extractor/komikcast.py +++ b/gallery_dl/extractor/komikcast.py @@ -9,7 +9,7 @@ """Extract manga-chapters and entire manga from https://komikcast.com/""" from .common import ChapterExtractor, MangaExtractor -from .. import text, util, cloudflare +from .. import text, cloudflare import re @@ -39,7 +39,7 @@ class KomikcastBase(): data["title"] = title.strip() else: data["title"] = "" - data["chapter"] = util.safe_int(chapter) + data["chapter"] = text.parse_int(chapter) data["lang"] = "id" data["language"] = "Indonesian" @@ -75,8 +75,8 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): page, '
', '