From c874071f5af82738710b6ac5db7a18ec66bad6bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 4 Oct 2020 22:46:41 +0200 Subject: [PATCH] [kissmanga] remove module --- docs/configuration.rst | 13 - docs/gallery-dl.conf | 8 - docs/supportedsites.rst | 1 - gallery_dl/aes.py | 337 ------------------------ gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/kissmanga.py | 222 ---------------- gallery_dl/extractor/readcomiconline.py | 30 ++- test/test_results.py | 7 +- 8 files changed, 27 insertions(+), 592 deletions(-) delete mode 100644 gallery_dl/aes.py delete mode 100644 gallery_dl/extractor/kissmanga.py diff --git a/docs/configuration.rst b/docs/configuration.rst index ced97f1d..7a66223e 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1049,19 +1049,6 @@ Description the first in the list gets chosen (usually `mp3`). -extractor.kissmanga.captcha ---------------------------- -Type - ``string`` -Default - ``"stop"`` -Description - Controls how to handle redirects to CAPTCHA pages. - - * ``"stop``: Stop the current extractor run. - * ``"wait``: Ask the user to solve the CAPTCHA and wait. - - extractor.newgrounds.include ---------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 2d7b0ff2..ecb9f9ba 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -88,10 +88,6 @@ "highlights": false, "videos": true }, - "kissmanga": - { - "captcha": "stop" - }, "nijie": { "username": null, @@ -115,10 +111,6 @@ "wait-min": 3.0, "wait-max": 6.0 }, - "readcomiconline": - { - "captcha": "stop" - }, "reddit": { "comments": 0, diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 22e8d5b7..e88f9b10 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -65,7 +65,6 @@ Joyreactor http://joyreactor.com/ Posts, Search Results, Keenspot http://www.keenspot.com/ Comics Khinsider https://downloads.khinsider.com/ Soundtracks Kirei Cake https://reader.kireicake.com/ Chapters, Manga -KissManga https://kissmanga.com/ Chapters, Manga Komikcast https://komikcast.com/ Chapters, Manga Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag Searches LINE BLOG https://www.lineblog.me/ Blogs, Posts diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py deleted file mode 100644 index a45f50ed..00000000 --- a/gallery_dl/aes.py +++ /dev/null @@ -1,337 +0,0 @@ -# -*- coding: utf-8 -*- - -# This is a stripped down version of youtube-dl's aes module. -# All credit for this code goes to the authors of the youtube-dl project. -# https://ytdl-org.github.io/youtube-dl/ -# https://github.com/ytdl-org/youtube-dl/ - -import base64 -from math import ceil - -BLOCK_SIZE_BYTES = 16 - - -def aes_cbc_decrypt(data, key, iv): - """ - Decrypt with aes in CBC mode - - @param {int[]} data cipher - @param {int[]} key 16/24/32-Byte cipher key - @param {int[]} iv 16-Byte IV - @returns {int[]} decrypted data - """ - expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) - - decrypted_data = [] - previous_cipher_block = iv - for i in range(block_count): - block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - block += [0] * (BLOCK_SIZE_BYTES - len(block)) - - decrypted_block = aes_decrypt(block, expanded_key) - decrypted_data += xor(decrypted_block, previous_cipher_block) - previous_cipher_block = block - decrypted_data = decrypted_data[:len(data)] - - return decrypted_data - - -def aes_cbc_decrypt_text(data, key, iv): - """ - Decrypt with aes in CBC mode - - @param {string} data base64 encoded cipher - @param {int[]} key 16/24/32-Byte cipher key - @param {int[]} iv 16-Byte IV - @returns {string} decrypted data as utf8 encoded string - """ - data = base64.standard_b64decode(bytes(data, "ascii")) - charcodes = aes_cbc_decrypt(list(data), key, iv) - last = charcodes[-1] - if last <= 16: - charcodes = charcodes[:-last] - return bytes(charcodes).decode() - - -def key_expansion(data): - """ - Generate key schedule - - @param {int[]} data 16/24/32-Byte cipher key - @returns {int[]} 176/208/240-Byte expanded key - """ - data = data[:] # copy - rcon_iteration = 1 - key_size_bytes = len(data) - expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES - - while len(data) < expanded_key_size_bytes: - temp = data[-4:] - temp = key_schedule_core(temp, rcon_iteration) - rcon_iteration += 1 - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - for _ in range(3): - temp = data[-4:] - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - if key_size_bytes == 32: - temp = data[-4:] - temp = sub_bytes(temp) - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - if key_size_bytes == 32: - rounds = 3 - elif key_size_bytes == 24: - rounds = 2 - else: - rounds = 0 - for _ in range(rounds): - temp = data[-4:] - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - data = data[:expanded_key_size_bytes] - - return data - - -def aes_decrypt(data, expanded_key): - """ - Decrypt one block with aes - - @param {int[]} data 16-Byte cipher - @param {int[]} expanded_key 176/208/240-Byte expanded key - @returns {int[]} 16-Byte state - """ - rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 - - for i in range(rounds, 0, -1): - data = xor( - data, - expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - ) - if i != rounds: - data = mix_columns_inv(data) - data = shift_rows_inv(data) - data = sub_bytes_inv(data) - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - - return data - - -RCON = ( - 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, -) -SBOX = ( - 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, - 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, - 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, - 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, - 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, - 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, - 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, - 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, - 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, - 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, - 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, - 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, - 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, - 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, - 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, - 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, - 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, - 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, - 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, - 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, - 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, - 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, - 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, - 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, - 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, - 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, - 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, - 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, - 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, - 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, - 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, - 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16, -) -SBOX_INV = ( - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, - 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, - 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, - 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, - 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, - 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, - 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, - 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, - 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, - 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, - 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, - 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, - 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, - 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, - 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, - 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, - 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, -) -MIX_COLUMN_MATRIX = ( - (0x2, 0x3, 0x1, 0x1), - (0x1, 0x2, 0x3, 0x1), - (0x1, 0x1, 0x2, 0x3), - (0x3, 0x1, 0x1, 0x2), -) -MIX_COLUMN_MATRIX_INV = ( - (0xE, 0xB, 0xD, 0x9), - (0x9, 0xE, 0xB, 0xD), - (0xD, 0x9, 0xE, 0xB), - (0xB, 0xD, 0x9, 0xE), -) -RIJNDAEL_EXP_TABLE = ( - 0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, - 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, - 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, - 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, - 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, - 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, - 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, - 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, - 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, - 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, - 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, - 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, - 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, - 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, - 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, - 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, - 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, - 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, - 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, - 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, - 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, - 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, - 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, - 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, - 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, - 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, - 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, - 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, - 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, - 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, - 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, - 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01, -) -RIJNDAEL_LOG_TABLE = ( - 0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, - 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, - 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, - 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, - 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, - 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, - 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, - 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, - 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, - 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, - 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, - 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, - 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, - 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, - 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, - 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, - 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, - 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, - 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, - 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, - 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, - 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, - 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, - 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, - 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, - 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, - 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, - 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, - 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, - 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, - 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, - 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07, -) - - -def sub_bytes(data): - return [SBOX[x] for x in data] - - -def sub_bytes_inv(data): - return [SBOX_INV[x] for x in data] - - -def rotate(data): - return data[1:] + [data[0]] - - -def key_schedule_core(data, rcon_iteration): - data = rotate(data) - data = sub_bytes(data) - data[0] = data[0] ^ RCON[rcon_iteration] - return data - - -def xor(data1, data2): - return [x ^ y for x, y in zip(data1, data2)] - - -def rijndael_mul(a, b): - if a == 0 or b == 0: - return 0 - return RIJNDAEL_EXP_TABLE[ - (RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF - ] - - -def mix_column(data, matrix): - data_mixed = [] - for row in range(4): - mixed = 0 - for column in range(4): - # xor is (+) and (-) - mixed ^= rijndael_mul(data[column], matrix[row][column]) - data_mixed.append(mixed) - return data_mixed - - -def mix_columns(data, matrix=MIX_COLUMN_MATRIX): - data_mixed = [] - for i in range(4): - column = data[i * 4: (i + 1) * 4] - data_mixed += mix_column(column, matrix) - return data_mixed - - -def mix_columns_inv(data): - return mix_columns(data, MIX_COLUMN_MATRIX_INV) - - -def shift_rows_inv(data): - data_shifted = [] - for column in range(4): - for row in range(4): - data_shifted.append(data[((column - row) & 0b11) * 4 + row]) - return data_shifted - - -__all__ = ['key_expansion', 'aes_cbc_decrypt', 'aes_cbc_decrypt_text'] diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 4a71f08d..b8e39bcc 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -59,7 +59,6 @@ modules = [ "kabeuchi", "keenspot", "khinsider", - "kissmanga", "komikcast", "konachan", "lineblog", diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py deleted file mode 100644 index 348453d7..00000000 --- a/gallery_dl/extractor/kissmanga.py +++ /dev/null @@ -1,222 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2015-2020 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract manga-chapters and entire manga from https://kissmanga.com/""" - -from .common import ChapterExtractor, MangaExtractor, Extractor -from .. import text, aes, exception -from ..cache import cache -import hashlib -import ast -import re - - -class RedirectMixin(): - """Detect and handle redirects to CAPTCHA pages""" - - def request(self, url, **kwargs): - while True: - response = Extractor.request(self, url, **kwargs) - if not response.history or "/AreYouHuman" not in response.url: - return response - if self.config("captcha", "stop") == "wait": - self.log.warning( - "Redirect to \n%s\nVisit this URL in your browser, solve " - "the CAPTCHA, and press ENTER to continue", response.url) - try: - input() - except (EOFError, OSError): - pass - else: - raise exception.StopExtraction( - "Redirect to \n%s\nVisit this URL in your browser and " - "solve the CAPTCHA to continue", response.url) - - -class KissmangaBase(RedirectMixin): - """Base class for kissmanga extractors""" - category = "kissmanga" - archive_fmt = "{chapter_id}_{page}" - root = "https://kissmanga.com" - - @staticmethod - def parse_chapter_string(data): - """Parse 'chapter_string' value contained in 'data'""" - data["chapter_string"] = text.unescape(data["chapter_string"]) - - match = re.match(( - r"(?:[Vv]ol\.0*(\d+) )?" - r"(?:[Cc]h\.)?0*(\d+)" - r"(?:[.:]0*(\d+))?" - r"(?: *[:-]? *(.+))?" - ), data["chapter_string"]) - - if not match: - match = re.match(( - r".+?(?: -)? ()" - r"0*(\d+)(?:[Vv.]0*(\d+))?" - r"(?: *[:-]? *(.+))?" - ), data["chapter_string"]) - - if match: - volume, chapter, minor, title = match.groups() - else: - volume, chapter, minor, title = 0, 0, "", data["chapter_string"] - - data["volume"] = text.parse_int(volume) - data["chapter"] = text.parse_int(chapter) - data["chapter_minor"] = "." + minor if minor else "" - data["title"] = title if title and title != "Read Online" else "" - return data - - -class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor): - """Extractor for manga-chapters from kissmanga.com""" - pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com" - r"(/Manga/[^/?&#]+/[^/?&#]+\?id=(\d+))") - test = ( - ("https://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", { - "url": "46e63fd63e9e16f19bc1e6c7a45dc060815642fd", - "keyword": "1cd0b5214ac7ae4d53e2fd8fec40ceec84cd09bf", - }), - ("https://kissmanga.com/Manga/Urban-Tales/a?id=256717", { - "url": "c26be8bf9c2abacee2076979d021634092cf38f1", - "keyword": "e1d16780df8e04076ed2b5f0637c5b710ec2f2ea", - }), - ("https://kissmanga.com/Manga/Monster/Monster-79?id=7608", { - "count": 23, - "keyword": "f433a7a8fae840e17dace316a243fa27faab86de", - }), - ("https://kissmanga.com/Manga/Houseki-no-Kuni/Oneshot?id=404189", { - "count": 49, - "keyword": "cea131c9fe9c71309b3270cd86718d4d1198c31c", - }), - ("https://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608"), - ) - - def __init__(self, match): - ChapterExtractor.__init__(self, match) - self.chapter_id = match.group(2) - self.session.headers["Referer"] = self.root - - def metadata(self, page): - title = text.extract(page, "", "")[0].strip() - manga, cinfo = title.split("\n")[1:3] - data = { - "manga": manga.strip(), - "chapter_string": cinfo.strip(), - "chapter_id": text.parse_int(self.chapter_id), - "lang": "en", - "language": "English", - } - return self.parse_chapter_string(data) - - def images(self, page): - self.session.headers["Referer"] = None - try: - key = self.build_aes_key(page) - iv = (0xa5, 0xe8, 0xe2, 0xe9, 0xc2, 0x72, 0x1b, 0xe0, - 0xa8, 0x4a, 0xd6, 0x60, 0xc4, 0x72, 0xc1, 0xf3) - return [ - (aes.aes_cbc_decrypt_text( - data, key, iv).partition("&")[0], None) - for data in text.extract_iter( - page, 'push(wrapKA("', '"' - ) - ] - except UnicodeDecodeError: - self.log.error("Failed to decrypt image URLs") - except (ValueError, IndexError): - self.log.error("Failed to get AES key") - return [] - - def build_aes_key(self, page): - chko = self._chko_from_external_script() - - for script in self._scripts(page): - for stmt in [s.strip() for s in script.split(";")]: - - if stmt.startswith("var _"): - name, _, value = stmt[4:].partition(" = ") - name += "[0]" - value = ast.literal_eval(value)[0] - - elif stmt.startswith("chko = "): - stmt = stmt[7:] - if stmt == name: - chko = value - elif stmt == "chko + " + name: - chko = chko + value - elif stmt == name + " + chko": - chko = value + chko - else: - self.log.warning("unrecognized expression: '%s'", stmt) - - elif stmt.startswith("key = "): - pass - - else: - self.log.warning("unrecognized statement: '%s'", stmt) - - return list(hashlib.sha256(chko.encode("ascii")).digest()) - - @staticmethod - def _scripts(page): - end = 0 - while True: - pos = page.find("key = ", end) - if pos == -1: - return - beg = page.rindex('', pos) - yield page[beg:end] - - @cache(maxage=3600) - def _chko_from_external_script(self): - script = self.request(self.root + "/Scripts/lo.js").text - - pos = script.index("var chko") - var = text.extract(script, "=", "[", pos)[0].lstrip() - idx = text.extract(script, "[", "]", pos)[0] - - pos = script.index(var) - lst = text.extract(script, "=", ";", pos)[0] - return ast.literal_eval(lst.strip())[int(idx)] - - -class KissmangaMangaExtractor(KissmangaBase, MangaExtractor): - """Extractor for manga from kissmanga.com""" - chapterclass = KissmangaChapterExtractor - pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com" - r"(/Manga/[^/?&#]+/?)$") - test = ( - ("https://kissmanga.com/Manga/Dropout", { - "url": "9e3a6f715b229aa3fafa42a1d5da5d65614cb532", - "keyword": "32b09711c28b481845acc32e3bb6054cfc90224d", - }), - ("https://kissmanga.com/manga/feng-shen-ji"), # lowercase - ) - - def chapters(self, page): - results = [] - manga, pos = text.extract(page, ' class="barTitle">', '\ninformation') - page , pos = text.extract(page, ' class="listing">', '', pos) - manga = manga.strip() - needle = '" title="Read ' + manga + ' ' - manga = text.unescape(manga) - - for item in text.extract_iter(page, ''): - url, _, chapter = item.partition(needle) - data = { - "manga": manga, "chapter_string": chapter, - "chapter_id": text.parse_int(url.rpartition("=")[2]), - "lang": "en", "language": "English", - } - self.parse_chapter_string(data) - results.append((self.root + url, data)) - return results diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index dda48090..7030c819 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -1,20 +1,19 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract comic-issues and entire comics from https://readcomiconline.to/""" +"""Extractors for https://readcomiconline.to/""" -from .common import ChapterExtractor, MangaExtractor -from .kissmanga import RedirectMixin -from .. import text +from .common import Extractor, ChapterExtractor, MangaExtractor +from .. import text, exception import re -class ReadcomiconlineBase(RedirectMixin): +class ReadcomiconlineBase(): """Base class for readcomiconline extractors""" category = "readcomiconline" directory_fmt = ("{category}", "{comic}", "{issue:>03}") @@ -22,6 +21,25 @@ class ReadcomiconlineBase(RedirectMixin): archive_fmt = "{issue_id}_{page}" root = "https://readcomiconline.to" + def request(self, url, **kwargs): + """Detect and handle redirects to CAPTCHA pages""" + while True: + response = Extractor.request(self, url, **kwargs) + if not response.history or "/AreYouHuman" not in response.url: + return response + if self.config("captcha", "stop") == "wait": + self.log.warning( + "Redirect to \n%s\nVisit this URL in your browser, solve " + "the CAPTCHA, and press ENTER to continue", response.url) + try: + input() + except (EOFError, OSError): + pass + else: + raise exception.StopExtraction( + "Redirect to \n%s\nVisit this URL in your browser and " + "solve the CAPTCHA to continue", response.url) + class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): """Extractor for comic-issues from readcomiconline.to""" diff --git a/test/test_results.py b/test/test_results.py index a4c1485f..1756a42b 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -22,11 +22,10 @@ from gallery_dl import extractor, util, job, config, exception # noqa E402 # these don't work on Travis CI TRAVIS_SKIP = { - "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx", + "exhentai", "mangafox", "dynastyscans", "nijie", "instagram", "ngomik", "archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs", - "sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex", - "sankakucomplex", "warosu", "fuskator", "patreon", "komikcast", - "instagram", "ngomik", + "sankaku", "idolcomplex", "mangahere", "mangadex", "sankakucomplex", + "warosu", "fuskator", "patreon", "komikcast", } # temporary issues, etc.