diff --git a/src/Discord.Net.Core/DiscordConfig.cs b/src/Discord.Net.Core/DiscordConfig.cs
index 50140d94..41748b69 100644
--- a/src/Discord.Net.Core/DiscordConfig.cs
+++ b/src/Discord.Net.Core/DiscordConfig.cs
@@ -26,7 +26,7 @@ namespace Discord
         /// An representing the API version that Discord.Net uses to communicate with Discord's
         /// voice server.
         ///
-        public const int VoiceAPIVersion = 3;
+        public const int VoiceAPIVersion = 8;
         ///
         /// Gets the Discord.Net version, including the build number.
         ///
diff --git a/src/Discord.Net.WebSocket/API/Voice/HeartbeatParams.cs b/src/Discord.Net.WebSocket/API/Voice/HeartbeatParams.cs
new file mode 100644
index 00000000..5a8083c5
--- /dev/null
+++ b/src/Discord.Net.WebSocket/API/Voice/HeartbeatParams.cs
@@ -0,0 +1,12 @@
+using Newtonsoft.Json;
+
+namespace Discord.API.Voice
+{
+    internal class HeartbeatParams
+    {
+        [JsonProperty("t")]
+        public long Timestamp { get; set; }
+        [JsonProperty("seq_ack")]
+        public int SequenceAck { get; set; }
+    }
+}
diff --git a/src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs b/src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs
index 94006505..e70be394 100644
--- a/src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs
+++ b/src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs
@@ -22,7 +22,13 @@ namespace Discord.API.Voice
         Hello = 8,
         /// C←S - Used to acknowledge a resumed connection.
         Resumed = 9,
+        /// C←S - One or more clients have connected to the voice channel.
+        ClientConnect = 11,
         /// C←S - Used to notify that a client has disconnected.
         ClientDisconnect = 13,
+        /// C←S - Contains the flags of a user that connected to voice, also sent on initial connection for each existing user.
+        ClientFlags = 18,
+        /// C←S - Contains the platform type of a user that connected to voice, also sent on initial connection for each existing user.
+        ClientPlatform = 20,
     }
 }
diff --git a/src/Discord.Net.WebSocket/Audio/AudioClient.cs b/src/Discord.Net.WebSocket/Audio/AudioClient.cs
index 40ef631d..0dc20115 100644
--- a/src/Discord.Net.WebSocket/Audio/AudioClient.cs
+++ b/src/Discord.Net.WebSocket/Audio/AudioClient.cs
@@ -315,7 +315,7 @@ namespace Discord.Audio
                         _ssrc = data.SSRC;
                         if (!data.Modes.Contains(DiscordVoiceAPIClient.Mode))
-                            throw new InvalidOperationException($"Discord does not support {DiscordVoiceAPIClient.Mode}");
+                            throw new InvalidOperationException($"Discord does not support {DiscordVoiceAPIClient.Mode}. Available modes: {string.Join(", ", data.Modes)}");
                         ApiClient.SetUdpEndpoint(data.Ip, data.Port);
                         await ApiClient.SendDiscoveryAsync(_ssrc).ConfigureAwait(false);
@@ -371,6 +371,9 @@ namespace Discord.Audio
                             await _speakingUpdatedEvent.InvokeAsync(data.UserId, data.Speaking);
                         }
                         break;
+                    case VoiceOpCode.ClientConnect:
+                        await _audioLogger.DebugAsync("Received ClientConnect").ConfigureAwait(false);
+                        break;
                     case VoiceOpCode.ClientDisconnect:
                         {
                             await _audioLogger.DebugAsync("Received ClientDisconnect").ConfigureAwait(false);
@@ -391,6 +394,10 @@ namespace Discord.Audio
                             _ = _connection.CompleteAsync();
                         }
                         break;
+                    // Client flags and platform should be ignored: https://docs.discord.food/topics/voice-connections#client-connections
+                    case VoiceOpCode.ClientFlags:
+                    case VoiceOpCode.ClientPlatform:
+                        break;
                     default:
                         await _audioLogger.WarningAsync($"Unknown OpCode ({opCode})").ConfigureAwait(false);
                         break;
@@ -513,7 +520,9 @@ namespace Discord.Audio
                     _heartbeatTimes.Enqueue(now);
                     try
                     {
-                        await ApiClient.SendHeartbeatAsync().ConfigureAwait(false);
+                        // TODO: The last sequence number received should be sent.
+                        // https://discord.com/developers/docs/topics/voice-connections#buffered-resume
+                        await ApiClient.SendHeartbeatAsync(-1).ConfigureAwait(false);
                     }
                     catch (Exception ex)
                     {
diff --git a/src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs b/src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs
index 0cec5402..7d9d76ef 100644
--- a/src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs
+++ b/src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs
@@ -1,4 +1,3 @@
-using System;
 using System.Runtime.InteropServices;
 using System.Security;
@@ -6,31 +5,50 @@ namespace Discord.Audio
 {
     public unsafe static class SecretBox
     {
-        [DllImport("libsodium", EntryPoint = "crypto_secretbox_easy", CallingConvention = CallingConvention.Cdecl)]
-        private static extern int SecretBoxEasy(byte* output, byte* input, long inputLength, byte[] nonce, byte[] secret);
-        [DllImport("libsodium", EntryPoint = "crypto_secretbox_open_easy", CallingConvention = CallingConvention.Cdecl)]
-        private static extern int SecretBoxOpenEasy(byte* output, byte* input, long inputLength, byte[] nonce, byte[] secret);
+        [DllImport("libsodium", EntryPoint = "crypto_aead_xchacha20poly1305_ietf_encrypt", CallingConvention = CallingConvention.Cdecl)]
+        private static extern int Encrypt(byte* ciphertext, out ulong ciphertextLength, byte* message, ulong messageLength, byte* ad, ulong adLength, byte* nsec, byte[] nonce, byte[] key);
-        public static int Encrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] nonce, byte[] secret)
+        [DllImport("libsodium", EntryPoint = "crypto_aead_xchacha20poly1305_ietf_decrypt", CallingConvention = CallingConvention.Cdecl)]
+        private static extern int Decrypt(byte* plaintext, out ulong plaintextLength, byte* nsec, byte* ciphertext, ulong ciphertextLength, byte* ad, ulong adLength, byte[] nonce, byte[] key);
+
+        public static int Encrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] header, byte[] nonce, byte[] key)
         {
             fixed (byte* inPtr = input)
             fixed (byte* outPtr = output)
+            fixed (byte* adPtr = header)
             {
-                int error = SecretBoxEasy(outPtr + outputOffset, inPtr + inputOffset, inputLength, nonce, secret);
+                int error = Encrypt(
+                    outPtr + outputOffset, out ulong cipherLen,
+                    inPtr + inputOffset, (ulong)inputLength,
+                    adPtr, (ulong)header.Length,
+                    null, nonce, key
+                );
+
                 if (error != 0)
-                    throw new SecurityException($"Sodium Error: {error}");
-                return inputLength + 16;
+                    throw new SecurityException($"Sodium AEAD Error: {error}");
+
+                return (int)cipherLen;
             }
         }
-        public static int Decrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] nonce, byte[] secret)
+
+        public static int Decrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] header, byte[] nonce, byte[] key)
         {
             fixed (byte* inPtr = input)
             fixed (byte* outPtr = output)
+            fixed (byte* adPtr = header)
             {
-                int error = SecretBoxOpenEasy(outPtr + outputOffset, inPtr + inputOffset, inputLength, nonce, secret);
+                int error = Decrypt(
+                    outPtr + outputOffset, out ulong plainLen,
+                    null,
+                    inPtr + inputOffset, (ulong)inputLength,
+                    adPtr, (ulong)header.Length,
+                    nonce, key
+                );
+
                 if (error != 0)
-                    throw new SecurityException($"Sodium Error: {error}");
-                return inputLength - 16;
+                    throw new SecurityException($"Sodium AEAD Decrypt Error: {error}");
+
+                return (int)plainLen;
             }
         }
     }
diff --git a/src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs b/src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs
index f343f0cc..34646df2 100644
--- a/src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs
+++ b/src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs
@@ -9,6 +9,10 @@ namespace Discord.Audio.Streams
     ///
     public class SodiumDecryptStream : AudioOutStream
     {
+        private const int RtpHeaderSize = 12;
+        private const int NonceSize = 24;
+        private const int NonceCounterSize = 4;
+
         private readonly AudioClient _client;
         private readonly AudioStream _next;
         private readonly byte[] _nonce;
@@ -21,7 +25,7 @@ namespace Discord.Audio.Streams
         {
             _next = next;
             _client = (AudioClient)client;
-            _nonce = new byte[24];
+            _nonce = new byte[NonceSize];
         }
         public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancelToken)
@@ -31,9 +35,27 @@ namespace Discord.Audio.Streams
             if (_client.SecretKey == null)
                 return Task.CompletedTask;
-            Buffer.BlockCopy(buffer, 0, _nonce, 0, 12); //Copy RTP header to nonce
-            count = SecretBox.Decrypt(buffer, offset + 12, count - 12, buffer, offset + 12, _nonce, _client.SecretKey);
-            return _next.WriteAsync(buffer, 0, count + 12, cancelToken);
+            // Rebuild the nonce from the packet: the last NonceCounterSize bytes (inside count) are the sender's nonce counter.
+            for (int i = 0; i < NonceCounterSize; i++)
+                _nonce[i] = buffer[offset + count - NonceCounterSize + i]; // Copied verbatim: SodiumEncryptStream puts these same bytes at the front of its nonce
+
+            // Decrypt payload
+            byte[] rtpHeader = new byte[RtpHeaderSize];
+            Buffer.BlockCopy(buffer, offset, rtpHeader, 0, rtpHeader.Length);
+            int payloadOffset = offset + rtpHeader.Length;
+            int payloadLength = count - rtpHeader.Length - NonceCounterSize;
+            int decryptedLength = SecretBox.Decrypt(
+                buffer,
+                payloadOffset,
+                payloadLength,
+                buffer,
+                payloadOffset,
+                rtpHeader,
+                _nonce,
+                _client.SecretKey);
+
+            int packageLength = rtpHeader.Length + decryptedLength;
+            return _next.WriteAsync(buffer, offset, packageLength, cancelToken);
         }
         public override Task FlushAsync(CancellationToken cancelToken)
diff --git a/src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs b/src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs
index 30799e82..c9ee9fb7 100644
--- a/src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs
+++ b/src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs
@@ -9,18 +9,23 @@ namespace Discord.Audio.Streams
     ///
     public class SodiumEncryptStream : AudioOutStream
     {
+        private const int RtpHeaderSize = 12;
+        private const int NonceSize = 24;
+
         private readonly AudioClient _client;
         private readonly AudioStream _next;
         private readonly byte[] _nonce;
         private bool _hasHeader;
         private ushort _nextSeq;
         private uint _nextTimestamp;
+        private uint _nonceCounter;
         public SodiumEncryptStream(AudioStream next, IAudioClient client)
         {
             _next = next;
             _client = (AudioClient)client;
-            _nonce = new byte[24];
+            _nonce = new byte[NonceSize];
+            _nonceCounter = 0;
         }
         /// Header received with no payload.
@@ -46,10 +51,35 @@ namespace Discord.Audio.Streams
             if (_client.SecretKey == null)
                 return;
-            Buffer.BlockCopy(buffer, offset, _nonce, 0, 12); //Copy nonce from RTP header
-            count = SecretBox.Encrypt(buffer, offset + 12, count - 12, buffer, 12, _nonce, _client.SecretKey);
+            // The first bytes of the nonce are the counter in big-endian.
+            byte[] counterBytes = BitConverter.GetBytes(_nonceCounter);
+            if (BitConverter.IsLittleEndian)
+                Array.Reverse(counterBytes); // big-endian
+            Buffer.BlockCopy(counterBytes, 0, _nonce, 0, counterBytes.Length); // Source index is 0: counterBytes is its own array, unrelated to the packet offset
+            if (++_nonceCounter >= uint.MaxValue)
+                _nonceCounter = 0;
+
+            // Encrypt payload
+            byte[] rtpHeader = new byte[RtpHeaderSize];
+            Buffer.BlockCopy(buffer, offset, rtpHeader, 0, rtpHeader.Length);
+            int payloadOffset = offset + rtpHeader.Length;
+            int payloadLength = count - rtpHeader.Length;
+            int encryptedLength = SecretBox.Encrypt(
+                buffer,
+                payloadOffset,
+                payloadLength,
+                buffer,
+                payloadOffset,
+                rtpHeader,
+                _nonce,
+                _client.SecretKey);
+
+            // Append nonce to encrypted payload
+            Buffer.BlockCopy(counterBytes, 0, buffer, payloadOffset + encryptedLength, counterBytes.Length);
+            int packageLength = rtpHeader.Length + encryptedLength + counterBytes.Length;
+
             _next.WriteHeader(_nextSeq, _nextTimestamp, false);
-            await _next.WriteAsync(buffer, 0, count + 12, cancelToken).ConfigureAwait(false);
+            await _next.WriteAsync(buffer, offset, packageLength, cancelToken).ConfigureAwait(false);
         }
         public override Task FlushAsync(CancellationToken cancelToken)
diff --git a/src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs b/src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs
index cc810d42..e0529134 100644
--- a/src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs
+++ b/src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs
@@ -19,7 +19,7 @@ namespace Discord.Audio
     {
         #region DiscordVoiceAPIClient
         public const int MaxBitrate = 128 * 1024;
-        public const string Mode = "xsalsa20_poly1305";
+        public const string Mode = "aead_xchacha20_poly1305_rtpsize";
         public event Func SentRequest { add { _sentRequestEvent.Add(value); } remove { _sentRequestEvent.Remove(value); } }
         private readonly AsyncEvent> _sentRequestEvent = new AsyncEvent>();
@@ -129,8 +129,15 @@ namespace Discord.Audio
         #endregion
         #region WebSocket
-        public Task SendHeartbeatAsync(RequestOptions options = null)
-            => SendAsync(VoiceOpCode.Heartbeat, DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), options: options);
+        public Task SendHeartbeatAsync(int sequenceAck, RequestOptions options = null)
+        {
+            return SendAsync(VoiceOpCode.Heartbeat, new HeartbeatParams
+            {
+                Timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
+                SequenceAck = sequenceAck
+            },
+            options: options);
+        }
         public Task SendIdentityAsync(ulong userId, string sessionId, string token)
         {