diff --git a/src/Discord.Net.Core/DiscordConfig.cs b/src/Discord.Net.Core/DiscordConfig.cs
index 50140d94..41748b69 100644
--- a/src/Discord.Net.Core/DiscordConfig.cs
+++ b/src/Discord.Net.Core/DiscordConfig.cs
@@ -26,7 +26,7 @@ namespace Discord
/// An representing the API version that Discord.Net uses to communicate with Discord's
/// voice server.
///
- public const int VoiceAPIVersion = 3;
+ public const int VoiceAPIVersion = 8;
///
/// Gets the Discord.Net version, including the build number.
///
diff --git a/src/Discord.Net.WebSocket/API/Voice/HeartbeatParams.cs b/src/Discord.Net.WebSocket/API/Voice/HeartbeatParams.cs
new file mode 100644
index 00000000..5a8083c5
--- /dev/null
+++ b/src/Discord.Net.WebSocket/API/Voice/HeartbeatParams.cs
@@ -0,0 +1,12 @@
+using Newtonsoft.Json;
+
+namespace Discord.API.Voice
+{
+ internal class HeartbeatParams // JSON payload sent with the voice Heartbeat opcode (built in SendHeartbeatAsync)
+ {
+ [JsonProperty("t")]
+ public long Timestamp { get; set; } // serialized as "t"; SendHeartbeatAsync fills it with the current Unix time in milliseconds
+ [JsonProperty("seq_ack")]
+ public int SequenceAck { get; set; } // serialized as "seq_ack"; AudioClient currently passes -1 (see the TODO at its call site)
+ }
+}
diff --git a/src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs b/src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs
index 94006505..e70be394 100644
--- a/src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs
+++ b/src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs
@@ -22,7 +22,13 @@ namespace Discord.API.Voice
Hello = 8,
/// C←S - Used to acknowledge a resumed connection.
Resumed = 9,
+ /// C←S - One or more clients have connected to the voice channel.
+ ClientConnect = 11,
/// C←S - Used to notify that a client has disconnected.
ClientDisconnect = 13,
+ /// C←S - Contains the flags of a user that connected to voice, also sent on initial connection for each existing user.
+ ClientFlags = 18,
+ /// C←S - Contains the platform type of a user that connected to voice, also sent on initial connection for each existing user.
+ ClientPlatform = 20,
}
}
diff --git a/src/Discord.Net.WebSocket/Audio/AudioClient.cs b/src/Discord.Net.WebSocket/Audio/AudioClient.cs
index 40ef631d..0dc20115 100644
--- a/src/Discord.Net.WebSocket/Audio/AudioClient.cs
+++ b/src/Discord.Net.WebSocket/Audio/AudioClient.cs
@@ -315,7 +315,7 @@ namespace Discord.Audio
_ssrc = data.SSRC;
if (!data.Modes.Contains(DiscordVoiceAPIClient.Mode))
- throw new InvalidOperationException($"Discord does not support {DiscordVoiceAPIClient.Mode}");
+ throw new InvalidOperationException($"Discord does not support {DiscordVoiceAPIClient.Mode}. Available modes: {string.Join(", ", data.Modes)}");
ApiClient.SetUdpEndpoint(data.Ip, data.Port);
await ApiClient.SendDiscoveryAsync(_ssrc).ConfigureAwait(false);
@@ -371,6 +371,9 @@ namespace Discord.Audio
await _speakingUpdatedEvent.InvokeAsync(data.UserId, data.Speaking);
}
break;
+ case VoiceOpCode.ClientConnect:
+ await _audioLogger.DebugAsync("Received ClientConnect").ConfigureAwait(false);
+ break;
case VoiceOpCode.ClientDisconnect:
{
await _audioLogger.DebugAsync("Received ClientDisconnect").ConfigureAwait(false);
@@ -391,6 +394,10 @@ namespace Discord.Audio
_ = _connection.CompleteAsync();
}
break;
+ // Client flags and platform should be ignored: https://docs.discord.food/topics/voice-connections#client-connections
+ case VoiceOpCode.ClientFlags:
+ case VoiceOpCode.ClientPlatform:
+ break;
default:
await _audioLogger.WarningAsync($"Unknown OpCode ({opCode})").ConfigureAwait(false);
break;
@@ -513,7 +520,9 @@ namespace Discord.Audio
_heartbeatTimes.Enqueue(now);
try
{
- await ApiClient.SendHeartbeatAsync().ConfigureAwait(false);
+ // TODO: The last sequence number received should be sent.
+ // https://discord.com/developers/docs/topics/voice-connections#buffered-resume
+ await ApiClient.SendHeartbeatAsync(-1).ConfigureAwait(false);
}
catch (Exception ex)
{
diff --git a/src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs b/src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs
index 0cec5402..7d9d76ef 100644
--- a/src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs
+++ b/src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs
@@ -1,4 +1,3 @@
-using System;
using System.Runtime.InteropServices;
using System.Security;
@@ -6,31 +5,50 @@ namespace Discord.Audio
{
public unsafe static class SecretBox
{
- [DllImport("libsodium", EntryPoint = "crypto_secretbox_easy", CallingConvention = CallingConvention.Cdecl)]
- private static extern int SecretBoxEasy(byte* output, byte* input, long inputLength, byte[] nonce, byte[] secret);
- [DllImport("libsodium", EntryPoint = "crypto_secretbox_open_easy", CallingConvention = CallingConvention.Cdecl)]
- private static extern int SecretBoxOpenEasy(byte* output, byte* input, long inputLength, byte[] nonce, byte[] secret);
+ [DllImport("libsodium", EntryPoint = "crypto_aead_xchacha20poly1305_ietf_encrypt", CallingConvention = CallingConvention.Cdecl)] // native AEAD encrypt; takes over from crypto_secretbox_easy
+ private static extern int Encrypt(byte* ciphertext, out ulong ciphertextLength, byte* message, ulong messageLength, byte* ad, ulong adLength, byte* nsec, byte[] nonce, byte[] key);
- public static int Encrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] nonce, byte[] secret)
+ [DllImport("libsodium", EntryPoint = "crypto_aead_xchacha20poly1305_ietf_decrypt", CallingConvention = CallingConvention.Cdecl)] // native AEAD decrypt; takes over from crypto_secretbox_open_easy
+ private static extern int Decrypt(byte* plaintext, out ulong plaintextLength, byte* nsec, byte* ciphertext, ulong ciphertextLength, byte* ad, ulong adLength, byte[] nonce, byte[] key);
+
+ public static int Encrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] header, byte[] nonce, byte[] key) // header is passed as AEAD additional data; returns the ciphertext length reported by libsodium
{
fixed (byte* inPtr = input)
fixed (byte* outPtr = output)
+ fixed (byte* adPtr = header) // pin the header so it can be handed to the native call as a raw pointer
{
- int error = SecretBoxEasy(outPtr + outputOffset, inPtr + inputOffset, inputLength, nonce, secret);
+ int error = Encrypt(
+ outPtr + outputOffset, out ulong cipherLen,
+ inPtr + inputOffset, (ulong)inputLength,
+ adPtr, (ulong)header.Length, // the whole header array is used as additional data (header must be non-null)
+ null, nonce, key // nsec is passed as null
+ );
+
if (error != 0)
- throw new SecurityException($"Sodium Error: {error}");
- return inputLength + 16;
+ throw new SecurityException($"Sodium AEAD Error: {error}"); // any non-zero native return code is surfaced to the caller
+
+ return (int)cipherLen; // length written by libsodium (the previous secretbox path returned inputLength + 16)
}
}
- public static int Decrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] nonce, byte[] secret)
+
+ public static int Decrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] header, byte[] nonce, byte[] key) // header is passed as AEAD additional data; returns the plaintext length reported by libsodium
{
fixed (byte* inPtr = input)
fixed (byte* outPtr = output)
+ fixed (byte* adPtr = header) // pin the header so it can be handed to the native call as a raw pointer
{
- int error = SecretBoxOpenEasy(outPtr + outputOffset, inPtr + inputOffset, inputLength, nonce, secret);
+ int error = Decrypt(
+ outPtr + outputOffset, out ulong plainLen,
+ null, // nsec is passed as null
+ inPtr + inputOffset, (ulong)inputLength, // NOTE(review): a negative inputLength becomes a huge ulong here — callers should validate packet length
+ adPtr, (ulong)header.Length,
+ nonce, key
+ );
+
if (error != 0)
- throw new SecurityException($"Sodium Error: {error}");
- return inputLength - 16;
+ throw new SecurityException($"Sodium AEAD Decrypt Error: {error}"); // any non-zero native return code is surfaced to the caller
+
+ return (int)plainLen; // length written by libsodium (the previous secretbox path returned inputLength - 16)
}
}
}
diff --git a/src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs b/src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs
index f343f0cc..34646df2 100644
--- a/src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs
+++ b/src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs
@@ -9,6 +9,10 @@ namespace Discord.Audio.Streams
///
public class SodiumDecryptStream : AudioOutStream
{
+ private const int RtpHeaderSize = 12;
+ private const int NonceSize = 24;
+ private const int NonceCounterSize = 4;
+
private readonly AudioClient _client;
private readonly AudioStream _next;
private readonly byte[] _nonce;
@@ -21,7 +25,7 @@ namespace Discord.Audio.Streams
{
_next = next;
_client = (AudioClient)client;
- _nonce = new byte[24];
+ _nonce = new byte[NonceSize];
}
public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancelToken)
@@ -31,9 +35,27 @@ namespace Discord.Audio.Streams
if (_client.SecretKey == null)
return Task.CompletedTask;
- Buffer.BlockCopy(buffer, 0, _nonce, 0, 12); //Copy RTP header to nonce
- count = SecretBox.Decrypt(buffer, offset + 12, count - 12, buffer, offset + 12, _nonce, _client.SecretKey);
- return _next.WriteAsync(buffer, 0, count + 12, cancelToken);
+ // The "rtpsize" AEAD modes append the 4-byte nonce counter after the ciphertext; copy those
+ // trailing bytes verbatim into the head of the nonce (the rest of the nonce stays zero).
+ Buffer.BlockCopy(buffer, offset + count - NonceCounterSize, _nonce, 0, NonceCounterSize);
+
+ // Decrypt in place, passing the RTP header as AEAD additional data.
+ // NOTE(review): assumes a fixed 12-byte header and count >= header + tag + counter — confirm.
+ byte[] rtpHeader = new byte[RtpHeaderSize];
+ Buffer.BlockCopy(buffer, offset, rtpHeader, 0, rtpHeader.Length);
+ int payloadOffset = offset + rtpHeader.Length;
+ int payloadLength = count - rtpHeader.Length - NonceCounterSize;
+ int decryptedLength = SecretBox.Decrypt(
+ buffer,
+ payloadOffset,
+ payloadLength,
+ buffer,
+ payloadOffset,
+ rtpHeader,
+ _nonce,
+ _client.SecretKey);
+ int packageLength = rtpHeader.Length + decryptedLength;
+ return _next.WriteAsync(buffer, offset, packageLength, cancelToken);
}
public override Task FlushAsync(CancellationToken cancelToken)
diff --git a/src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs b/src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs
index 30799e82..c9ee9fb7 100644
--- a/src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs
+++ b/src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs
@@ -9,18 +9,23 @@ namespace Discord.Audio.Streams
///
public class SodiumEncryptStream : AudioOutStream
{
+ private const int RtpHeaderSize = 12;
+ private const int NonceSize = 24;
+
private readonly AudioClient _client;
private readonly AudioStream _next;
private readonly byte[] _nonce;
private bool _hasHeader;
private ushort _nextSeq;
private uint _nextTimestamp;
+ private uint _nonceCounter;
public SodiumEncryptStream(AudioStream next, IAudioClient client)
{
_next = next;
_client = (AudioClient)client;
- _nonce = new byte[24];
+ _nonce = new byte[NonceSize];
+ _nonceCounter = 0;
}
/// Header received with no payload.
@@ -46,10 +51,35 @@ namespace Discord.Audio.Streams
if (_client.SecretKey == null)
return;
- Buffer.BlockCopy(buffer, offset, _nonce, 0, 12); //Copy nonce from RTP header
- count = SecretBox.Encrypt(buffer, offset + 12, count - 12, buffer, 12, _nonce, _client.SecretKey);
+ // The first 4 bytes of the nonce are the packet counter in big-endian; the rest stay zero.
+ byte[] counterBytes = BitConverter.GetBytes(_nonceCounter);
+ if (BitConverter.IsLittleEndian)
+ Array.Reverse(counterBytes); // big-endian
+ Buffer.BlockCopy(counterBytes, 0, _nonce, 0, counterBytes.Length); // source index is 0: counterBytes is its own 4-byte array, unrelated to the packet offset
+ if (++_nonceCounter >= uint.MaxValue)
+ _nonceCounter = 0;
+
+ // Encrypt the payload in place; the RTP header is passed as AEAD additional data and is not encrypted.
+ byte[] rtpHeader = new byte[RtpHeaderSize];
+ Buffer.BlockCopy(buffer, offset, rtpHeader, 0, rtpHeader.Length);
+ int payloadOffset = offset + rtpHeader.Length;
+ int payloadLength = count - rtpHeader.Length;
+ int encryptedLength = SecretBox.Encrypt(
+ buffer,
+ payloadOffset,
+ payloadLength,
+ buffer,
+ payloadOffset,
+ rtpHeader,
+ _nonce,
+ _client.SecretKey);
+
+ // Append the counter bytes after the encrypted payload (NOTE(review): assumes buffer has spare room for tag + counter — confirm)
+ Buffer.BlockCopy(counterBytes, 0, buffer, payloadOffset + encryptedLength, counterBytes.Length);
+ int packageLength = rtpHeader.Length + encryptedLength + counterBytes.Length;
+
_next.WriteHeader(_nextSeq, _nextTimestamp, false);
- await _next.WriteAsync(buffer, 0, count + 12, cancelToken).ConfigureAwait(false);
+ await _next.WriteAsync(buffer, offset, packageLength, cancelToken).ConfigureAwait(false);
}
public override Task FlushAsync(CancellationToken cancelToken)
diff --git a/src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs b/src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs
index cc810d42..e0529134 100644
--- a/src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs
+++ b/src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs
@@ -19,7 +19,7 @@ namespace Discord.Audio
{
#region DiscordVoiceAPIClient
public const int MaxBitrate = 128 * 1024;
- public const string Mode = "xsalsa20_poly1305";
+ public const string Mode = "aead_xchacha20_poly1305_rtpsize";
public event Func SentRequest { add { _sentRequestEvent.Add(value); } remove { _sentRequestEvent.Remove(value); } }
private readonly AsyncEvent> _sentRequestEvent = new AsyncEvent>();
@@ -129,8 +129,15 @@ namespace Discord.Audio
#endregion
#region WebSocket
- public Task SendHeartbeatAsync(RequestOptions options = null)
- => SendAsync(VoiceOpCode.Heartbeat, DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), options: options);
+ public Task SendHeartbeatAsync(int sequenceAck, RequestOptions options = null)
+ {
+ // Heartbeat body: the current UTC time as Unix milliseconds plus the
+ // sequence acknowledgement handed in by the caller.
+ long unixMillis = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
+ var heartbeat = new HeartbeatParams { Timestamp = unixMillis, SequenceAck = sequenceAck };
+
+ return SendAsync(VoiceOpCode.Heartbeat, heartbeat, options: options);
+ }
public Task SendIdentityAsync(ulong userId, string sessionId, string token)
{