Update voice API to version 8 (#3183)

This commit is contained in:
José Santos Garrido
2025-09-03 11:45:08 +02:00
committed by GitHub
parent e61eb519bf
commit 927c9053e4
8 changed files with 131 additions and 27 deletions

View File

@@ -26,7 +26,7 @@ namespace Discord
/// An <see cref="int"/> representing the API version that Discord.Net uses to communicate with Discord's
/// voice server.
/// </returns>
public const int VoiceAPIVersion = 3;
public const int VoiceAPIVersion = 8;
/// <summary>
/// Gets the Discord.Net version, including the build number.
/// </summary>

View File

@@ -0,0 +1,12 @@
using Newtonsoft.Json;
namespace Discord.API.Voice
{
/// <summary>
/// Payload object sent with <c>VoiceOpCode.Heartbeat</c> by
/// <c>DiscordVoiceAPIClient.SendHeartbeatAsync</c>.
/// </summary>
internal class HeartbeatParams
{
/// <summary>
/// Serialized as <c>t</c>. The sender fills this with the current UTC time
/// expressed as Unix milliseconds.
/// </summary>
[JsonProperty("t")]
public long Timestamp { get; set; }
/// <summary>
/// Serialized as <c>seq_ack</c>. Intended to carry the last sequence number received
/// from the voice server; the audio client currently passes <c>-1</c> (see the TODO
/// at the heartbeat call site).
/// </summary>
[JsonProperty("seq_ack")]
public int SequenceAck { get; set; }
}
}

View File

@@ -22,7 +22,13 @@ namespace Discord.API.Voice
Hello = 8,
/// <summary> C←S - Used to acknowledge a resumed connection. </summary>
Resumed = 9,
/// <summary> C←S - One or more clients have connected to the voice channel. </summary>
ClientConnect = 11,
/// <summary> C←S - Used to notify that a client has disconnected. </summary>
ClientDisconnect = 13,
/// <summary> C←S - Contains the flags of a user that connected to voice, also sent on initial connection for each existing user. </summary>
ClientFlags = 18,
/// <summary> C←S - Contains the platform type of a user that connected to voice, also sent on initial connection for each existing user. </summary>
ClientPlatform = 20,
}
}

View File

@@ -315,7 +315,7 @@ namespace Discord.Audio
_ssrc = data.SSRC;
if (!data.Modes.Contains(DiscordVoiceAPIClient.Mode))
throw new InvalidOperationException($"Discord does not support {DiscordVoiceAPIClient.Mode}");
throw new InvalidOperationException($"Discord does not support {DiscordVoiceAPIClient.Mode}. Available modes: {string.Join(", ", data.Modes)}");
ApiClient.SetUdpEndpoint(data.Ip, data.Port);
await ApiClient.SendDiscoveryAsync(_ssrc).ConfigureAwait(false);
@@ -371,6 +371,9 @@ namespace Discord.Audio
await _speakingUpdatedEvent.InvokeAsync(data.UserId, data.Speaking);
}
break;
case VoiceOpCode.ClientConnect:
await _audioLogger.DebugAsync("Received ClientConnect").ConfigureAwait(false);
break;
case VoiceOpCode.ClientDisconnect:
{
await _audioLogger.DebugAsync("Received ClientDisconnect").ConfigureAwait(false);
@@ -391,6 +394,10 @@ namespace Discord.Audio
_ = _connection.CompleteAsync();
}
break;
// Client flags and platform should be ignored: https://docs.discord.food/topics/voice-connections#client-connections
case VoiceOpCode.ClientFlags:
case VoiceOpCode.ClientPlatform:
break;
default:
await _audioLogger.WarningAsync($"Unknown OpCode ({opCode})").ConfigureAwait(false);
break;
@@ -513,7 +520,9 @@ namespace Discord.Audio
_heartbeatTimes.Enqueue(now);
try
{
await ApiClient.SendHeartbeatAsync().ConfigureAwait(false);
// TODO: The last sequence number received should be sent.
// https://discord.com/developers/docs/topics/voice-connections#buffered-resume
await ApiClient.SendHeartbeatAsync(-1).ConfigureAwait(false);
}
catch (Exception ex)
{

View File

@@ -1,4 +1,3 @@
using System;
using System.Runtime.InteropServices;
using System.Security;
@@ -6,31 +5,50 @@ namespace Discord.Audio
{
public unsafe static class SecretBox
{
[DllImport("libsodium", EntryPoint = "crypto_secretbox_easy", CallingConvention = CallingConvention.Cdecl)]
private static extern int SecretBoxEasy(byte* output, byte* input, long inputLength, byte[] nonce, byte[] secret);
[DllImport("libsodium", EntryPoint = "crypto_secretbox_open_easy", CallingConvention = CallingConvention.Cdecl)]
private static extern int SecretBoxOpenEasy(byte* output, byte* input, long inputLength, byte[] nonce, byte[] secret);
[DllImport("libsodium", EntryPoint = "crypto_aead_xchacha20poly1305_ietf_encrypt", CallingConvention = CallingConvention.Cdecl)]
private static extern int Encrypt(byte* ciphertext, out ulong ciphertextLength, byte* message, ulong messageLength, byte* ad, ulong adLength, byte* nsec, byte[] nonce, byte[] key);
public static int Encrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] nonce, byte[] secret)
[DllImport("libsodium", EntryPoint = "crypto_aead_xchacha20poly1305_ietf_decrypt", CallingConvention = CallingConvention.Cdecl)]
private static extern int Decrypt(byte* plaintext, out ulong plaintextLength, byte* nsec, byte* ciphertext, ulong ciphertextLength, byte* ad, ulong adLength, byte[] nonce, byte[] key);
public static int Encrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] header, byte[] nonce, byte[] key)
{
fixed (byte* inPtr = input)
fixed (byte* outPtr = output)
fixed (byte* adPtr = header)
{
int error = SecretBoxEasy(outPtr + outputOffset, inPtr + inputOffset, inputLength, nonce, secret);
int error = Encrypt(
outPtr + outputOffset, out ulong cipherLen,
inPtr + inputOffset, (ulong)inputLength,
adPtr, (ulong)header.Length,
null, nonce, key
);
if (error != 0)
throw new SecurityException($"Sodium Error: {error}");
return inputLength + 16;
throw new SecurityException($"Sodium AEAD Error: {error}");
return (int)cipherLen;
}
}
public static int Decrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] nonce, byte[] secret)
public static int Decrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] header, byte[] nonce, byte[] key)
{
fixed (byte* inPtr = input)
fixed (byte* outPtr = output)
fixed (byte* adPtr = header)
{
int error = SecretBoxOpenEasy(outPtr + outputOffset, inPtr + inputOffset, inputLength, nonce, secret);
int error = Decrypt(
outPtr + outputOffset, out ulong plainLen,
null,
inPtr + inputOffset, (ulong)inputLength,
adPtr, (ulong)header.Length,
nonce, key
);
if (error != 0)
throw new SecurityException($"Sodium Error: {error}");
return inputLength - 16;
throw new SecurityException($"Sodium AEAD Decrypt Error: {error}");
return (int)plainLen;
}
}
}

View File

@@ -9,6 +9,10 @@ namespace Discord.Audio.Streams
/// </summary>
public class SodiumDecryptStream : AudioOutStream
{
private const int RtpHeaderSize = 12;
private const int NonceSize = 24;
private const int NonceCounterSize = 4;
private readonly AudioClient _client;
private readonly AudioStream _next;
private readonly byte[] _nonce;
@@ -21,7 +25,7 @@ namespace Discord.Audio.Streams
{
_next = next;
_client = (AudioClient)client;
_nonce = new byte[24];
_nonce = new byte[NonceSize];
}
public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancelToken)
@@ -31,9 +35,27 @@ namespace Discord.Audio.Streams
if (_client.SecretKey == null)
return Task.CompletedTask;
Buffer.BlockCopy(buffer, 0, _nonce, 0, 12); //Copy RTP header to nonce
count = SecretBox.Decrypt(buffer, offset + 12, count - 12, buffer, offset + 12, _nonce, _client.SecretKey);
return _next.WriteAsync(buffer, 0, count + 12, cancelToken);
// A packet must hold at least the RTP header and the trailing nonce counter.
// Anything shorter would yield a negative payload length, which becomes a huge
// unsigned length once handed to the native decrypt call, so drop it here.
if (count < RtpHeaderSize + NonceCounterSize)
    return Task.CompletedTask;

// Extract nonce from the payload.
// The counter occupies the LAST NonceCounterSize bytes inside the packet
// (it is already excluded from payloadLength below). The encrypt side writes the
// same byte sequence to the start of its nonce and to the packet tail, so the
// bytes are copied verbatim - no byte-order swap. The remaining nonce bytes stay zero.
Buffer.BlockCopy(buffer, offset + count - NonceCounterSize, _nonce, 0, NonceCounterSize);

// Decrypt payload
// The RTP header is not encrypted; it is passed as additional authenticated data.
byte[] rtpHeader = new byte[RtpHeaderSize];
Buffer.BlockCopy(buffer, offset, rtpHeader, 0, rtpHeader.Length);
int payloadOffset = offset + rtpHeader.Length;
int payloadLength = count - rtpHeader.Length - NonceCounterSize;
// Decryption happens in place: input and output are the same buffer region.
int decryptedLength = SecretBox.Decrypt(
    buffer,
    payloadOffset,
    payloadLength,
    buffer,
    payloadOffset,
    rtpHeader,
    _nonce,
    _client.SecretKey);
// Forward header + plaintext only; the authentication tag and nonce counter are dropped.
int packageLength = rtpHeader.Length + decryptedLength;
return _next.WriteAsync(buffer, offset, packageLength, cancelToken);
}
public override Task FlushAsync(CancellationToken cancelToken)

View File

@@ -9,18 +9,23 @@ namespace Discord.Audio.Streams
/// </summary>
public class SodiumEncryptStream : AudioOutStream
{
private const int RtpHeaderSize = 12;
private const int NonceSize = 24;
private readonly AudioClient _client;
private readonly AudioStream _next;
private readonly byte[] _nonce;
private bool _hasHeader;
private ushort _nextSeq;
private uint _nextTimestamp;
private uint _nonceCounter;
public SodiumEncryptStream(AudioStream next, IAudioClient client)
{
_next = next;
_client = (AudioClient)client;
_nonce = new byte[24];
_nonce = new byte[NonceSize];
_nonceCounter = 0;
}
/// <exception cref="InvalidOperationException">Header received with no payload.</exception>
@@ -46,10 +51,35 @@ namespace Discord.Audio.Streams
if (_client.SecretKey == null)
return;
Buffer.BlockCopy(buffer, offset, _nonce, 0, 12); //Copy nonce from RTP header
count = SecretBox.Encrypt(buffer, offset + 12, count - 12, buffer, 12, _nonce, _client.SecretKey);
// The first bytes of the nonce are the counter in big-endian.
byte[] counterBytes = BitConverter.GetBytes(_nonceCounter);
if (BitConverter.IsLittleEndian)
    Array.Reverse(counterBytes); // big-endian
// counterBytes is a freshly allocated 4-byte array, so it is always read from index 0.
// `offset` indexes into `buffer` only; using it as the source offset here makes
// Buffer.BlockCopy throw for any non-zero offset.
Buffer.BlockCopy(counterBytes, 0, _nonce, 0, counterBytes.Length);
if (++_nonceCounter >= uint.MaxValue)
    _nonceCounter = 0;

// Encrypt payload
// The RTP header stays in clear text and is passed as additional authenticated data.
byte[] rtpHeader = new byte[RtpHeaderSize];
Buffer.BlockCopy(buffer, offset, rtpHeader, 0, rtpHeader.Length);
int payloadOffset = offset + rtpHeader.Length;
int payloadLength = count - rtpHeader.Length;
// Encryption happens in place: input and output are the same buffer region.
int encryptedLength = SecretBox.Encrypt(
    buffer,
    payloadOffset,
    payloadLength,
    buffer,
    payloadOffset,
    rtpHeader,
    _nonce,
    _client.SecretKey);

// Append nonce to encrypted payload
Buffer.BlockCopy(counterBytes, 0, buffer, payloadOffset + encryptedLength, counterBytes.Length);
int packageLength = rtpHeader.Length + encryptedLength + counterBytes.Length;
_next.WriteHeader(_nextSeq, _nextTimestamp, false);
await _next.WriteAsync(buffer, 0, count + 12, cancelToken).ConfigureAwait(false);
await _next.WriteAsync(buffer, offset, packageLength, cancelToken).ConfigureAwait(false);
}
public override Task FlushAsync(CancellationToken cancelToken)

View File

@@ -19,7 +19,7 @@ namespace Discord.Audio
{
#region DiscordVoiceAPIClient
public const int MaxBitrate = 128 * 1024;
public const string Mode = "xsalsa20_poly1305";
public const string Mode = "aead_xchacha20_poly1305_rtpsize";
public event Func<string, string, double, Task> SentRequest { add { _sentRequestEvent.Add(value); } remove { _sentRequestEvent.Remove(value); } }
private readonly AsyncEvent<Func<string, string, double, Task>> _sentRequestEvent = new AsyncEvent<Func<string, string, double, Task>>();
@@ -129,8 +129,15 @@ namespace Discord.Audio
#endregion
#region WebSocket
public Task SendHeartbeatAsync(RequestOptions options = null)
=> SendAsync(VoiceOpCode.Heartbeat, DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), options: options);
/// <summary>
/// Sends a <see cref="VoiceOpCode.Heartbeat"/> message carrying the current UTC time
/// (Unix milliseconds) and the supplied sequence acknowledgement.
/// </summary>
/// <param name="sequenceAck">Value sent as the <c>seq_ack</c> field of the heartbeat payload.</param>
/// <param name="options">Request options forwarded to the send call.</param>
/// <returns>The task returned by the underlying send operation.</returns>
public Task SendHeartbeatAsync(int sequenceAck, RequestOptions options = null)
{
    var heartbeat = new HeartbeatParams
    {
        SequenceAck = sequenceAck,
        Timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()
    };

    return SendAsync(VoiceOpCode.Heartbeat, heartbeat, options: options);
}
public Task SendIdentityAsync(ulong userId, string sessionId, string token)
{