Skip to content

Commit

Permalink
feat: Added client implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
HavenDV committed Nov 15, 2023
1 parent bb16d85 commit 007d9ce
Show file tree
Hide file tree
Showing 85 changed files with 6,143 additions and 5 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2023 tryAGI
Copyright (c) 2023 tryAGI and Stephen Hodgson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
154 changes: 154 additions & 0 deletions src/libs/OpenAI/Client/Audio/AudioEndpoint.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
using System;
using System.Globalization;
using System.IO;
using System.Net.Http;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using OpenAI.Extensions;

namespace OpenAI.Audio
{
    /// <summary>
    /// Transforms audio into text.<br/>
    /// <see href="https://platform.openai.com/docs/api-reference/audio"/>
    /// </summary>
    public sealed class AudioEndpoint : BaseEndPoint
    {
        /// <summary>
        /// Minimal shape of the payload returned by the transcription/translation
        /// endpoints when <see cref="AudioResponseFormat.Json"/> is requested: { "text": "..." }.
        /// </summary>
        private class AudioResponse
        {
            public AudioResponse(string text)
            {
                Text = text;
            }

            [JsonPropertyName("text")]
            public string Text { get; }
        }

        /// <inheritdoc />
        public AudioEndpoint(OpenAIClient api) : base(api) { }

        /// <inheritdoc />
        protected override string Root => "audio";

        /// <summary>
        /// Generates audio from the input text.
        /// </summary>
        /// <param name="request"><see cref="SpeechRequest"/>.</param>
        /// <param name="chunkCallback">Optional, partial chunk <see cref="ReadOnlyMemory{T}"/> callback to stream audio as it arrives.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns>The complete generated audio as a <see cref="ReadOnlyMemory{T}"/> of bytes.</returns>
        public async Task<ReadOnlyMemory<byte>> CreateSpeechAsync(SpeechRequest request, Func<ReadOnlyMemory<byte>, Task> chunkCallback = null, CancellationToken cancellationToken = default)
        {
            var jsonContent = JsonSerializer.Serialize(request, OpenAIClient.JsonSerializationOptions).ToJsonStringContent(EnableDebug);
            var response = await Api.Client.PostAsync(GetUrl("/speech"), jsonContent, cancellationToken).ConfigureAwait(false);
            await response.CheckResponseAsync(cancellationToken).ConfigureAwait(false);
            await using var responseStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
            await using var memoryStream = new MemoryStream();
            int bytesRead;
            var totalBytesRead = 0;
            var buffer = new byte[8192];

            while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
            {
                await memoryStream.WriteAsync(new ReadOnlyMemory<byte>(buffer, 0, bytesRead), cancellationToken).ConfigureAwait(false);

                if (chunkCallback != null)
                {
                    try
                    {
                        // Slice the accumulated buffer for the just-written chunk instead of
                        // copying it again. The callback is best-effort by design: a failing
                        // consumer must not abort the download.
                        await chunkCallback(new ReadOnlyMemory<byte>(memoryStream.GetBuffer(), totalBytesRead, bytesRead)).ConfigureAwait(false);
                    }
                    catch (Exception e)
                    {
                        // NOTE(review): consider routing this through a logger rather than the console.
                        Console.WriteLine(e);
                    }
                }

                totalBytesRead += bytesRead;
            }

            return new ReadOnlyMemory<byte>(memoryStream.GetBuffer(), 0, totalBytesRead);
        }

        /// <summary>
        /// Transcribes audio into the input language.
        /// </summary>
        /// <param name="request"><see cref="AudioTranscriptionRequest"/>. Disposed before the request is sent.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns>The transcribed text.</returns>
        public async Task<string> CreateTranscriptionAsync(AudioTranscriptionRequest request, CancellationToken cancellationToken = default)
        {
            using var content = new MultipartFormDataContent();
            using var audioData = new MemoryStream();
            await request.Audio.CopyToAsync(audioData, cancellationToken).ConfigureAwait(false);
            content.Add(new ByteArrayContent(audioData.ToArray()), "file", request.AudioName);
            content.Add(new StringContent(request.Model), "model");

            if (!string.IsNullOrWhiteSpace(request.Prompt))
            {
                content.Add(new StringContent(request.Prompt), "prompt");
            }

            var responseFormat = request.ResponseFormat;
            // Invariant lower-casing: the API expects names like "verbose_json" regardless of host culture.
            content.Add(new StringContent(responseFormat.ToString().ToLowerInvariant()), "response_format");

            if (request.Temperature.HasValue)
            {
                // Invariant culture so the decimal separator is always '.', never a locale-specific ','.
                content.Add(new StringContent(request.Temperature.Value.ToString(CultureInfo.InvariantCulture)), "temperature");
            }

            if (!string.IsNullOrWhiteSpace(request.Language))
            {
                content.Add(new StringContent(request.Language), "language");
            }

            // The audio stream has been fully copied into the form content; release it now.
            request.Dispose();

            var response = await Api.Client.PostAsync(GetUrl("/transcriptions"), content, cancellationToken).ConfigureAwait(false);
            // NOTE(review): unlike CreateSpeechAsync there is no explicit CheckResponseAsync here —
            // presumably ReadAsStringAsync handles error responses; confirm in the extension.
            var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);

            return responseFormat == AudioResponseFormat.Json
                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
                : responseAsString;
        }

        /// <summary>
        /// Translates audio into English.
        /// </summary>
        /// <param name="request"><see cref="AudioTranslationRequest"/>. Disposed before the request is sent.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns>The translated text.</returns>
        public async Task<string> CreateTranslationAsync(AudioTranslationRequest request, CancellationToken cancellationToken = default)
        {
            using var content = new MultipartFormDataContent();
            using var audioData = new MemoryStream();
            await request.Audio.CopyToAsync(audioData, cancellationToken).ConfigureAwait(false);
            content.Add(new ByteArrayContent(audioData.ToArray()), "file", request.AudioName);
            content.Add(new StringContent(request.Model), "model");

            if (!string.IsNullOrWhiteSpace(request.Prompt))
            {
                content.Add(new StringContent(request.Prompt), "prompt");
            }

            var responseFormat = request.ResponseFormat;
            // Invariant lower-casing: the API expects names like "verbose_json" regardless of host culture.
            content.Add(new StringContent(responseFormat.ToString().ToLowerInvariant()), "response_format");

            if (request.Temperature.HasValue)
            {
                // Invariant culture so the decimal separator is always '.', never a locale-specific ','.
                content.Add(new StringContent(request.Temperature.Value.ToString(CultureInfo.InvariantCulture)), "temperature");
            }

            // The audio stream has been fully copied into the form content; release it now.
            request.Dispose();

            var response = await Api.Client.PostAsync(GetUrl("/translations"), content, cancellationToken).ConfigureAwait(false);
            var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);

            return responseFormat == AudioResponseFormat.Json
                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
                : responseAsString;
        }
    }
}
11 changes: 11 additions & 0 deletions src/libs/OpenAI/Client/Audio/AudioResponseFormat.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace OpenAI.Audio
{
    /// <summary>
    /// The format of the transcript output returned by the audio endpoints.<br/>
    /// Member names are lower-cased (invariant) when sent as the "response_format"
    /// form field, so the underscore in <see cref="Verbose_Json"/> is intentional:
    /// it serializes as "verbose_json".
    /// </summary>
    public enum AudioResponseFormat
    {
        /// <summary>JSON payload with a single "text" field. Default.</summary>
        Json = 0,
        /// <summary>Verbose JSON ("verbose_json") — presumably JSON with extra metadata; confirm against the API docs.</summary>
        Verbose_Json,
        /// <summary>Plain text.</summary>
        Text,
        /// <summary>SubRip subtitle format.</summary>
        Srt,
        /// <summary>WebVTT subtitle format.</summary>
        Vtt
    }
}
173 changes: 173 additions & 0 deletions src/libs/OpenAI/Client/Audio/AudioTranscriptionRequest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
using System;
using System.IO;

namespace OpenAI.Audio
{
    /// <summary>
    /// Request payload for the audio transcription endpoint.
    /// Owns the <see cref="Audio"/> stream and disposes it via <see cref="Dispose()"/>.
    /// </summary>
    public sealed class AudioTranscriptionRequest : IDisposable
    {
        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="audioPath">
        /// The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
        /// </param>
        /// <param name="model">
        /// ID of the model to use.
        /// </param>
        /// <param name="prompt">
        /// Optional, An optional text to guide the model's style or continue a previous audio segment.<br/>
        /// The prompt should be in English.
        /// </param>
        /// <param name="responseFormat">
        /// Optional, The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.<br/>
        /// Defaults to json.
        /// </param>
        /// <param name="temperature">
        /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random,
        /// while lower values like 0.2 will make it more focused and deterministic. If set to 0,
        /// the model will use log probability to automatically increase the temperature until certain thresholds are hit.<br/>
        /// Defaults to 0
        /// </param>
        /// <param name="language">
        /// Optional, The language of the input audio.
        /// Supplying the input language in ISO-639-1 format will improve accuracy and latency.
        /// </param>
        public AudioTranscriptionRequest(
            string audioPath,
            string model = null,
            string prompt = null,
            AudioResponseFormat responseFormat = AudioResponseFormat.Json,
            float? temperature = null,
            string language = null)
            : this(File.OpenRead(audioPath), Path.GetFileName(audioPath), model, prompt, responseFormat, temperature, language)
        {
        }

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="audio">
        /// The audio stream to transcribe. Ownership transfers to this request; it is
        /// closed when <see cref="Dispose()"/> is called.
        /// </param>
        /// <param name="audioName">
        /// The name of the audio file to transcribe. Falls back to "audio.wav" when
        /// null or whitespace, since the multipart form field needs a file name.
        /// </param>
        /// <param name="model">
        /// ID of the model to use. Only whisper-1 is currently available.
        /// </param>
        /// <param name="prompt">
        /// Optional, An optional text to guide the model's style or continue a previous audio segment.<br/>
        /// The prompt should be in English.
        /// </param>
        /// <param name="responseFormat">
        /// Optional, The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.<br/>
        /// Defaults to json.
        /// </param>
        /// <param name="temperature">
        /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random,
        /// while lower values like 0.2 will make it more focused and deterministic. If set to 0,
        /// the model will use log probability to automatically increase the temperature until certain thresholds are hit.<br/>
        /// Defaults to 0
        /// </param>
        /// <param name="language">
        /// Optional, The language of the input audio.
        /// Supplying the input language in ISO-639-1 format will improve accuracy and latency.
        /// </param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="audio"/> is null.</exception>
        public AudioTranscriptionRequest(
            Stream audio,
            string audioName,
            string model = null,
            string prompt = null,
            AudioResponseFormat responseFormat = AudioResponseFormat.Json,
            float? temperature = null,
            string language = null)
        {
            Audio = audio ?? throw new ArgumentNullException(nameof(audio));

            if (string.IsNullOrWhiteSpace(audioName))
            {
                audioName = "audio.wav";
            }

            AudioName = audioName;
            Model = string.IsNullOrWhiteSpace(model) ? Models.Model.Whisper1 : model;
            Prompt = prompt;
            ResponseFormat = responseFormat;
            Temperature = temperature;
            Language = language;
        }

        /// <summary>
        /// The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
        /// </summary>
        public Stream Audio { get; }

        /// <summary>
        /// The name of the audio file to transcribe.
        /// </summary>
        public string AudioName { get; }

        /// <summary>
        /// ID of the model to use. Only whisper-1 is currently available.
        /// </summary>
        public string Model { get; }

        /// <summary>
        /// Optional, An optional text to guide the model's style or continue a previous audio segment.<br/>
        /// The prompt should be in English.
        /// </summary>
        public string Prompt { get; }

        /// <summary>
        /// Optional, The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.<br/>
        /// Defaults to json.
        /// </summary>
        public AudioResponseFormat ResponseFormat { get; }

        /// <summary>
        /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random,
        /// while lower values like 0.2 will make it more focused and deterministic. If set to 0,
        /// the model will use log probability to automatically increase the temperature until certain thresholds are hit.<br/>
        /// Defaults to 0
        /// </summary>
        public float? Temperature { get; }

        /// <summary>
        /// Optional, The language of the input audio.
        /// Supplying the input language in ISO-639-1 format will improve accuracy and latency.
        /// </summary>
        public string Language { get; }

        /// <summary>
        /// Disposes the owned <see cref="Audio"/> stream.
        /// No finalizer is declared: this type holds only managed resources
        /// (Stream.Dispose also closes the stream, so the previous Close+Dispose
        /// pair was redundant), and the underlying stream types perform their
        /// own unmanaged cleanup.
        /// </summary>
        public void Dispose()
        {
            Audio?.Dispose();
        }
    }
}
Loading

0 comments on commit 007d9ce

Please sign in to comment.