Showing 85 changed files with 6,143 additions and 5 deletions.
@@ -0,0 +1,154 @@
using System;
using System.IO;
using System.Net.Http;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using OpenAI.Extensions;

namespace OpenAI.Audio
{
    /// <summary>
    /// Transforms audio into text.<br/>
    /// <see href="https://platform.openai.com/docs/api-reference/audio"/>
    /// </summary>
    public sealed class AudioEndpoint : BaseEndPoint
    {
        private class AudioResponse
        {
            public AudioResponse(string text)
            {
                Text = text;
            }

            [JsonPropertyName("text")]
            public string Text { get; }
        }

        /// <inheritdoc />
        public AudioEndpoint(OpenAIClient api) : base(api) { }

        /// <inheritdoc />
        protected override string Root => "audio";

        /// <summary>
        /// Generates audio from the input text.
        /// </summary>
        /// <param name="request"><see cref="SpeechRequest"/>.</param>
        /// <param name="chunkCallback">Optional, partial chunk <see cref="ReadOnlyMemory{T}"/> callback to stream audio as it arrives.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns><see cref="ReadOnlyMemory{T}"/></returns>
        public async Task<ReadOnlyMemory<byte>> CreateSpeechAsync(SpeechRequest request, Func<ReadOnlyMemory<byte>, Task> chunkCallback = null, CancellationToken cancellationToken = default)
        {
            var jsonContent = JsonSerializer.Serialize(request, OpenAIClient.JsonSerializationOptions).ToJsonStringContent(EnableDebug);
            var response = await Api.Client.PostAsync(GetUrl("/speech"), jsonContent, cancellationToken).ConfigureAwait(false);
            await response.CheckResponseAsync(cancellationToken).ConfigureAwait(false);
            await using var responseStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false);
            await using var memoryStream = new MemoryStream();
            int bytesRead;
            var totalBytesRead = 0;
            var buffer = new byte[8192];

            while ((bytesRead = await responseStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
            {
                await memoryStream.WriteAsync(new ReadOnlyMemory<byte>(buffer, 0, bytesRead), cancellationToken).ConfigureAwait(false);

                if (chunkCallback != null)
                {
                    try
                    {
                        await chunkCallback(new ReadOnlyMemory<byte>(memoryStream.GetBuffer(), totalBytesRead, bytesRead)).ConfigureAwait(false);
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(e);
                    }
                }

                totalBytesRead += bytesRead;
            }

            return new ReadOnlyMemory<byte>(memoryStream.GetBuffer(), 0, totalBytesRead);
        }

        /// <summary>
        /// Transcribes audio into the input language.
        /// </summary>
        /// <param name="request"><see cref="AudioTranscriptionRequest"/>.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns>The transcribed text.</returns>
        public async Task<string> CreateTranscriptionAsync(AudioTranscriptionRequest request, CancellationToken cancellationToken = default)
        {
            using var content = new MultipartFormDataContent();
            using var audioData = new MemoryStream();
            await request.Audio.CopyToAsync(audioData, cancellationToken).ConfigureAwait(false);
            content.Add(new ByteArrayContent(audioData.ToArray()), "file", request.AudioName);
            content.Add(new StringContent(request.Model), "model");

            if (!string.IsNullOrWhiteSpace(request.Prompt))
            {
                content.Add(new StringContent(request.Prompt), "prompt");
            }

            var responseFormat = request.ResponseFormat;
            content.Add(new StringContent(responseFormat.ToString().ToLower()), "response_format");

            if (request.Temperature.HasValue)
            {
                content.Add(new StringContent(request.Temperature.ToString()), "temperature");
            }

            if (!string.IsNullOrWhiteSpace(request.Language))
            {
                content.Add(new StringContent(request.Language), "language");
            }

            request.Dispose();

            var response = await Api.Client.PostAsync(GetUrl("/transcriptions"), content, cancellationToken).ConfigureAwait(false);
            var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);

            return responseFormat == AudioResponseFormat.Json
                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
                : responseAsString;
        }

        /// <summary>
        /// Translates audio into English.
        /// </summary>
        /// <param name="request"><see cref="AudioTranslationRequest"/>.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns>The translated text.</returns>
        public async Task<string> CreateTranslationAsync(AudioTranslationRequest request, CancellationToken cancellationToken = default)
        {
            using var content = new MultipartFormDataContent();
            using var audioData = new MemoryStream();
            await request.Audio.CopyToAsync(audioData, cancellationToken).ConfigureAwait(false);
            content.Add(new ByteArrayContent(audioData.ToArray()), "file", request.AudioName);
            content.Add(new StringContent(request.Model), "model");

            if (!string.IsNullOrWhiteSpace(request.Prompt))
            {
                content.Add(new StringContent(request.Prompt), "prompt");
            }

            var responseFormat = request.ResponseFormat;
            content.Add(new StringContent(responseFormat.ToString().ToLower()), "response_format");

            if (request.Temperature.HasValue)
            {
                content.Add(new StringContent(request.Temperature.ToString()), "temperature");
            }

            request.Dispose();

            var response = await Api.Client.PostAsync(GetUrl("/translations"), content, cancellationToken).ConfigureAwait(false);
            var responseAsString = await response.ReadAsStringAsync(EnableDebug, cancellationToken).ConfigureAwait(false);

            return responseFormat == AudioResponseFormat.Json
                ? JsonSerializer.Deserialize<AudioResponse>(responseAsString)?.Text
                : responseAsString;
        }
    }
}
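A minimal usage sketch for the endpoint above, assuming the client exposes it as an AudioEndpoint property on OpenAIClient and that SpeechRequest takes the input text as its first constructor argument; the property name, the SpeechRequest shape, and the file names are assumptions for illustration, not confirmed by this diff.

using System;
using System.IO;
using System.Threading.Tasks;
using OpenAI;
using OpenAI.Audio;

public static class AudioEndpointExample
{
    public static async Task RunAsync()
    {
        // Assumed: the client picks up the API key from its default configuration.
        var api = new OpenAIClient();

        // Text-to-speech: persist each chunk as it arrives via the optional callback;
        // the complete buffer is still returned at the end.
        var speechRequest = new SpeechRequest("Hello world!"); // constructor shape is an assumption
        ReadOnlyMemory<byte> audio;
        await using (var output = File.OpenWrite("hello.mp3"))
        {
            audio = await api.AudioEndpoint.CreateSpeechAsync(
                speechRequest,
                chunkCallback: async chunk => await output.WriteAsync(chunk));
        }
        Console.WriteLine($"Received {audio.Length} bytes of audio.");

        // Speech-to-text: transcribe a local file with the defaults (whisper-1, json).
        using var transcription = new AudioTranscriptionRequest("hello.mp3");
        var text = await api.AudioEndpoint.CreateTranscriptionAsync(transcription);
        Console.WriteLine(text);
    }
}

Because the chunk callback receives each newly downloaded slice of the buffer, streaming playback or writing can begin before the whole response has been read.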
@@ -0,0 +1,11 @@
namespace OpenAI.Audio
{
    public enum AudioResponseFormat
    {
        Json = 0,
        Verbose_Json,
        Text,
        Srt,
        Vtt
    }
}
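AudioEndpoint lowercases the enum name before posting it as the response_format form field, which is why Verbose_Json keeps its underscore. A small illustrative sketch of that mapping (the example class name is mine, not part of the library):

using System;
using OpenAI.Audio;

public static class ResponseFormatExample
{
    public static void Main()
    {
        // Prints the wire value produced for each enum member,
        // e.g. Verbose_Json -> "verbose_json".
        foreach (AudioResponseFormat format in Enum.GetValues(typeof(AudioResponseFormat)))
        {
            Console.WriteLine($"{format} -> {format.ToString().ToLower()}");
        }
    }
}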
src/libs/OpenAI/Client/Audio/AudioTranscriptionRequest.cs (173 additions, 0 deletions)
@@ -0,0 +1,173 @@
using System;
using System.IO;

namespace OpenAI.Audio
{
    public sealed class AudioTranscriptionRequest : IDisposable
    {
        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="audioPath">
        /// The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
        /// </param>
        /// <param name="model">
        /// ID of the model to use.
        /// </param>
        /// <param name="prompt">
        /// Optional, text to guide the model's style or continue a previous audio segment.<br/>
        /// The prompt should be in English.
        /// </param>
        /// <param name="responseFormat">
        /// Optional, the format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.<br/>
        /// Defaults to json.
        /// </param>
        /// <param name="temperature">
        /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random,
        /// while lower values like 0.2 will make it more focused and deterministic. If set to 0,
        /// the model will use log probability to automatically increase the temperature until certain thresholds are hit.<br/>
        /// Defaults to 0.
        /// </param>
        /// <param name="language">
        /// Optional, the language of the input audio.
        /// Supplying the input language in ISO-639-1 format will improve accuracy and latency.<br/>
        /// Currently supported languages: Afrikaans, Arabic, Armenian, Azerbaijani, Belarusian, Bosnian, Bulgarian, Catalan,
        /// Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Greek, Hebrew,
        /// Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Kannada, Kazakh, Korean, Latvian, Lithuanian,
        /// Macedonian, Malay, Marathi, Maori, Nepali, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Serbian,
        /// Slovak, Slovenian, Spanish, Swahili, Swedish, Tagalog, Tamil, Thai, Turkish, Ukrainian, Urdu, Vietnamese, and Welsh.
        /// </param>
        public AudioTranscriptionRequest(
            string audioPath,
            string model = null,
            string prompt = null,
            AudioResponseFormat responseFormat = AudioResponseFormat.Json,
            float? temperature = null,
            string language = null)
            : this(File.OpenRead(audioPath), Path.GetFileName(audioPath), model, prompt, responseFormat, temperature, language)
        {
        }

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="audio">
        /// The audio stream to transcribe.
        /// </param>
        /// <param name="audioName">
        /// The name of the audio file to transcribe.
        /// </param>
        /// <param name="model">
        /// ID of the model to use. Only whisper-1 is currently available.
        /// </param>
        /// <param name="prompt">
        /// Optional, text to guide the model's style or continue a previous audio segment.<br/>
        /// The prompt should be in English.
        /// </param>
        /// <param name="responseFormat">
        /// Optional, the format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.<br/>
        /// Defaults to json.
        /// </param>
        /// <param name="temperature">
        /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random,
        /// while lower values like 0.2 will make it more focused and deterministic. If set to 0,
        /// the model will use log probability to automatically increase the temperature until certain thresholds are hit.<br/>
        /// Defaults to 0.
        /// </param>
        /// <param name="language">
        /// Optional, the language of the input audio.
        /// Supplying the input language in ISO-639-1 format will improve accuracy and latency.<br/>
        /// Currently supported languages: Afrikaans, Arabic, Armenian, Azerbaijani, Belarusian, Bosnian, Bulgarian, Catalan,
        /// Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Greek, Hebrew,
        /// Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Kannada, Kazakh, Korean, Latvian, Lithuanian,
        /// Macedonian, Malay, Marathi, Maori, Nepali, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Serbian,
        /// Slovak, Slovenian, Spanish, Swahili, Swedish, Tagalog, Tamil, Thai, Turkish, Ukrainian, Urdu, Vietnamese, and Welsh.
        /// </param>
        public AudioTranscriptionRequest(
            Stream audio,
            string audioName,
            string model = null,
            string prompt = null,
            AudioResponseFormat responseFormat = AudioResponseFormat.Json,
            float? temperature = null,
            string language = null)
        {
            Audio = audio;

            if (string.IsNullOrWhiteSpace(audioName))
            {
                audioName = "audio.wav";
            }

            AudioName = audioName;
            Model = string.IsNullOrWhiteSpace(model) ? Models.Model.Whisper1 : model;
            Prompt = prompt;
            ResponseFormat = responseFormat;
            Temperature = temperature;
            Language = language;
        }

        ~AudioTranscriptionRequest() => Dispose(false);

        /// <summary>
        /// The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
        /// </summary>
        public Stream Audio { get; }

        /// <summary>
        /// The name of the audio file to transcribe.
        /// </summary>
        public string AudioName { get; }

        /// <summary>
        /// ID of the model to use. Only whisper-1 is currently available.
        /// </summary>
        public string Model { get; }

        /// <summary>
        /// Optional, text to guide the model's style or continue a previous audio segment.<br/>
        /// The prompt should be in English.
        /// </summary>
        public string Prompt { get; }

        /// <summary>
        /// Optional, the format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.<br/>
        /// Defaults to json.
        /// </summary>
        public AudioResponseFormat ResponseFormat { get; }

        /// <summary>
        /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random,
        /// while lower values like 0.2 will make it more focused and deterministic. If set to 0,
        /// the model will use log probability to automatically increase the temperature until certain thresholds are hit.<br/>
        /// Defaults to 0.
        /// </summary>
        public float? Temperature { get; }

        /// <summary>
        /// Optional, the language of the input audio.
        /// Supplying the input language in ISO-639-1 format will improve accuracy and latency.<br/>
        /// Currently supported languages: Afrikaans, Arabic, Armenian, Azerbaijani, Belarusian, Bosnian, Bulgarian, Catalan,
        /// Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Greek, Hebrew,
        /// Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Kannada, Kazakh, Korean, Latvian, Lithuanian,
        /// Macedonian, Malay, Marathi, Maori, Nepali, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Serbian,
        /// Slovak, Slovenian, Spanish, Swahili, Swedish, Tagalog, Tamil, Thai, Turkish, Ukrainian, Urdu, Vietnamese, and Welsh.
        /// </summary>
        public string Language { get; }

        private void Dispose(bool disposing)
        {
            if (disposing)
            {
                Audio?.Close();
                Audio?.Dispose();
            }
        }

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }
    }
}
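A short construction sketch for both overloads above; the file name, stream, and option values are placeholders chosen for illustration.

using System.IO;
using OpenAI.Audio;

public static class TranscriptionRequestExample
{
    public static void Build()
    {
        // From a path: the file stream is opened and the file name is inferred for you.
        using var fromPath = new AudioTranscriptionRequest(
            "meeting.m4a",
            responseFormat: AudioResponseFormat.Verbose_Json,
            temperature: 0.2f,
            language: "en");

        // From an arbitrary stream: supply the name yourself so the multipart upload has one;
        // a blank name falls back to "audio.wav".
        using var audioStream = File.OpenRead("meeting.m4a");
        using var fromStream = new AudioTranscriptionRequest(audioStream, "meeting.m4a");
    }
}

Note that AudioEndpoint.CreateTranscriptionAsync disposes the request once the multipart body has been built, so a request, and the stream it wraps, is effectively single-use.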