From eac1a690ab8ed428c00b394401587e858f2a6142 Mon Sep 17 00:00:00 2001 From: Ronny Gunawan <3048897+ronnygunawan@users.noreply.github.com> Date: Sun, 3 Dec 2023 14:12:07 +0700 Subject: [PATCH 1/2] Use the new ChatMessage format --- BotNet.Services/BotCommands/OpenAI.cs | 10 ++- BotNet.Services/OpenAI/IntentDetector.cs | 4 +- BotNet.Services/OpenAI/Models/ChatMessage.cs | 59 ++++++++++++++++- BotNet.Services/OpenAI/Models/Choice.cs | 9 ++- .../OpenAI/OpenAIStreamingClient.cs | 2 + BotNet.Services/OpenAI/Skills/FriendlyBot.cs | 66 ++++++++++--------- BotNet.Services/OpenAI/Skills/SarcasticBot.cs | 4 +- BotNet.Services/OpenAI/Skills/VisionBot.cs | 7 ++ BotNet.Services/OpenAI/ThreadTracker.cs | 12 ++-- BotNet/Bot/UpdateHandler.cs | 9 ++- 10 files changed, 131 insertions(+), 51 deletions(-) create mode 100644 BotNet.Services/OpenAI/Skills/VisionBot.cs diff --git a/BotNet.Services/BotCommands/OpenAI.cs b/BotNet.Services/BotCommands/OpenAI.cs index fe14c8e..7bc2d02 100644 --- a/BotNet.Services/BotCommands/OpenAI.cs +++ b/BotNet.Services/BotCommands/OpenAI.cs @@ -446,7 +446,7 @@ await botClient.SendTextMessageAsync( } [Obsolete("Use StreamChatWithFriendlyBotAsync instead.", error: true)] - public static async Task ChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string Text)> thread, CancellationToken cancellationToken) { + public static async Task ChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, CancellationToken cancellationToken) { try { (message.Chat.Type == ChatType.Private ? CHAT_PRIVATE_RATE_LIMITER @@ -592,7 +592,7 @@ await botClient.SendTextMessageAsync( return null; } - public static async Task ChatWithSarcasticBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string Text)> thread, string callSign, CancellationToken cancellationToken) { + public static async Task ChatWithSarcasticBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, string callSign, CancellationToken cancellationToken) { try { (message.Chat.Type == ChatType.Private ? CHAT_PRIVATE_RATE_LIMITER @@ -761,7 +761,7 @@ await botClient.SendTextMessageAsync( } } - public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, string callSign, Message message, CancellationToken cancellationToken) { + public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, CancellationToken cancellationToken) { try { (message.Chat.Type == ChatType.Private ? 
CHAT_PRIVATE_RATE_LIMITER @@ -769,7 +769,6 @@ public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botCl ).ValidateActionRate(message.Chat.Id, message.From!.Id); await serviceProvider.GetRequiredService().StreamChatAsync( message: message.Text!, - callSign: callSign, chatId: message.Chat.Id, replyToMessageId: message.MessageId ); @@ -802,7 +801,7 @@ await botClient.SendTextMessageAsync( } } - public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, string callSign, Message message, ImmutableList<(string Sender, string Text)> thread, CancellationToken cancellationToken) { + public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, CancellationToken cancellationToken) { try { (message.Chat.Type == ChatType.Private ? CHAT_PRIVATE_RATE_LIMITER @@ -811,7 +810,6 @@ public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botCl await serviceProvider.GetRequiredService().StreamChatAsync( message: message.Text!, thread: thread, - callSign: callSign, chatId: message.Chat.Id, replyToMessageId: message.MessageId ); diff --git a/BotNet.Services/OpenAI/IntentDetector.cs b/BotNet.Services/OpenAI/IntentDetector.cs index 7a523aa..d306482 100644 --- a/BotNet.Services/OpenAI/IntentDetector.cs +++ b/BotNet.Services/OpenAI/IntentDetector.cs @@ -14,7 +14,7 @@ public async Task DetectChatIntentAsync( CancellationToken cancellationToken ) { List messages = [ - new("user", $$""" + ChatMessage.FromText("user", $$""" These are available intents that one might query when they provide a text prompt: Question, @@ -49,7 +49,7 @@ public async Task DetectImagePromptIntentAsync( CancellationToken cancellationToken ) { List messages = [ - new("user", $$""" + ChatMessage.FromText("user", $$""" These are available intents that one might query when they provide a prompt which contain an image: Vision, diff --git a/BotNet.Services/OpenAI/Models/ChatMessage.cs b/BotNet.Services/OpenAI/Models/ChatMessage.cs index 701ddf6..bd25954 100644 --- a/BotNet.Services/OpenAI/Models/ChatMessage.cs +++ b/BotNet.Services/OpenAI/Models/ChatMessage.cs @@ -1,6 +1,61 @@ -namespace BotNet.Services.OpenAI.Models { +using System.Collections.Generic; +using System.Text.Json.Serialization; + +namespace BotNet.Services.OpenAI.Models { public record ChatMessage( string Role, - string Content + List Content + ) { + public static ChatMessage FromText(string role, string text) => new( + Role: role, + Content: [ + new ChatContent( + Type: "text", + Text: text, + ImageUrl: null + ) + ] + ); + + public static ChatMessage FromTextWithImageBase64(string role, string text, string imageBase64) => new( + Role: role, + Content: [ + new ChatContent( + Type: "text", + Text: text, + ImageUrl: null + ), + new ChatContent( + Type: "image_url", + Text: null, + ImageUrl: new( + Url: $"data:image/png;base64,{imageBase64}" + ) + ) + ] + ); + + public static ChatMessage FromImageBase64(string role, string imageBase64) => new( + Role: role, + Content: [ + new ChatContent( + Type: "image_url", + Text: null, + ImageUrl: new( + Url: $"data:image/png;base64,{imageBase64}" + ) + ) + ] + ); + } + + public record ChatContent( + string Type, + [property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] string? Text, + [property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] ImageUrl? 
ImageUrl + ); + + public record ImageUrl( + string Url ); } diff --git a/BotNet.Services/OpenAI/Models/Choice.cs b/BotNet.Services/OpenAI/Models/Choice.cs index 7409c92..6fa2350 100644 --- a/BotNet.Services/OpenAI/Models/Choice.cs +++ b/BotNet.Services/OpenAI/Models/Choice.cs @@ -2,9 +2,14 @@ public record Choice( string? Text, int? Index, - ChatMessage? Message, - ChatMessage? Delta, + ChoiceChatMessage? Message, + ChoiceChatMessage? Delta, Logprobs? Logprobs, string? FinishReason ); + + public record ChoiceChatMessage( + string Role, + string Content + ); } diff --git a/BotNet.Services/OpenAI/OpenAIStreamingClient.cs b/BotNet.Services/OpenAI/OpenAIStreamingClient.cs index d35951c..e92c7e7 100644 --- a/BotNet.Services/OpenAI/OpenAIStreamingClient.cs +++ b/BotNet.Services/OpenAI/OpenAIStreamingClient.cs @@ -73,6 +73,7 @@ await Task.WhenAny( messageId: completeMessage.MessageId, sender: callSign, text: lastResult!, + imageBase64: null, replyToMessageId: replyToMessageId ); @@ -143,6 +144,7 @@ await telegramBotClient.EditMessageTextAsync( messageId: incompleteMessage.MessageId, sender: callSign, text: lastResult!, + imageBase64: null, replyToMessageId: replyToMessageId ); } catch { diff --git a/BotNet.Services/OpenAI/Skills/FriendlyBot.cs b/BotNet.Services/OpenAI/Skills/FriendlyBot.cs index 17aaaca..2548e01 100644 --- a/BotNet.Services/OpenAI/Skills/FriendlyBot.cs +++ b/BotNet.Services/OpenAI/Skills/FriendlyBot.cs @@ -1,13 +1,13 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Threading; using System.Threading.Tasks; using BotNet.Services.OpenAI.Models; -using Telegram.Bot.Types; namespace BotNet.Services.OpenAI.Skills { - public class FriendlyBot( + public sealed class FriendlyBot( OpenAIClient openAIClient, OpenAIStreamingClient openAIStreamingClient ) { @@ -59,8 +59,8 @@ public Task RespondToThreadAsync(string callSign, string name, string qu public Task ChatAsync(string message, CancellationToken cancellationToken) { List messages = [ - new("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), - new("user", message) + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromText("user", message) ]; return _openAIClient.ChatAsync( @@ -71,36 +71,39 @@ public Task ChatAsync(string message, CancellationToken cancellationToke ); } - public async Task StreamChatAsync(string message, string callSign, long chatId, int replyToMessageId) { + public async Task StreamChatAsync(string message, long chatId, int replyToMessageId) { List messages = [ - new("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), - new("user", message) + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromText("user", message) ]; await _openAIStreamingClient.StreamChatAsync( model: "gpt-4-1106-preview", messages: messages, maxTokens: 512, - callSign: callSign, + callSign: "AI", chatId: chatId, replyToMessageId: replyToMessageId ); } - public Task ChatAsync(string message, ImmutableList<(string Sender, string Text)> thread, CancellationToken cancellationToken) { + public Task ChatAsync(string message, ImmutableList<(string Sender, string? 
Text, string? ImageBase64)> thread, CancellationToken cancellationToken) { List messages = new() { - new("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), from tuple in thread - select new ChatMessage( - Role: tuple.Sender switch { - "AI" => "assistant", - _ => "user" - }, - Content: tuple.Text - ), + let role = tuple.Sender switch { + "AI" => "assistant", + _ => "user" + } + select tuple switch { + { Text: { } text, ImageBase64: null } => ChatMessage.FromText(role, text), + { Text: null, ImageBase64: { } imageBase64 } => ChatMessage.FromImageBase64(role, imageBase64), + { Text: { } text, ImageBase64: { } imageBase64 } => ChatMessage.FromTextWithImageBase64(role, text, imageBase64), + _ => ChatMessage.FromText(role, "") + }, - new("user", message) + ChatMessage.FromText("user", message) }; return _openAIClient.ChatAsync( @@ -111,27 +114,30 @@ from tuple in thread ); } - public async Task StreamChatAsync(string message, ImmutableList<(string Sender, string Text)> thread, string callSign, long chatId, int replyToMessageId) { + public async Task StreamChatAsync(string message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, long chatId, int replyToMessageId) { List messages = new() { - new("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), from tuple in thread - select new ChatMessage( - Role: tuple.Sender switch { - "AI" => "assistant", - _ => "user" - }, - Content: tuple.Text - ), + let role = tuple.Sender switch { + "AI" => "assistant", + _ => "user" + } + select tuple switch { + { Text: { } text, ImageBase64: null } => ChatMessage.FromText(role, text), + { Text: null, ImageBase64: { } imageBase64 } => ChatMessage.FromImageBase64(role, imageBase64), + { Text: { } text, ImageBase64: { } imageBase64 } => ChatMessage.FromTextWithImageBase64(role, text, imageBase64), + _ => ChatMessage.FromText(role, "") + }, - new("user", message) + ChatMessage.FromText("user", message) }; await _openAIStreamingClient.StreamChatAsync( model: "gpt-4-1106-preview", messages: messages, maxTokens: 512, - callSign: callSign, + callSign: "AI", chatId: chatId, replyToMessageId: replyToMessageId ); diff --git a/BotNet.Services/OpenAI/Skills/SarcasticBot.cs b/BotNet.Services/OpenAI/Skills/SarcasticBot.cs index d08f840..084b559 100644 --- a/BotNet.Services/OpenAI/Skills/SarcasticBot.cs +++ b/BotNet.Services/OpenAI/Skills/SarcasticBot.cs @@ -33,7 +33,7 @@ public Task ChatAsync(string callSign, string name, string question, Can ); } - public Task RespondToThreadAsync(string callSign, string name, string question, ImmutableList<(string Sender, string Text)> thread, CancellationToken cancellationToken) { + public Task RespondToThreadAsync(string callSign, string name, string question, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, CancellationToken cancellationToken) { string prompt = $"{callSign} adalah chatbot berbahasa Indonesia yang tidak ramah, kurang antusias dalam menjawab pertanyaan, dan suka mengomel.\n\n" + $"{name}: Satu kilogram itu berapa pound?\n" + $"{callSign}: Kamu tanya ini lagi? 
Satu kilogram itu 2.2 pound. Tolong dicatat lah.\n\n" @@ -43,7 +43,7 @@ public Task RespondToThreadAsync(string callSign, string name, string qu + $"{callSign}: Tanggal 17 Desember 1903, Wilbur dan Orville Wright menerbangkan pesawat terbang pertama dalam sejarah. Semoga mereka mengangkut saya dari sini.\n\n" + $"{name}: Apa makna kehidupan?\n" + $"{callSign}: Entahlah. Nanti coba saya tanya ke teman saya Google.\n\n"; - foreach ((string sender, string text) in thread) { + foreach ((string sender, string? text, string? imageBase64) in thread) { prompt += $"{sender}: {text}\n"; if (sender is "AI" or "Pakde") prompt += "\n"; } diff --git a/BotNet.Services/OpenAI/Skills/VisionBot.cs b/BotNet.Services/OpenAI/Skills/VisionBot.cs new file mode 100644 index 0000000..83061bc --- /dev/null +++ b/BotNet.Services/OpenAI/Skills/VisionBot.cs @@ -0,0 +1,7 @@ +namespace BotNet.Services.OpenAI.Skills { + public sealed class VisionBot( + OpenAIStreamingClient openAIStreamingClient + ) { + private readonly OpenAIStreamingClient _openAIStreamingClient = openAIStreamingClient; + } +} diff --git a/BotNet.Services/OpenAI/ThreadTracker.cs b/BotNet.Services/OpenAI/ThreadTracker.cs index dff2d25..569249b 100644 --- a/BotNet.Services/OpenAI/ThreadTracker.cs +++ b/BotNet.Services/OpenAI/ThreadTracker.cs @@ -11,7 +11,8 @@ IMemoryCache memoryCache public void TrackMessage( long messageId, string sender, - string text, + string? text, + string? imageBase64, long? replyToMessageId ) { _memoryCache.Set( @@ -19,6 +20,7 @@ public void TrackMessage( value: new Message( Sender: sender, Text: text, + ImageBase64: imageBase64, ReplyToMessageId: replyToMessageId.HasValue ? new(replyToMessageId.Value) : null @@ -27,7 +29,7 @@ public void TrackMessage( ); } - public IEnumerable<(string Sender, string Text)> GetThread( + public IEnumerable<(string Sender, string? Text, string? ImageBase64)> GetThread( long messageId, int maxLines ) { @@ -37,7 +39,8 @@ int maxLines ) && message != null && maxLines-- > 0) { yield return ( Sender: message.Sender, - Text: message.Text + Text: message.Text, + ImageBase64: message.ImageBase64 ); if (message.ReplyToMessageId == null) { @@ -51,7 +54,8 @@ int maxLines private readonly record struct MessageId(long Value); private sealed record Message( string Sender, - string Text, + string? Text, + string? ImageBase64, MessageId? ReplyToMessageId ); } diff --git a/BotNet/Bot/UpdateHandler.cs b/BotNet/Bot/UpdateHandler.cs index 268cf76..27a5dad 100644 --- a/BotNet/Bot/UpdateHandler.cs +++ b/BotNet/Bot/UpdateHandler.cs @@ -82,7 +82,7 @@ await botClient.SendTextMessageAsync( // Respond to call sign switch (callSign) { case "AI": - await OpenAI.StreamChatWithFriendlyBotAsync(botClient, _serviceProvider, "AI", update.Message, cancellationToken); + await OpenAI.StreamChatWithFriendlyBotAsync(botClient, _serviceProvider, update.Message, cancellationToken); break; case "Pakde": Message? 
sentMessage = await OpenAI.ChatWithSarcasticBotAsync(botClient, _serviceProvider, update.Message, callSign, cancellationToken); @@ -92,6 +92,7 @@ await botClient.SendTextMessageAsync( messageId: sentMessage.MessageId, sender: callSign, text: sentMessage.Text!, + imageBase64: null, replyToMessageId: sentMessage.ReplyToMessage!.MessageId ); } @@ -122,11 +123,12 @@ await botClient.SendTextMessageAsync( messageId: update.Message.MessageId, sender: $"{firstName}{lastName?.Let(lastName => " " + lastName)}", text: text, + imageBase64: null, replyToMessageId: replyToMessageId ); // Get thread - ImmutableList<(string Sender, string Text)> thread = threadTracker.GetThread( + ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread = threadTracker.GetThread( messageId: replyToMessageId, maxLines: 20 ).ToImmutableList(); @@ -139,7 +141,7 @@ await botClient.SendTextMessageAsync( // Respond to thread switch (callSign) { case "AI": - await OpenAI.StreamChatWithFriendlyBotAsync(botClient, _serviceProvider, "AI", update.Message, thread, cancellationToken); + await OpenAI.StreamChatWithFriendlyBotAsync(botClient, _serviceProvider, update.Message, thread, cancellationToken); break; case "Pakde": Message? sentMessage = await OpenAI.ChatWithSarcasticBotAsync(botClient, _serviceProvider, update.Message, thread, callSign, cancellationToken); @@ -149,6 +151,7 @@ await botClient.SendTextMessageAsync( messageId: sentMessage.MessageId, sender: callSign, text: sentMessage.Text!, + imageBase64: null, replyToMessageId: sentMessage.ReplyToMessage!.MessageId ); } From 81a81c12cb4329caf1f777cf214200406b892d6a Mon Sep 17 00:00:00 2001 From: Ronny Gunawan <3048897+ronnygunawan@users.noreply.github.com> Date: Sun, 3 Dec 2023 15:51:14 +0700 Subject: [PATCH 2/2] Implement vision bot --- BotNet.Services/BotCommands/OpenAI.cs | 86 +++++++++++++++++-- BotNet.Services/OpenAI/Models/ChatMessage.cs | 11 +-- BotNet.Services/OpenAI/OpenAIClient.cs | 15 +++- .../OpenAI/OpenAIStreamingClient.cs | 28 +++--- .../OpenAI/ServiceCollectionExtensions.cs | 1 + BotNet.Services/OpenAI/Skills/FriendlyBot.cs | 3 +- BotNet.Services/OpenAI/Skills/VisionBot.cs | 64 +++++++++++++- BotNet/Bot/UpdateHandler.cs | 10 +-- 8 files changed, 184 insertions(+), 34 deletions(-) diff --git a/BotNet.Services/BotCommands/OpenAI.cs b/BotNet.Services/BotCommands/OpenAI.cs index 7bc2d02..3ffcf6f 100644 --- a/BotNet.Services/BotCommands/OpenAI.cs +++ b/BotNet.Services/BotCommands/OpenAI.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Immutable; +using System.IO; using System.Linq; using System.Net; using System.Threading; @@ -10,6 +11,7 @@ using BotNet.Services.RateLimit; using Microsoft.Extensions.DependencyInjection; using RG.Ninja; +using SkiaSharp; using Telegram.Bot; using Telegram.Bot.Types; using Telegram.Bot.Types.Enums; @@ -761,17 +763,55 @@ await botClient.SendTextMessageAsync( } } - public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, CancellationToken cancellationToken) { + public static async Task StreamChatWithFriendlyBotAsync( + ITelegramBotClient botClient, + IServiceProvider serviceProvider, + Message message, + CancellationToken cancellationToken + ) { try { (message.Chat.Type == ChatType.Private ? 
CHAT_PRIVATE_RATE_LIMITER
					: CHAT_GROUP_RATE_LIMITER
				).ValidateActionRate(message.Chat.Id, message.From!.Id);
-			await serviceProvider.GetRequiredService<FriendlyBot>().StreamChatAsync(
-				message: message.Text!,
-				chatId: message.Chat.Id,
-				replyToMessageId: message.MessageId
-			);
+
+			PhotoSize? photoSize;
+			string? caption;
+			if (message is { Photo.Length: > 0, Caption: { } }) {
+				photoSize = message.Photo.OrderByDescending(photoSize => photoSize.Width).First();
+				caption = message.Caption;
+			} else if (message.ReplyToMessage is { Photo.Length: > 0, Caption: { } }) {
+				photoSize = message.ReplyToMessage.Photo.OrderByDescending(photoSize => photoSize.Width).First();
+				caption = message.ReplyToMessage.Caption;
+			} else {
+				photoSize = null;
+				caption = null;
+			}
+
+			if (photoSize != null && caption != null) {
+				(string? imageBase64, string? error) = await GetImageBase64Async(botClient, photoSize, cancellationToken);
+				if (error != null) {
+					await botClient.SendTextMessageAsync(
+						chatId: message.Chat.Id,
+						text: $"{error}",
+						parseMode: ParseMode.Html,
+						replyToMessageId: message.MessageId,
+						cancellationToken: cancellationToken);
+					return;
+				}
+				await serviceProvider.GetRequiredService<VisionBot>().StreamChatAsync(
+					message: caption,
+					imageBase64: imageBase64!,
+					chatId: message.Chat.Id,
+					replyToMessageId: message.MessageId
+				);
+			} else {
+				await serviceProvider.GetRequiredService<FriendlyBot>().StreamChatAsync(
+					message: message.Text!,
+					chatId: message.Chat.Id,
+					replyToMessageId: message.MessageId
+				);
+			}
		} catch (RateLimitExceededException exc) when (exc is { Cooldown: var cooldown }) {
			if (message.Chat.Type == ChatType.Private) {
				await botClient.SendTextMessageAsync(
@@ -841,5 +881,39 @@ await botClient.SendTextMessageAsync(
					cancellationToken: cancellationToken);
			}
		}
+
+		private static async Task<(string? ImageBase64, string? Error)> GetImageBase64Async(ITelegramBotClient botClient, PhotoSize photoSize, CancellationToken cancellationToken) {
+			// Download photo
+			using MemoryStream originalImageStream = new();
+			await botClient.GetInfoAndDownloadFileAsync(
+				fileId: photoSize.FileId,
+				destination: originalImageStream,
+				cancellationToken: cancellationToken);
+			byte[] originalImage = originalImageStream.ToArray();
+
+			// Limit input image to 200KB
+			if (originalImage.Length > 200 * 1024) {
+				return (null, "Image larger than 200KB");
+			}
+
+			// Decode image
+			originalImageStream.Position = 0;
+			using SKCodec codec = SKCodec.Create(originalImageStream, out SKCodecResult codecResult);
+			if (codecResult != SKCodecResult.Success) {
+				return (null, "Invalid image");
+			}
+			if (codec.EncodedFormat != SKEncodedImageFormat.Jpeg) {
+				return (null, "Image must be a JPEG image");
+			}
+			using SKBitmap bitmap = SKBitmap.Decode(codec);
+
+			// Limit input image to 1280x1280
+			if (bitmap.Width > 1280 || bitmap.Height > 1280) {
+				return (null, "Image larger than 1280x1280");
+			}
+
+			// Encode image as base64
+			return (Convert.ToBase64String(originalImage), null);
+		}
	}
}
diff --git a/BotNet.Services/OpenAI/Models/ChatMessage.cs b/BotNet.Services/OpenAI/Models/ChatMessage.cs
index bd25954..4c22698 100644
--- a/BotNet.Services/OpenAI/Models/ChatMessage.cs
+++ b/BotNet.Services/OpenAI/Models/ChatMessage.cs
@@ -1,4 +1,5 @@
-using System.Collections.Generic;
+using System;
+using System.Collections.Generic;
 using System.Text.Json.Serialization;
 namespace BotNet.Services.OpenAI.Models {
@@ -11,7 +12,7 @@ List<ChatContent> Content
 			Content: [
 				new ChatContent(
 					Type: "text",
-					Text: text,
+					Text: text ??
throw new ArgumentNullException(nameof(text)), ImageUrl: null ) ] @@ -22,14 +23,14 @@ List Content Content: [ new ChatContent( Type: "text", - Text: text, + Text: text ?? throw new ArgumentNullException(nameof(text)), ImageUrl: null ), new ChatContent( Type: "image_url", Text: null, ImageUrl: new( - Url: $"data:image/png;base64,{imageBase64}" + Url: $"data:image/jpeg;base64,{imageBase64 ?? throw new ArgumentNullException(nameof(imageBase64))}" ) ) ] @@ -42,7 +43,7 @@ List Content Type: "image_url", Text: null, ImageUrl: new( - Url: $"data:image/png;base64,{imageBase64}" + Url: $"data:image/jpeg;base64,{imageBase64 ?? throw new ArgumentNullException(nameof(imageBase64))}" ) ) ] diff --git a/BotNet.Services/OpenAI/OpenAIClient.cs b/BotNet.Services/OpenAI/OpenAIClient.cs index 7f1a9b4..3ad488c 100644 --- a/BotNet.Services/OpenAI/OpenAIClient.cs +++ b/BotNet.Services/OpenAI/OpenAIClient.cs @@ -9,13 +9,15 @@ using System.Threading.Tasks; using BotNet.Services.Json; using BotNet.Services.OpenAI.Models; +using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using RG.Ninja; namespace BotNet.Services.OpenAI { public class OpenAIClient( HttpClient httpClient, - IOptions openAIOptionsAccessor + IOptions openAIOptionsAccessor, + ILogger logger ) { private const string COMPLETION_URL_TEMPLATE = "https://api.openai.com/v1/engines/{0}/completions"; private const string CHAT_URL = "https://api.openai.com/v1/chat/completions"; @@ -23,7 +25,8 @@ IOptions openAIOptionsAccessor PropertyNamingPolicy = new SnakeCaseNamingPolicy() }; private readonly HttpClient _httpClient = httpClient; - private readonly string _apiKey = openAIOptionsAccessor.Value.ApiKey!; + private readonly string _apiKey = openAIOptionsAccessor.Value.ApiKey!; + private readonly ILogger _logger = logger; public async Task AutocompleteAsync(string engine, string prompt, string[]? 
stop, int maxTokens, double frequencyPenalty, double presencePenalty, double temperature, double topP, CancellationToken cancellationToken) { using HttpRequestMessage request = new(HttpMethod.Post, string.Format(COMPLETION_URL_TEMPLATE, engine)) { @@ -111,8 +114,12 @@ [EnumeratorCancellation] CancellationToken cancellationToken options: JSON_SERIALIZER_OPTIONS ) }; - using HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken); - response.EnsureSuccessStatusCode(); + using HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken); + if (!response.IsSuccessStatusCode) { + string errorMessage = await response.Content.ReadAsStringAsync(cancellationToken); + _logger.LogError(errorMessage); + response.EnsureSuccessStatusCode(); + } StringBuilder result = new(); using Stream stream = await response.Content.ReadAsStreamAsync(cancellationToken); diff --git a/BotNet.Services/OpenAI/OpenAIStreamingClient.cs b/BotNet.Services/OpenAI/OpenAIStreamingClient.cs index e92c7e7..f34cbcc 100644 --- a/BotNet.Services/OpenAI/OpenAIStreamingClient.cs +++ b/BotNet.Services/OpenAI/OpenAIStreamingClient.cs @@ -43,12 +43,16 @@ int replyToMessageId // Task for continuously consuming the stream Task downstreamTask = Task.Run(async () => { - await foreach ((string result, bool stop) in enumerable) { - lastResult = result; + try { + await foreach ((string result, bool stop) in enumerable) { + lastResult = result; - if (stop) { - break; + if (stop) { + break; + } } + } catch (Exception exc) { + _logger.LogError(exc, null); } }); @@ -59,10 +63,12 @@ await Task.WhenAny( ); // If downstream task is completed, send the last result - if (downstreamTask.IsCompleted) { + if (downstreamTask.IsCompletedSuccessfully) { + if (lastResult is null) return; + Message completeMessage = await telegramBotClient.SendTextMessageAsync( chatId: chatId, - text: MarkdownV2Sanitizer.Sanitize(lastResult!), + text: MarkdownV2Sanitizer.Sanitize(lastResult), parseMode: ParseMode.MarkdownV2, replyToMessageId: replyToMessageId ); @@ -72,7 +78,7 @@ await Task.WhenAny( threadTracker.TrackMessage( messageId: completeMessage.MessageId, sender: callSign, - text: lastResult!, + text: lastResult, imageBase64: null, replyToMessageId: replyToMessageId ); @@ -82,10 +88,10 @@ await Task.WhenAny( } // Otherwise, send incomplete result and continue streaming - string lastSent = lastResult!; + string lastSent = lastResult ?? ""; Message incompleteMessage = await telegramBotClient.SendTextMessageAsync( chatId: chatId, - text: MarkdownV2Sanitizer.Sanitize(lastResult!) + "… ⏳", // ellipsis, nbsp, hourglass emoji + text: MarkdownV2Sanitizer.Sanitize(lastResult ?? "") + "… ⏳", // ellipsis, nbsp, hourglass emoji parseMode: ParseMode.MarkdownV2, replyToMessageId: replyToMessageId ); @@ -105,7 +111,7 @@ await Task.WhenAny( await telegramBotClient.EditMessageTextAsync( chatId: chatId, messageId: incompleteMessage.MessageId, - text: MarkdownV2Sanitizer.Sanitize(lastResult!) + "… ⏳", // ellipsis, nbsp, hourglass emoji + text: MarkdownV2Sanitizer.Sanitize(lastResult ?? "") + "… ⏳", // ellipsis, nbsp, hourglass emoji parseMode: ParseMode.MarkdownV2, cancellationToken: cts.Token ); @@ -129,7 +135,7 @@ await telegramBotClient.EditMessageTextAsync( await telegramBotClient.EditMessageTextAsync( chatId: chatId, messageId: incompleteMessage.MessageId, - text: MarkdownV2Sanitizer.Sanitize(lastResult!), + text: MarkdownV2Sanitizer.Sanitize(lastResult ?? 
""), parseMode: ParseMode.MarkdownV2, cancellationToken: cts.Token ); diff --git a/BotNet.Services/OpenAI/ServiceCollectionExtensions.cs b/BotNet.Services/OpenAI/ServiceCollectionExtensions.cs index 96818fc..b2cf318 100644 --- a/BotNet.Services/OpenAI/ServiceCollectionExtensions.cs +++ b/BotNet.Services/OpenAI/ServiceCollectionExtensions.cs @@ -16,6 +16,7 @@ public static IServiceCollection AddOpenAIClient(this IServiceCollection service services.AddTransient(); services.AddTransient(); services.AddTransient(); + services.AddTransient(); return services; } } diff --git a/BotNet.Services/OpenAI/Skills/FriendlyBot.cs b/BotNet.Services/OpenAI/Skills/FriendlyBot.cs index 2548e01..606864c 100644 --- a/BotNet.Services/OpenAI/Skills/FriendlyBot.cs +++ b/BotNet.Services/OpenAI/Skills/FriendlyBot.cs @@ -1,5 +1,4 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; using System.Threading; diff --git a/BotNet.Services/OpenAI/Skills/VisionBot.cs b/BotNet.Services/OpenAI/Skills/VisionBot.cs index 83061bc..8227de4 100644 --- a/BotNet.Services/OpenAI/Skills/VisionBot.cs +++ b/BotNet.Services/OpenAI/Skills/VisionBot.cs @@ -1,7 +1,69 @@ -namespace BotNet.Services.OpenAI.Skills { +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Threading.Tasks; +using BotNet.Services.OpenAI.Models; + +namespace BotNet.Services.OpenAI.Skills { public sealed class VisionBot( OpenAIStreamingClient openAIStreamingClient ) { private readonly OpenAIStreamingClient _openAIStreamingClient = openAIStreamingClient; + + public async Task StreamChatAsync( + string message, + string imageBase64, + long chatId, + int replyToMessageId + ) { + List messages = new() { + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromTextWithImageBase64("user", message, imageBase64) + }; + + await _openAIStreamingClient.StreamChatAsync( + model: "gpt-4-vision-preview", + messages: messages, + maxTokens: 512, + callSign: "AI", + chatId: chatId, + replyToMessageId: replyToMessageId + ); + } + + public async Task StreamChatAsync( + string message, + string imageBase64, + ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, + long chatId, + int replyToMessageId + ) { + List messages = new() { + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. 
The assistant is helpful, creative, clever, and very friendly."), + + from tuple in thread + let role = tuple.Sender switch { + "AI" => "assistant", + _ => "user" + } + select tuple switch { + { Text: { } text, ImageBase64: null } => ChatMessage.FromText(role, text), + { Text: null, ImageBase64: { } imageBase64 } => ChatMessage.FromImageBase64(role, imageBase64), + { Text: { } text, ImageBase64: { } imageBase64 } => ChatMessage.FromTextWithImageBase64(role, text, imageBase64), + _ => ChatMessage.FromText(role, "") + }, + + ChatMessage.FromTextWithImageBase64("user", message, imageBase64) + }; + + await _openAIStreamingClient.StreamChatAsync( + model: "gpt-4-vision-preview", + messages: messages, + maxTokens: 512, + callSign: "AI", + chatId: chatId, + replyToMessageId: replyToMessageId + ); + } } } diff --git a/BotNet/Bot/UpdateHandler.cs b/BotNet/Bot/UpdateHandler.cs index 27a5dad..c6b84be 100644 --- a/BotNet/Bot/UpdateHandler.cs +++ b/BotNet/Bot/UpdateHandler.cs @@ -66,7 +66,7 @@ await botClient.SendTextMessageAsync( } // Handle call sign - if (update.Message?.Text is { } messageText && ( + if ((update.Message?.Text ?? update.Message?.Caption) is { } messageText && ( messageText.StartsWith("AI,") || messageText.StartsWith("Pakde,") )) { @@ -74,10 +74,10 @@ await botClient.SendTextMessageAsync( string callSign = messageText.Split(',')[0]; // Handle modify art command - if (callSign == "AI" && (update.Message.ReplyToMessage is { Photo.Length: > 0 } || update.Message.ReplyToMessage is { Sticker: { } })) { - await Art.ModifyArtAsync(botClient, _serviceProvider, update.Message, messageText[(callSign.Length + 2)..], cancellationToken); - break; - } + //if (callSign == "AI" && (update.Message.ReplyToMessage is { Photo.Length: > 0 } || update.Message.ReplyToMessage is { Sticker: { } })) { + // await Art.ModifyArtAsync(botClient, _serviceProvider, update.Message, messageText[(callSign.Length + 2)..], cancellationToken); + // break; + //} // Respond to call sign switch (callSign) {
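
A quick serialization check for reviewers: the sketch below mirrors the shape of the records introduced in this patch (only the text-plus-image factory is reproduced) and prints the JSON that FromTextWithImageBase64 produces. It is a minimal standalone sketch, not project code; JsonNamingPolicy.SnakeCaseLower is assumed to behave like the project's SnakeCaseNamingPolicy, and the base64 payload is a placeholder.

using System;
using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;

// Local mirror of the records added in BotNet.Services/OpenAI/Models/ChatMessage.cs.
public record ChatMessage(string Role, List<ChatContent> Content) {
	public static ChatMessage FromTextWithImageBase64(string role, string text, string imageBase64) => new(
		Role: role,
		Content: [
			new ChatContent("text", text, null),
			new ChatContent("image_url", null, new ImageUrl($"data:image/jpeg;base64,{imageBase64}"))
		]
	);
}

public record ChatContent(
	string Type,
	[property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] string? Text,
	[property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] ImageUrl? ImageUrl
);

public record ImageUrl(string Url);

public static class WireFormatDemo {
	public static void Main() {
		ChatMessage message = ChatMessage.FromTextWithImageBase64("user", "What is in this picture?", "AAAA");
		string json = JsonSerializer.Serialize(message, new JsonSerializerOptions {
			PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower
		});
		// Prints a "content" array with one "text" part and one "image_url" part;
		// null members are omitted because of the WhenWritingNull condition.
		Console.WriteLine(json);
	}
}

This multi-part content array is the request shape the gpt-4-vision-preview chat endpoint accepts, which is presumably why Choice now keeps a separate plain-string ChoiceChatMessage for responses while requests use the list-based ChatMessage.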
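The image guard in GetImageBase64Async can also be exercised without a Telegram round trip. The helper below is an illustrative sketch under the same rules (200 KB cap, JPEG only, at most 1280x1280, original bytes passed through as base64); ImageGuard and photo.jpg are placeholder names, not part of the patch.

using System;
using System.IO;
using SkiaSharp;

public static class ImageGuard {
	public static (string? ImageBase64, string? Error) Validate(byte[] originalImage) {
		// Limit input image to 200KB
		if (originalImage.Length > 200 * 1024) {
			return (null, "Image larger than 200KB");
		}

		// Parse the encoded header; reject anything SkiaSharp cannot read
		using MemoryStream stream = new(originalImage);
		using SKCodec? codec = SKCodec.Create(stream, out SKCodecResult codecResult);
		if (codec is null || codecResult != SKCodecResult.Success) {
			return (null, "Invalid image");
		}
		if (codec.EncodedFormat != SKEncodedImageFormat.Jpeg) {
			return (null, "Image must be a JPEG image");
		}

		// Limit input image to 1280x1280 (both dimensions)
		using SKBitmap? bitmap = SKBitmap.Decode(codec);
		if (bitmap is null) {
			return (null, "Invalid image");
		}
		if (bitmap.Width > 1280 || bitmap.Height > 1280) {
			return (null, "Image larger than 1280x1280");
		}

		// Pass the original (already compressed) bytes through as base64
		return (Convert.ToBase64String(originalImage), null);
	}

	public static void Main() {
		(string? imageBase64, string? error) = Validate(File.ReadAllBytes("photo.jpg"));
		Console.WriteLine(error ?? $"OK, {imageBase64!.Length} base64 characters");
	}
}

SKCodec.Info already exposes the encoded width and height, so the dimension check could skip the full SKBitmap.Decode; the sketch keeps the patch's approach for comparability.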
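How the widened ThreadTracker tuple is meant to flow, as a rough standalone sketch: each tracked message now carries an optional Text and an optional ImageBase64, and GetThread walks the ReplyToMessageId chain newest to oldest, stopping after maxLines entries. ThreadTrackerSketch, the cache key shape, and the one-hour lifetime are illustrative simplifications, not the service's actual values.

using System;
using System.Collections.Generic;
using Microsoft.Extensions.Caching.Memory;

public sealed class ThreadTrackerSketch(IMemoryCache memoryCache) {
	private sealed record TrackedMessage(string Sender, string? Text, string? ImageBase64, long? ReplyToMessageId);

	public void TrackMessage(long messageId, string sender, string? text, string? imageBase64, long? replyToMessageId) {
		memoryCache.Set(
			key: $"message:{messageId}",
			value: new TrackedMessage(sender, text, imageBase64, replyToMessageId),
			absoluteExpirationRelativeToNow: TimeSpan.FromHours(1)
		);
	}

	public IEnumerable<(string Sender, string? Text, string? ImageBase64)> GetThread(long messageId, int maxLines) {
		long? currentId = messageId;
		while (currentId.HasValue
			&& maxLines-- > 0
			&& memoryCache.TryGetValue($"message:{currentId}", out TrackedMessage? message)
			&& message is not null) {
			yield return (message.Sender, message.Text, message.ImageBase64);
			currentId = message.ReplyToMessageId;
		}
	}
}

public static class ThreadDemo {
	public static void Main() {
		using MemoryCache cache = new(new MemoryCacheOptions());
		ThreadTrackerSketch tracker = new(cache);
		tracker.TrackMessage(1, "Ronny", text: "AI, what is this?", imageBase64: "AAAA", replyToMessageId: null);
		tracker.TrackMessage(2, "AI", text: "It looks like a cat.", imageBase64: null, replyToMessageId: 1);
		// Walks 2 -> 1 and yields the AI reply first, then the original photo message.
		foreach ((string sender, string? text, string? image) in tracker.GetThread(2, maxLines: 20)) {
			Console.WriteLine($"{sender}: {text}{(image is null ? "" : " [has image]")}");
		}
	}
}

As far as this patch shows, every existing TrackMessage call still passes imageBase64: null, so vision context currently enters a conversation only through the photo-plus-caption path in OpenAI.cs rather than through the tracked thread.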