diff --git a/BotNet.Services/BotCommands/OpenAI.cs b/BotNet.Services/BotCommands/OpenAI.cs index fe14c8e..3ffcf6f 100644 --- a/BotNet.Services/BotCommands/OpenAI.cs +++ b/BotNet.Services/BotCommands/OpenAI.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Immutable; +using System.IO; using System.Linq; using System.Net; using System.Threading; @@ -10,6 +11,7 @@ using BotNet.Services.RateLimit; using Microsoft.Extensions.DependencyInjection; using RG.Ninja; +using SkiaSharp; using Telegram.Bot; using Telegram.Bot.Types; using Telegram.Bot.Types.Enums; @@ -446,7 +448,7 @@ await botClient.SendTextMessageAsync( } [Obsolete("Use StreamChatWithFriendlyBotAsync instead.", error: true)] - public static async Task ChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string Text)> thread, CancellationToken cancellationToken) { + public static async Task ChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, CancellationToken cancellationToken) { try { (message.Chat.Type == ChatType.Private ? CHAT_PRIVATE_RATE_LIMITER @@ -592,7 +594,7 @@ await botClient.SendTextMessageAsync( return null; } - public static async Task ChatWithSarcasticBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string Text)> thread, string callSign, CancellationToken cancellationToken) { + public static async Task ChatWithSarcasticBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, string callSign, CancellationToken cancellationToken) { try { (message.Chat.Type == ChatType.Private ? 
CHAT_PRIVATE_RATE_LIMITER @@ -761,18 +763,55 @@ await botClient.SendTextMessageAsync( } } - public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, string callSign, Message message, CancellationToken cancellationToken) { + public static async Task StreamChatWithFriendlyBotAsync( + ITelegramBotClient botClient, + IServiceProvider serviceProvider, + Message message, + CancellationToken cancellationToken + ) { try { (message.Chat.Type == ChatType.Private ? CHAT_PRIVATE_RATE_LIMITER : CHAT_GROUP_RATE_LIMITER ).ValidateActionRate(message.Chat.Id, message.From!.Id); - await serviceProvider.GetRequiredService().StreamChatAsync( - message: message.Text!, - callSign: callSign, - chatId: message.Chat.Id, - replyToMessageId: message.MessageId - ); + + PhotoSize? photoSize; + string? caption; + if (message is { Photo.Length: > 0, Caption: { } }) { + photoSize = message.Photo.OrderByDescending(photoSize => photoSize.Width).First(); + caption = message.Caption; + } else if (message.ReplyToMessage is { Photo.Length: > 0, Caption: { } }) { + photoSize = message.ReplyToMessage.Photo.OrderByDescending(photoSize => photoSize.Width).First(); + caption = message.ReplyToMessage.Caption; + } else { + photoSize = null; + caption = null; + } + + if (photoSize != null && caption != null) { + (string? imageBase64, string? 
error) = await GetImageBase64Async(botClient, photoSize, cancellationToken); + if (error != null) { + await botClient.SendTextMessageAsync( + chatId: message.Chat.Id, + text: $"{error}", + parseMode: ParseMode.Html, + replyToMessageId: message.MessageId, + cancellationToken: cancellationToken); + return; + } + await serviceProvider.GetRequiredService().StreamChatAsync( + message: caption, + imageBase64: imageBase64!, + chatId: message.Chat.Id, + replyToMessageId: message.MessageId + ); + } else { + await serviceProvider.GetRequiredService().StreamChatAsync( + message: message.Text!, + chatId: message.Chat.Id, + replyToMessageId: message.MessageId + ); + } } catch (RateLimitExceededException exc) when (exc is { Cooldown: var cooldown }) { if (message.Chat.Type == ChatType.Private) { await botClient.SendTextMessageAsync( @@ -802,7 +841,7 @@ await botClient.SendTextMessageAsync( } } - public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, string callSign, Message message, ImmutableList<(string Sender, string Text)> thread, CancellationToken cancellationToken) { + public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botClient, IServiceProvider serviceProvider, Message message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, CancellationToken cancellationToken) { try { (message.Chat.Type == ChatType.Private ? CHAT_PRIVATE_RATE_LIMITER @@ -811,7 +850,6 @@ public static async Task StreamChatWithFriendlyBotAsync(ITelegramBotClient botCl await serviceProvider.GetRequiredService().StreamChatAsync( message: message.Text!, thread: thread, - callSign: callSign, chatId: message.Chat.Id, replyToMessageId: message.MessageId ); @@ -843,5 +881,39 @@ await botClient.SendTextMessageAsync( cancellationToken: cancellationToken); } } + + private static async Task<(string? ImageBase64, string? 
Error)> GetImageBase64Async(ITelegramBotClient botClient, PhotoSize photoSize, CancellationToken cancellationToken) { + // Download photo + using MemoryStream originalImageStream = new(); + await botClient.GetInfoAndDownloadFileAsync( + fileId: photoSize.FileId, + destination: originalImageStream, + cancellationToken: cancellationToken); + byte[] originalImage = originalImageStream.ToArray(); + + // Limit input image to 200KB + if (originalImage.Length > 200 * 1024) { + return (null, "Image larger than 200KB"); + } + + // Decode image + originalImageStream.Position = 0; + using SKCodec codec = SKCodec.Create(originalImageStream, out SKCodecResult codecResult); + if (codecResult != SKCodecResult.Success) { + return (null, "Invalid image"); + } + if (codec.EncodedFormat != SKEncodedImageFormat.Jpeg) { + return (null, "Image must be compressed image"); + } + using SKBitmap bitmap = SKBitmap.Decode(codec); + + // Limit input image to 1280x1280 + if (bitmap.Width > 1280 || bitmap.Height > 1280) { + return (null, "Image larger than 1280x1280"); + } + + // Encode image as base64 + return (Convert.ToBase64String(originalImage), null); + } } } diff --git a/BotNet.Services/OpenAI/IntentDetector.cs b/BotNet.Services/OpenAI/IntentDetector.cs index 7a523aa..d306482 100644 --- a/BotNet.Services/OpenAI/IntentDetector.cs +++ b/BotNet.Services/OpenAI/IntentDetector.cs @@ -14,7 +14,7 @@ public async Task DetectChatIntentAsync( CancellationToken cancellationToken ) { List messages = [ - new("user", $$""" + ChatMessage.FromText("user", $$""" These are available intents that one might query when they provide a text prompt: Question, @@ -49,7 +49,7 @@ public async Task DetectImagePromptIntentAsync( CancellationToken cancellationToken ) { List messages = [ - new("user", $$""" + ChatMessage.FromText("user", $$""" These are available intents that one might query when they provide a prompt which contain an image: Vision, diff --git a/BotNet.Services/OpenAI/Models/ChatMessage.cs
b/BotNet.Services/OpenAI/Models/ChatMessage.cs index 701ddf6..4c22698 100644 --- a/BotNet.Services/OpenAI/Models/ChatMessage.cs +++ b/BotNet.Services/OpenAI/Models/ChatMessage.cs @@ -1,6 +1,62 @@ -namespace BotNet.Services.OpenAI.Models { +using System; +using System.Collections.Generic; +using System.Text.Json.Serialization; + +namespace BotNet.Services.OpenAI.Models { public record ChatMessage( string Role, - string Content + List Content + ) { + public static ChatMessage FromText(string role, string text) => new( + Role: role, + Content: [ + new ChatContent( + Type: "text", + Text: text ?? throw new ArgumentNullException(nameof(text)), + ImageUrl: null + ) + ] + ); + + public static ChatMessage FromTextWithImageBase64(string role, string text, string imageBase64) => new( + Role: role, + Content: [ + new ChatContent( + Type: "text", + Text: text ?? throw new ArgumentNullException(nameof(text)), + ImageUrl: null + ), + new ChatContent( + Type: "image_url", + Text: null, + ImageUrl: new( + Url: $"data:image/jpeg;base64,{imageBase64 ?? throw new ArgumentNullException(nameof(imageBase64))}" + ) + ) + ] + ); + + public static ChatMessage FromImageBase64(string role, string imageBase64) => new( + Role: role, + Content: [ + new ChatContent( + Type: "image_url", + Text: null, + ImageUrl: new( + Url: $"data:image/jpeg;base64,{imageBase64 ?? throw new ArgumentNullException(nameof(imageBase64))}" + ) + ) + ] + ); + } + + public record ChatContent( + string Type, + [property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] string? Text, + [property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] ImageUrl? ImageUrl + ); + + public record ImageUrl( + string Url ); } diff --git a/BotNet.Services/OpenAI/Models/Choice.cs b/BotNet.Services/OpenAI/Models/Choice.cs index 7409c92..6fa2350 100644 --- a/BotNet.Services/OpenAI/Models/Choice.cs +++ b/BotNet.Services/OpenAI/Models/Choice.cs @@ -2,9 +2,14 @@ public record Choice( string? Text, int? 
Index, - ChatMessage? Message, - ChatMessage? Delta, + ChoiceChatMessage? Message, + ChoiceChatMessage? Delta, Logprobs? Logprobs, string? FinishReason ); + + public record ChoiceChatMessage( + string Role, + string Content + ); } diff --git a/BotNet.Services/OpenAI/OpenAIClient.cs b/BotNet.Services/OpenAI/OpenAIClient.cs index 7f1a9b4..3ad488c 100644 --- a/BotNet.Services/OpenAI/OpenAIClient.cs +++ b/BotNet.Services/OpenAI/OpenAIClient.cs @@ -9,13 +9,15 @@ using System.Threading.Tasks; using BotNet.Services.Json; using BotNet.Services.OpenAI.Models; +using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using RG.Ninja; namespace BotNet.Services.OpenAI { public class OpenAIClient( HttpClient httpClient, - IOptions openAIOptionsAccessor + IOptions openAIOptionsAccessor, + ILogger logger ) { private const string COMPLETION_URL_TEMPLATE = "https://api.openai.com/v1/engines/{0}/completions"; private const string CHAT_URL = "https://api.openai.com/v1/chat/completions"; @@ -23,7 +25,8 @@ IOptions openAIOptionsAccessor PropertyNamingPolicy = new SnakeCaseNamingPolicy() }; private readonly HttpClient _httpClient = httpClient; - private readonly string _apiKey = openAIOptionsAccessor.Value.ApiKey!; + private readonly string _apiKey = openAIOptionsAccessor.Value.ApiKey!; + private readonly ILogger _logger = logger; public async Task AutocompleteAsync(string engine, string prompt, string[]? 
stop, int maxTokens, double frequencyPenalty, double presencePenalty, double temperature, double topP, CancellationToken cancellationToken) { using HttpRequestMessage request = new(HttpMethod.Post, string.Format(COMPLETION_URL_TEMPLATE, engine)) { @@ -111,8 +114,12 @@ [EnumeratorCancellation] CancellationToken cancellationToken options: JSON_SERIALIZER_OPTIONS ) }; - using HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken); - response.EnsureSuccessStatusCode(); + using HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken); + if (!response.IsSuccessStatusCode) { + string errorMessage = await response.Content.ReadAsStringAsync(cancellationToken); + _logger.LogError("OpenAI API returned an error: {ErrorMessage}", errorMessage); + response.EnsureSuccessStatusCode(); + } StringBuilder result = new(); using Stream stream = await response.Content.ReadAsStreamAsync(cancellationToken); diff --git a/BotNet.Services/OpenAI/OpenAIStreamingClient.cs b/BotNet.Services/OpenAI/OpenAIStreamingClient.cs index d35951c..f34cbcc 100644 --- a/BotNet.Services/OpenAI/OpenAIStreamingClient.cs +++ b/BotNet.Services/OpenAI/OpenAIStreamingClient.cs @@ -43,12 +43,16 @@ int replyToMessageId // Task for continuously consuming the stream Task downstreamTask = Task.Run(async () => { - await foreach ((string result, bool stop) in enumerable) { - lastResult = result; + try { + await foreach ((string result, bool stop) in enumerable) { + lastResult = result; - if (stop) { - break; + if (stop) { + break; + } } + } catch (Exception exc) { + _logger.LogError(exc, "Error while consuming chat completion stream"); + } }); @@ -59,10 +63,12 @@ await Task.WhenAny( ); // If downstream task is completed, send the last result - if (downstreamTask.IsCompleted) { + if (downstreamTask.IsCompletedSuccessfully) { + if (lastResult is null) return; + Message completeMessage = await telegramBotClient.SendTextMessageAsync( chatId: chatId, - text: MarkdownV2Sanitizer.Sanitize(lastResult!), + text: MarkdownV2Sanitizer.Sanitize(lastResult),
parseMode: ParseMode.MarkdownV2, replyToMessageId: replyToMessageId ); @@ -72,7 +78,8 @@ await Task.WhenAny( threadTracker.TrackMessage( messageId: completeMessage.MessageId, sender: callSign, - text: lastResult!, + text: lastResult, + imageBase64: null, replyToMessageId: replyToMessageId ); @@ -81,10 +88,10 @@ await Task.WhenAny( } // Otherwise, send incomplete result and continue streaming - string lastSent = lastResult!; + string lastSent = lastResult ?? ""; Message incompleteMessage = await telegramBotClient.SendTextMessageAsync( chatId: chatId, - text: MarkdownV2Sanitizer.Sanitize(lastResult!) + "… ⏳", // ellipsis, nbsp, hourglass emoji + text: MarkdownV2Sanitizer.Sanitize(lastResult ?? "") + "… ⏳", // ellipsis, nbsp, hourglass emoji parseMode: ParseMode.MarkdownV2, replyToMessageId: replyToMessageId ); @@ -104,7 +111,7 @@ await Task.WhenAny( await telegramBotClient.EditMessageTextAsync( chatId: chatId, messageId: incompleteMessage.MessageId, - text: MarkdownV2Sanitizer.Sanitize(lastResult!) + "… ⏳", // ellipsis, nbsp, hourglass emoji + text: MarkdownV2Sanitizer.Sanitize(lastResult ?? "") + "… ⏳", // ellipsis, nbsp, hourglass emoji parseMode: ParseMode.MarkdownV2, cancellationToken: cts.Token ); @@ -128,7 +135,7 @@ await telegramBotClient.EditMessageTextAsync( await telegramBotClient.EditMessageTextAsync( chatId: chatId, messageId: incompleteMessage.MessageId, - text: MarkdownV2Sanitizer.Sanitize(lastResult!), + text: MarkdownV2Sanitizer.Sanitize(lastResult ?? 
""), parseMode: ParseMode.MarkdownV2, cancellationToken: cts.Token ); @@ -143,6 +150,7 @@ await telegramBotClient.EditMessageTextAsync( messageId: incompleteMessage.MessageId, sender: callSign, text: lastResult!, + imageBase64: null, replyToMessageId: replyToMessageId ); } catch { diff --git a/BotNet.Services/OpenAI/ServiceCollectionExtensions.cs b/BotNet.Services/OpenAI/ServiceCollectionExtensions.cs index 96818fc..b2cf318 100644 --- a/BotNet.Services/OpenAI/ServiceCollectionExtensions.cs +++ b/BotNet.Services/OpenAI/ServiceCollectionExtensions.cs @@ -16,6 +16,7 @@ public static IServiceCollection AddOpenAIClient(this IServiceCollection service services.AddTransient(); services.AddTransient(); services.AddTransient(); + services.AddTransient(); return services; } } diff --git a/BotNet.Services/OpenAI/Skills/FriendlyBot.cs b/BotNet.Services/OpenAI/Skills/FriendlyBot.cs index 17aaaca..606864c 100644 --- a/BotNet.Services/OpenAI/Skills/FriendlyBot.cs +++ b/BotNet.Services/OpenAI/Skills/FriendlyBot.cs @@ -4,10 +4,9 @@ using System.Threading; using System.Threading.Tasks; using BotNet.Services.OpenAI.Models; -using Telegram.Bot.Types; namespace BotNet.Services.OpenAI.Skills { - public class FriendlyBot( + public sealed class FriendlyBot( OpenAIClient openAIClient, OpenAIStreamingClient openAIStreamingClient ) { @@ -59,8 +58,8 @@ public Task RespondToThreadAsync(string callSign, string name, string qu public Task ChatAsync(string message, CancellationToken cancellationToken) { List messages = [ - new("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), - new("user", message) + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. 
The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromText("user", message) ]; return _openAIClient.ChatAsync( @@ -71,36 +70,39 @@ public Task ChatAsync(string message, CancellationToken cancellationToke ); } - public async Task StreamChatAsync(string message, string callSign, long chatId, int replyToMessageId) { + public async Task StreamChatAsync(string message, long chatId, int replyToMessageId) { List messages = [ - new("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), - new("user", message) + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromText("user", message) ]; await _openAIStreamingClient.StreamChatAsync( model: "gpt-4-1106-preview", messages: messages, maxTokens: 512, - callSign: callSign, + callSign: "AI", chatId: chatId, replyToMessageId: replyToMessageId ); } - public Task ChatAsync(string message, ImmutableList<(string Sender, string Text)> thread, CancellationToken cancellationToken) { + public Task ChatAsync(string message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, CancellationToken cancellationToken) { List messages = new() { - new("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. 
The assistant is helpful, creative, clever, and very friendly."), from tuple in thread - select new ChatMessage( - Role: tuple.Sender switch { - "AI" => "assistant", - _ => "user" - }, - Content: tuple.Text - ), + let role = tuple.Sender switch { + "AI" => "assistant", + _ => "user" + } + select tuple switch { + { Text: { } text, ImageBase64: null } => ChatMessage.FromText(role, text), + { Text: null, ImageBase64: { } imageBase64 } => ChatMessage.FromImageBase64(role, imageBase64), + { Text: { } text, ImageBase64: { } imageBase64 } => ChatMessage.FromTextWithImageBase64(role, text, imageBase64), + _ => ChatMessage.FromText(role, "") + }, - new("user", message) + ChatMessage.FromText("user", message) }; return _openAIClient.ChatAsync( @@ -111,27 +113,30 @@ from tuple in thread ); } - public async Task StreamChatAsync(string message, ImmutableList<(string Sender, string Text)> thread, string callSign, long chatId, int replyToMessageId) { + public async Task StreamChatAsync(string message, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, long chatId, int replyToMessageId) { List messages = new() { - new("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. 
The assistant is helpful, creative, clever, and very friendly."), from tuple in thread - select new ChatMessage( - Role: tuple.Sender switch { - "AI" => "assistant", - _ => "user" - }, - Content: tuple.Text - ), + let role = tuple.Sender switch { + "AI" => "assistant", + _ => "user" + } + select tuple switch { + { Text: { } text, ImageBase64: null } => ChatMessage.FromText(role, text), + { Text: null, ImageBase64: { } imageBase64 } => ChatMessage.FromImageBase64(role, imageBase64), + { Text: { } text, ImageBase64: { } imageBase64 } => ChatMessage.FromTextWithImageBase64(role, text, imageBase64), + _ => ChatMessage.FromText(role, "") + }, - new("user", message) + ChatMessage.FromText("user", message) }; await _openAIStreamingClient.StreamChatAsync( model: "gpt-4-1106-preview", messages: messages, maxTokens: 512, - callSign: callSign, + callSign: "AI", chatId: chatId, replyToMessageId: replyToMessageId ); diff --git a/BotNet.Services/OpenAI/Skills/SarcasticBot.cs b/BotNet.Services/OpenAI/Skills/SarcasticBot.cs index d08f840..084b559 100644 --- a/BotNet.Services/OpenAI/Skills/SarcasticBot.cs +++ b/BotNet.Services/OpenAI/Skills/SarcasticBot.cs @@ -33,7 +33,7 @@ public Task ChatAsync(string callSign, string name, string question, Can ); } - public Task RespondToThreadAsync(string callSign, string name, string question, ImmutableList<(string Sender, string Text)> thread, CancellationToken cancellationToken) { + public Task RespondToThreadAsync(string callSign, string name, string question, ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread, CancellationToken cancellationToken) { string prompt = $"{callSign} adalah chatbot berbahasa Indonesia yang tidak ramah, kurang antusias dalam menjawab pertanyaan, dan suka mengomel.\n\n" + $"{name}: Satu kilogram itu berapa pound?\n" + $"{callSign}: Kamu tanya ini lagi? Satu kilogram itu 2.2 pound. 
Tolong dicatat lah.\n\n" @@ -43,7 +43,7 @@ public Task RespondToThreadAsync(string callSign, string name, string qu + $"{callSign}: Tanggal 17 Desember 1903, Wilbur dan Orville Wright menerbangkan pesawat terbang pertama dalam sejarah. Semoga mereka mengangkut saya dari sini.\n\n" + $"{name}: Apa makna kehidupan?\n" + $"{callSign}: Entahlah. Nanti coba saya tanya ke teman saya Google.\n\n"; - foreach ((string sender, string text) in thread) { + foreach ((string sender, string? text, string? imageBase64) in thread) { prompt += $"{sender}: {text}\n"; if (sender is "AI" or "Pakde") prompt += "\n"; } diff --git a/BotNet.Services/OpenAI/Skills/VisionBot.cs b/BotNet.Services/OpenAI/Skills/VisionBot.cs new file mode 100644 index 0000000..8227de4 --- /dev/null +++ b/BotNet.Services/OpenAI/Skills/VisionBot.cs @@ -0,0 +1,69 @@ +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Threading.Tasks; +using BotNet.Services.OpenAI.Models; + +namespace BotNet.Services.OpenAI.Skills { + public sealed class VisionBot( + OpenAIStreamingClient openAIStreamingClient + ) { + private readonly OpenAIStreamingClient _openAIStreamingClient = openAIStreamingClient; + + public async Task StreamChatAsync( + string message, + string imageBase64, + long chatId, + int replyToMessageId + ) { + List messages = new() { + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + ChatMessage.FromTextWithImageBase64("user", message, imageBase64) + }; + + await _openAIStreamingClient.StreamChatAsync( + model: "gpt-4-vision-preview", + messages: messages, + maxTokens: 512, + callSign: "AI", + chatId: chatId, + replyToMessageId: replyToMessageId + ); + } + + public async Task StreamChatAsync( + string message, + string imageBase64, + ImmutableList<(string Sender, string? Text, string? 
ImageBase64)> thread, + long chatId, + int replyToMessageId + ) { + List messages = new() { + ChatMessage.FromText("system", "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly."), + + from tuple in thread + let role = tuple.Sender switch { + "AI" => "assistant", + _ => "user" + } + select tuple switch { + { Text: { } text, ImageBase64: null } => ChatMessage.FromText(role, text), + { Text: null, ImageBase64: { } imageBase64 } => ChatMessage.FromImageBase64(role, imageBase64), + { Text: { } text, ImageBase64: { } imageBase64 } => ChatMessage.FromTextWithImageBase64(role, text, imageBase64), + _ => ChatMessage.FromText(role, "") + }, + + ChatMessage.FromTextWithImageBase64("user", message, imageBase64) + }; + + await _openAIStreamingClient.StreamChatAsync( + model: "gpt-4-vision-preview", + messages: messages, + maxTokens: 512, + callSign: "AI", + chatId: chatId, + replyToMessageId: replyToMessageId + ); + } + } +} diff --git a/BotNet.Services/OpenAI/ThreadTracker.cs b/BotNet.Services/OpenAI/ThreadTracker.cs index dff2d25..569249b 100644 --- a/BotNet.Services/OpenAI/ThreadTracker.cs +++ b/BotNet.Services/OpenAI/ThreadTracker.cs @@ -11,7 +11,8 @@ IMemoryCache memoryCache public void TrackMessage( long messageId, string sender, - string text, + string? text, + string? imageBase64, long? replyToMessageId ) { _memoryCache.Set( @@ -19,6 +20,7 @@ public void TrackMessage( value: new Message( Sender: sender, Text: text, + ImageBase64: imageBase64, ReplyToMessageId: replyToMessageId.HasValue ? new(replyToMessageId.Value) : null @@ -27,7 +29,7 @@ public void TrackMessage( ); } - public IEnumerable<(string Sender, string Text)> GetThread( + public IEnumerable<(string Sender, string? Text, string? 
ImageBase64)> GetThread( long messageId, int maxLines ) { @@ -37,7 +39,8 @@ int maxLines ) && message != null && maxLines-- > 0) { yield return ( Sender: message.Sender, - Text: message.Text + Text: message.Text, + ImageBase64: message.ImageBase64 ); if (message.ReplyToMessageId == null) { @@ -51,7 +54,8 @@ int maxLines private readonly record struct MessageId(long Value); private sealed record Message( string Sender, - string Text, + string? Text, + string? ImageBase64, MessageId? ReplyToMessageId ); } diff --git a/BotNet/Bot/UpdateHandler.cs b/BotNet/Bot/UpdateHandler.cs index 268cf76..c6b84be 100644 --- a/BotNet/Bot/UpdateHandler.cs +++ b/BotNet/Bot/UpdateHandler.cs @@ -66,7 +66,7 @@ await botClient.SendTextMessageAsync( } // Handle call sign - if (update.Message?.Text is { } messageText && ( + if ((update.Message?.Text ?? update.Message?.Caption) is { } messageText && ( messageText.StartsWith("AI,") || messageText.StartsWith("Pakde,") )) { @@ -74,15 +74,15 @@ await botClient.SendTextMessageAsync( string callSign = messageText.Split(',')[0]; // Handle modify art command - if (callSign == "AI" && (update.Message.ReplyToMessage is { Photo.Length: > 0 } || update.Message.ReplyToMessage is { Sticker: { } })) { - await Art.ModifyArtAsync(botClient, _serviceProvider, update.Message, messageText[(callSign.Length + 2)..], cancellationToken); - break; - } + //if (callSign == "AI" && (update.Message.ReplyToMessage is { Photo.Length: > 0 } || update.Message.ReplyToMessage is { Sticker: { } })) { + // await Art.ModifyArtAsync(botClient, _serviceProvider, update.Message, messageText[(callSign.Length + 2)..], cancellationToken); + // break; + //} // Respond to call sign switch (callSign) { case "AI": - await OpenAI.StreamChatWithFriendlyBotAsync(botClient, _serviceProvider, "AI", update.Message, cancellationToken); + await OpenAI.StreamChatWithFriendlyBotAsync(botClient, _serviceProvider, update.Message, cancellationToken); break; case "Pakde": Message? 
sentMessage = await OpenAI.ChatWithSarcasticBotAsync(botClient, _serviceProvider, update.Message, callSign, cancellationToken); @@ -92,6 +92,7 @@ await botClient.SendTextMessageAsync( messageId: sentMessage.MessageId, sender: callSign, text: sentMessage.Text!, + imageBase64: null, replyToMessageId: sentMessage.ReplyToMessage!.MessageId ); } @@ -122,11 +123,12 @@ await botClient.SendTextMessageAsync( messageId: update.Message.MessageId, sender: $"{firstName}{lastName?.Let(lastName => " " + lastName)}", text: text, + imageBase64: null, replyToMessageId: replyToMessageId ); // Get thread - ImmutableList<(string Sender, string Text)> thread = threadTracker.GetThread( + ImmutableList<(string Sender, string? Text, string? ImageBase64)> thread = threadTracker.GetThread( messageId: replyToMessageId, maxLines: 20 ).ToImmutableList(); @@ -139,7 +141,7 @@ await botClient.SendTextMessageAsync( // Respond to thread switch (callSign) { case "AI": - await OpenAI.StreamChatWithFriendlyBotAsync(botClient, _serviceProvider, "AI", update.Message, thread, cancellationToken); + await OpenAI.StreamChatWithFriendlyBotAsync(botClient, _serviceProvider, update.Message, thread, cancellationToken); break; case "Pakde": Message? sentMessage = await OpenAI.ChatWithSarcasticBotAsync(botClient, _serviceProvider, update.Message, thread, callSign, cancellationToken); @@ -149,6 +151,7 @@ await botClient.SendTextMessageAsync( messageId: sentMessage.MessageId, sender: callSign, text: sentMessage.Text!, + imageBase64: null, replyToMessageId: sentMessage.ReplyToMessage!.MessageId ); }