diff --git a/.github/workflows/dotnet-blazor-app-publish-to-gh-pages-beta.yml b/.github/workflows/dotnet-blazor-app-publish-to-gh-pages-beta.yml
index 8ea7fb5d..b0fe8780 100644
--- a/.github/workflows/dotnet-blazor-app-publish-to-gh-pages-beta.yml
+++ b/.github/workflows/dotnet-blazor-app-publish-to-gh-pages-beta.yml
@@ -37,7 +37,7 @@ jobs:
       - name: Setup .NET Core
         uses: actions/setup-dotnet@v4
         with:
-          dotnet-version: 8.0.x
+          dotnet-version: 9.0.x
 
       - name: Install DotNet workload - WASM tools
         run: |
diff --git a/doc/SYSTEMS_C64_AI_CODE_COMPLETION.md b/doc/SYSTEMS_C64_AI_CODE_COMPLETION.md
index 049f308b..f8c01a01 100644
--- a/doc/SYSTEMS_C64_AI_CODE_COMPLETION.md
+++ b/doc/SYSTEMS_C64_AI_CODE_COMPLETION.md
@@ -86,12 +86,12 @@ Configure `CodingAssistant` section in `appsettings.json`.
 Using OpenAI:
-- `CodingAssistantType:OpenAI:CodingAssistantType`: `OpenAI`
-- `CodingAssistantType:OpenAI:ApiKey`: Your own OpenAI API key
-- `CodingAssistantType:OpenAI:DeploymentName`: The OpenAI model (default: `gpt-4o`)
+- `CodingAssistant:CodingAssistantType`: `OpenAI`
+- `CodingAssistant:OpenAI:ApiKey`: Your own OpenAI API key
+- `CodingAssistant:OpenAI:ModelName`: The OpenAI model (default: `gpt-4o`)
 
 Using self-hosted OpenAI API compatible LLM (Ollama with CodeLlama-code model):
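-- `CodingAssistantType:OpenAISelfHostedCodeLlama:CodingAssistantType`: `OpenAI`
-- `CodingAssistantType:OpenAISelfHostedCodeLlama:EndPoint`: The local Ollama HTTP endpoint (ex: `http://localhost:11434/api`)
-- `CodingAssistantType:OpenAISelfHostedCodeLlama:DeploymentName`: A local CodeLlama-code model (ex: `codellama:13b-code` or `codellama:7b-code`.)
-- `CodingAssistantType:OpenAISelfHostedCodeLlama:ApiKey`: Optional. May be required if Open WebUI proxy is in front of Ollama.
+- `CodingAssistant:CodingAssistantType`: `OpenAISelfHostedCodeLlama`
+- `CodingAssistant:OpenAISelfHostedCodeLlama:Endpoint`: The local Ollama HTTP endpoint (ex: `http://localhost:11434/api`)
+- `CodingAssistant:OpenAISelfHostedCodeLlama:ModelName`: A local CodeLlama-code model (ex: `codellama:13b-code` or `codellama:7b-code`)
+- `CodingAssistant:OpenAISelfHostedCodeLlama:ApiKey`: Optional. May be required if an Open WebUI proxy is in front of Ollama.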
 
 Using custom AI backend:
diff --git a/src/apps/Highbyte.DotNet6502.App.SadConsole/appsettings.json b/src/apps/Highbyte.DotNet6502.App.SadConsole/appsettings.json
index ef2b47fa..98a72fdb 100644
--- a/src/apps/Highbyte.DotNet6502.App.SadConsole/appsettings.json
+++ b/src/apps/Highbyte.DotNet6502.App.SadConsole/appsettings.json
@@ -50,29 +50,29 @@
   },
 
   "CodingAssistant": {
-    "CodingAssistantType": "OpenAI", // "None", "OpenAI", "CustomEndpoint"
+    "CodingAssistantType": "OpenAI", // "None", "OpenAI", "OpenAISelfHostedCodeLlama", "CustomEndpoint"
 
     "OpenAI": {
-      // Set to true to enable OpenAI Basic coding assistant. Also requires an API key (see below).
-      "Enabled": false,
-
       // dotnet user-secrets set "CodingAssistant:OpenAI:ApiKey" "[MY API KEY]"
       "ApiKey": "[SET IN DOTNET USER SECRETS]",
+      "ModelName": "gpt-4o" // Works well
+      //"ModelName": "gpt-3.5-turbo",  // Doesn't work
+      //"ModelName": "gpt-4-turbo"  // Works somewhat
+      //"ModelName": "gpt-4o-mini" // Works a bit better sometimes?
+      //"ModelName": "chatgpt-4o-latest" // Works well
 
-      //"DeploymentName": "gpt-3.5-turbo",  // Don't work
-      //"DeploymentName": "gpt-4-turbo"  // Works somewhat
-      //"DeploymentName": "gpt-4o-mini" // Works a bit better sometimes?
-      "DeploymentName": "gpt-4o" // Works good
-      //"DeploymentName": "chatgpt-4o-latest" // Works good
-
-      // Required for Azure OpenAI only. If you're using OpenAI, remove the following line.
-      //"Endpoint": "https://YOUR_ACCOUNT.openai.azure.com/"
     },
 
+    // TODO: support Azure OpenAI
+    //"AzureOpenAI": {
+    //  "Endpoint": "https://YOUR_ACCOUNT.openai.azure.com/",
+    //  "ModelName": "gpt-4o"
+    //},
+
     "OpenAISelfHostedCodeLlama": {
       "Endpoint": "http://localhost:11434/api",
-      //"DeploymentName": "codellama:7b-code", // Works sometimes (must be a CodeLlama:xxx-code model to work).
-      "DeploymentName": "codellama:13b-code" // Works ok (must be a CodeLlama:xxx-code model to work)
+      //"ModelName": "codellama:7b-code", // Works sometimes (must be a CodeLlama:xxx-code model to work).
+      "ModelName": "codellama:13b-code" // Works ok (must be a CodeLlama:xxx-code model to work)
       //"ApiKey": "[SET IN DOTNET USER SECRETS]" // API key may not be required for self-hosted
     },
 
@@ -83,6 +83,5 @@
       // dotnet user-secrets set "CodingAssistant:CustomEndpoint:ApiKey" "[MY API KEY]"
       "ApiKey": "[SET IN DOTNET USER SECRETS]"
     }
-
   }
 }
\ No newline at end of file
diff --git a/src/apps/Highbyte.DotNet6502.App.WASM/Emulator/SystemSetup/C64Setup.cs b/src/apps/Highbyte.DotNet6502.App.WASM/Emulator/SystemSetup/C64Setup.cs
index 5ac6e1a6..2eadf893 100644
--- a/src/apps/Highbyte.DotNet6502.App.WASM/Emulator/SystemSetup/C64Setup.cs
+++ b/src/apps/Highbyte.DotNet6502.App.WASM/Emulator/SystemSetup/C64Setup.cs
@@ -9,11 +9,11 @@
 using Highbyte.DotNet6502.Systems.Commodore64.Config;
 using Highbyte.DotNet6502.Systems.Commodore64.Models;
 using Blazored.LocalStorage;
-using Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
 using Highbyte.DotNet6502.AI.CodingAssistant;
 using Highbyte.DotNet6502.Systems.Commodore64.Utils.BasicAssistant;
 using static Highbyte.DotNet6502.AI.CodingAssistant.CustomAIEndpointCodeSuggestion;
 using System.Text.Json;
+using Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig;
 
 namespace Highbyte.DotNet6502.App.WASM.Emulator.SystemSetup;
 
@@ -208,12 +208,14 @@ public static async Task<ICodeSuggestion> GetCodeSuggestionImplementation(C64Hos
             if (c64HostConfig.CodeSuggestionBackendType == CodeSuggestionBackendTypeEnum.OpenAI)
             {
                 var openAIApiConfig = await GetOpenAIConfig(localStorageService);
-                codeSuggestion = OpenAICodeSuggestion.CreateOpenAICodeSuggestion(openAIApiConfig, C64BasicCodingAssistant.CODE_COMPLETION_LANGUAGE_DESCRIPTION, C64BasicCodingAssistant.CODE_COMPLETION_ADDITIONAL_SYSTEM_INSTRUCTION);
+                var chatClient = ChatClientFactory.CreateOpenAIChatClient(openAIApiConfig);
+                codeSuggestion = OpenAICodeSuggestion.CreateOpenAICodeSuggestion(chatClient, C64BasicCodingAssistant.CODE_COMPLETION_LANGUAGE_DESCRIPTION, C64BasicCodingAssistant.CODE_COMPLETION_ADDITIONAL_SYSTEM_INSTRUCTION);
             }
             else if (c64HostConfig.CodeSuggestionBackendType == CodeSuggestionBackendTypeEnum.OpenAISelfHostedCodeLlama)
             {
-                var openAIApiConfig = await GetOpenAISelfHostedCodeLlamaConfig(localStorageService);
-                codeSuggestion = OpenAICodeSuggestion.CreateOpenAICodeSuggestionForCodeLlama(openAIApiConfig, C64BasicCodingAssistant.CODE_COMPLETION_LANGUAGE_DESCRIPTION, C64BasicCodingAssistant.CODE_COMPLETION_ADDITIONAL_SYSTEM_INSTRUCTION);
+                var ollamaConfig = await GetOpenAISelfHostedCodeLlamaConfig(localStorageService);
+                var chatClient = ChatClientFactory.CreateOllamaChatClient(ollamaConfig);
+                codeSuggestion = OpenAICodeSuggestion.CreateOpenAICodeSuggestionForCodeLlama(chatClient, C64BasicCodingAssistant.CODE_COMPLETION_LANGUAGE_DESCRIPTION, C64BasicCodingAssistant.CODE_COMPLETION_ADDITIONAL_SYSTEM_INSTRUCTION);
             }
             else if (c64HostConfig.CodeSuggestionBackendType == CodeSuggestionBackendTypeEnum.CustomEndpoint)
             {
@@ -240,47 +242,52 @@ public static async Task<ICodeSuggestion> GetCodeSuggestionImplementation(C64Hos
 
     }
 
-    public static async Task<ApiConfig> GetOpenAIConfig(ILocalStorageService localStorageService)
+    public static async Task<OpenAIConfig> GetOpenAIConfig(ILocalStorageService localStorageService)
     {
-        var apiKey = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:ApiKey");
+        var apiKey = await localStorageService.GetItemAsStringAsync($"{OpenAIConfig.CONFIG_SECTION}:ApiKey");
 
-        var deploymentName = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:DeploymentName");
-        if (string.IsNullOrEmpty(deploymentName))
-            deploymentName = "gpt-4o";  // Default to a OpenAI model that works well
+        // Model name is in ModelName (current) or DeploymentName (legacy)
+        var modelName = await localStorageService.GetItemAsStringAsync($"{OpenAIConfig.CONFIG_SECTION}:ModelName");
+        if (string.IsNullOrEmpty(modelName))
+            modelName = await localStorageService.GetItemAsStringAsync($"{OpenAIConfig.CONFIG_SECTION}:DeploymentName");
+        if (string.IsNullOrEmpty(modelName))
+            modelName = "gpt-4o";  // Default to a OpenAI model that works well
 
         // For future use: Endpoint can be set if OpenAI is accessed via Azure endpoint.
         //var endpoint = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:Endpoint");
         //Uri.TryCreate(endpoint, UriKind.Absolute, out var endPointUri);
 
-        var apiConfig = new ApiConfig()
+        var apiConfig = new OpenAIConfig()
         {
             ApiKey = apiKey,    // Api key for OpenAI (required), Azure OpenAI (required), or SelfHosted (optional).
-            DeploymentName = deploymentName, // AI model name
-            //Endpoint = endPointUri,     // Used if using Azure OpenAI
-            SelfHosted = false,
+            ModelName = modelName, // AI model name
         };
         return apiConfig;
     }
 
-    public static async Task<ApiConfig> GetOpenAISelfHostedCodeLlamaConfig(ILocalStorageService localStorageService)
+    public static async Task<OllamaConfig> GetOpenAISelfHostedCodeLlamaConfig(ILocalStorageService localStorageService)
     {
-        var apiKey = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:ApiKey");
+        var apiKey = await localStorageService.GetItemAsStringAsync($"{OllamaConfig.CONFIG_SECTION}:ApiKey");
         if (apiKey == string.Empty)
             apiKey = null;
-        var deploymentName = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:DeploymentName");
-        if (string.IsNullOrEmpty(deploymentName))
-            deploymentName = "codellama:13b-code"; // Default to a Ollama CodeLlama-code model that seems to work OK (but not as good as OpenAI gpt-4o)
-        var endpoint = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:Endpoint");
+
+        // Model name is in ModelName (current) or DeploymentName (legacy)
+        var modelName = await localStorageService.GetItemAsStringAsync($"{OllamaConfig.CONFIG_SECTION}:ModelName");
+        if (string.IsNullOrEmpty(modelName))
+            modelName = await localStorageService.GetItemAsStringAsync($"{OllamaConfig.CONFIG_SECTION}:DeploymentName");
+        if (string.IsNullOrEmpty(modelName))
+            modelName = "codellama:13b-code"; // Default to a Ollama CodeLlama-code model that seems to work OK (but not as good as OpenAI gpt-4o)
+
+        var endpoint = await localStorageService.GetItemAsStringAsync($"{OllamaConfig.CONFIG_SECTION}:Endpoint");
         if (string.IsNullOrEmpty(endpoint))
             endpoint = "http://localhost:11434/api"; // Default to local Ollama 
         Uri.TryCreate(endpoint, UriKind.Absolute, out var endPointUri);
 
-        var apiConfig = new ApiConfig()
+        var apiConfig = new OllamaConfig()
         {
             ApiKey = apiKey,    // Optional for Self-hosted model.
-            DeploymentName = deploymentName, // AI CodeLlama-code model name (ex: codellama:13b-code, codellama:7b-code)
+            ModelName = modelName, // AI CodeLlama-code model name (ex: codellama:13b-code, codellama:7b-code)
             Endpoint = endPointUri,     // Self-hosted OpenAI API compatible endpoint (for example Ollama)
-            SelfHosted = true // Set to true to use self-hosted OpenAI API compatible endpoint.
         };
         return apiConfig;
     }
@@ -304,17 +311,16 @@ public static async Task<CustomAIEndpointConfig> GetCustomAIEndpointConfig(ILoca
         return apiConfig;
     }
 
-    public static async Task SaveOpenAICodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, ApiConfig apiConfig)
+    public static async Task SaveOpenAICodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, OpenAIConfig openAIConfig)
     {
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:ApiKey", apiConfig.ApiKey ?? string.Empty);
-        //await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:Endpoint", apiConfig.Endpoint != null ? apiConfig.Endpoint.OriginalString : string.Empty);
+        await localStorageService.SetItemAsStringAsync($"{OpenAIConfig.CONFIG_SECTION}:ApiKey", openAIConfig.ApiKey ?? string.Empty);
     }
 
-    public static async Task SaveOpenAISelfHostedCodeLlamaCodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, ApiConfig apiConfig)
+    public static async Task SaveOpenAISelfHostedCodeLlamaCodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, OllamaConfig ollamaConfig)
     {
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:ApiKey", apiConfig.ApiKey ?? string.Empty);
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:DeploymentName", apiConfig.DeploymentName ?? string.Empty);
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:Endpoint", apiConfig.Endpoint != null ? apiConfig.Endpoint.OriginalString : string.Empty);
+        await localStorageService.SetItemAsStringAsync($"{OllamaConfig.CONFIG_SECTION}:ApiKey", ollamaConfig.ApiKey ?? string.Empty);
+        await localStorageService.SetItemAsStringAsync($"{OllamaConfig.CONFIG_SECTION}:ModelName", ollamaConfig.ModelName ?? string.Empty);
+        await localStorageService.SetItemAsStringAsync($"{OllamaConfig.CONFIG_SECTION}:Endpoint", ollamaConfig.Endpoint != null ? ollamaConfig.Endpoint.OriginalString : string.Empty);
     }
 
     public static async Task SaveCustomCodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, CustomAIEndpointConfig customAIEndpointConfig)
diff --git a/src/apps/Highbyte.DotNet6502.App.WASM/Highbyte.DotNet6502.App.WASM.csproj b/src/apps/Highbyte.DotNet6502.App.WASM/Highbyte.DotNet6502.App.WASM.csproj
index c6301c8c..2faeb93c 100644
--- a/src/apps/Highbyte.DotNet6502.App.WASM/Highbyte.DotNet6502.App.WASM.csproj
+++ b/src/apps/Highbyte.DotNet6502.App.WASM/Highbyte.DotNet6502.App.WASM.csproj
@@ -37,7 +37,7 @@
     <PackageReference Include="Microsoft.AspNetCore.WebUtilities" Version="8.0.7" />
     <PackageReference Include="SkiaSharp.Views.Blazor" Version="3.0.0-preview.4.1" />
     <PackageReference Include="PublishSPAforGitHubPages.Build" Version="2.2.0" />
-    <PackageReference Include="System.Net.Http.Json" Version="8.0.0" />
+    <PackageReference Include="System.Net.Http.Json" Version="9.0.0-rc.2.24473.5" />
     <PackageReference Include="TextCopy" Version="6.2.1" />
     <PackageReference Include="Toolbelt.Blazor.Gamepad" Version="9.0.0" />
   </ItemGroup>
diff --git a/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Commodore64/C64ConfigUI.razor b/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Commodore64/C64ConfigUI.razor
index 99337a8b..78ec6f17 100644
--- a/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Commodore64/C64ConfigUI.razor
+++ b/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Commodore64/C64ConfigUI.razor
@@ -1,6 +1,6 @@
 ﻿@using Blazored.LocalStorage
 @using Highbyte.DotNet6502.AI.CodingAssistant
-@using Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI
+@using Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig
 @using Highbyte.DotNet6502.App.WASM.Emulator.SystemSetup
 @using Highbyte.DotNet6502.Impl.AspNet.Commodore64.Input;
 @using Highbyte.DotNet6502.Systems;
@@ -222,7 +222,7 @@
                         <div class="table-cell table-cell-fixed-width-large twocol">
                             @if (_openAISelfHostedCodeLlamaAIApiConfig != null)
                             {
-                                <InputText @ref="_openAISelfHostedCodeLlamaModelNameInputText" @bind-Value="_openAISelfHostedCodeLlamaAIApiConfig.DeploymentName" style="width: inherit" />
+                                <InputText @ref="_openAISelfHostedCodeLlamaModelNameInputText" @bind-Value="_openAISelfHostedCodeLlamaAIApiConfig.ModelName" style="width: inherit" />
                             }
                        </div>
                     </div>
@@ -310,10 +310,10 @@
 
     private void UnloadROMs() => C64HostConfig.SystemConfig.ROMs = new List<ROM>();
 
-    private ApiConfig _openAIApiConfig = default!;
+    private OpenAIConfig _openAIApiConfig = default!;
     private InputText _openAIApiKeyInputText = default!;
 
-    private ApiConfig _openAISelfHostedCodeLlamaAIApiConfig = default!;
+    private OllamaConfig _openAISelfHostedCodeLlamaAIApiConfig = default!;
     private InputText _openAISelfHostedCodeLlamaEndpointInputText = default!;
     private InputText _openAISelfHostedCodeLlamaModelNameInputText = default!;
     private InputText _openAISelfHostedCodeLlamaApiKeyInputText = default!;
@@ -433,11 +433,13 @@
         ICodeSuggestion codeSuggestion;
         if(C64HostConfig.CodeSuggestionBackendType == CodeSuggestionBackendTypeEnum.OpenAI)
         {
-            codeSuggestion = OpenAICodeSuggestion.CreateOpenAICodeSuggestion(_openAIApiConfig, C64BasicCodingAssistant.CODE_COMPLETION_LANGUAGE_DESCRIPTION, C64BasicCodingAssistant.CODE_COMPLETION_ADDITIONAL_SYSTEM_INSTRUCTION);
+            var chatClient = ChatClientFactory.CreateOpenAIChatClient(_openAIApiConfig);
+            codeSuggestion = OpenAICodeSuggestion.CreateOpenAICodeSuggestion(chatClient, C64BasicCodingAssistant.CODE_COMPLETION_LANGUAGE_DESCRIPTION, C64BasicCodingAssistant.CODE_COMPLETION_ADDITIONAL_SYSTEM_INSTRUCTION);
         }
         else if(C64HostConfig.CodeSuggestionBackendType == CodeSuggestionBackendTypeEnum.OpenAISelfHostedCodeLlama)
         {
-            codeSuggestion = OpenAICodeSuggestion.CreateOpenAICodeSuggestionForCodeLlama(_openAISelfHostedCodeLlamaAIApiConfig, C64BasicCodingAssistant.CODE_COMPLETION_LANGUAGE_DESCRIPTION, C64BasicCodingAssistant.CODE_COMPLETION_ADDITIONAL_SYSTEM_INSTRUCTION);
+            var chatClient = ChatClientFactory.CreateOllamaChatClient(_openAISelfHostedCodeLlamaAIApiConfig);
+            codeSuggestion = OpenAICodeSuggestion.CreateOpenAICodeSuggestionForCodeLlama(chatClient, C64BasicCodingAssistant.CODE_COMPLETION_LANGUAGE_DESCRIPTION, C64BasicCodingAssistant.CODE_COMPLETION_ADDITIONAL_SYSTEM_INSTRUCTION);
         }        
         else if(C64HostConfig.CodeSuggestionBackendType == CodeSuggestionBackendTypeEnum.CustomEndpoint)
         {
diff --git a/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Index.razor.cs b/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Index.razor.cs
index 36888ebb..439aa583 100644
--- a/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Index.razor.cs
+++ b/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Index.razor.cs
@@ -13,8 +13,6 @@
 using Microsoft.AspNetCore.WebUtilities;
 using Toolbelt.Blazor.Gamepad;
 using Highbyte.DotNet6502.Systems.Logging.Console;
-using Microsoft.AspNetCore.Components.Rendering;
-using Microsoft.Extensions.Azure;
 
 namespace Highbyte.DotNet6502.App.WASM.Pages;
 
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/AzureOpenAIConfig.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/AzureOpenAIConfig.cs
new file mode 100644
index 00000000..672568b0
--- /dev/null
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/AzureOpenAIConfig.cs
@@ -0,0 +1,50 @@
+using Microsoft.Extensions.Configuration;
+
+namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig;
+
+/// <summary>
+/// Settings for the Azure OpenAI code completion backend.
+/// </summary>
+public class AzureOpenAIConfig
+{
+    /// <summary>Configuration section the settings are read from.</summary>
+    public const string CONFIG_SECTION = "CodingAssistant:AzureOpenAI";
+
+    /// <summary>Name of the model to use.</summary>
+    public string ModelName { get; set; }
+
+    /// <summary>Azure OpenAI endpoint.</summary>
+    public Uri Endpoint { get; set; }
+
+    /// <summary>Creates an empty configuration; the caller sets the properties.</summary>
+    public AzureOpenAIConfig()
+    {
+    }
+
+    /// <summary>
+    /// Reads the settings from the <see cref="CONFIG_SECTION"/> section of <paramref name="config"/>.
+    /// </summary>
+    /// <param name="config">Application configuration.</param>
+    /// <exception cref="InvalidOperationException">
+    /// The section, <c>Endpoint</c> or <c>ModelName</c> is missing.
+    /// </exception>
+    public AzureOpenAIConfig(IConfiguration config)
+    {
+        var section = config.GetRequiredSection(CONFIG_SECTION);
+
+        var endpoint = section.GetValue<Uri>("Endpoint");
+        if (endpoint is null)
+        {
+            throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION}:Endpoint");
+        }
+
+        var modelName = section.GetValue<string>("ModelName");
+        if (modelName is null)
+        {
+            throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION}:ModelName");
+        }
+
+        Endpoint = endpoint;
+        ModelName = modelName;
+    }
+}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/ChatClientFactory.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/ChatClientFactory.cs
new file mode 100644
index 00000000..0958ae01
--- /dev/null
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/ChatClientFactory.cs
@@ -0,0 +1,114 @@
+using System.Net.Http.Headers;
+using System.Runtime.InteropServices;
+using Azure.AI.OpenAI;
+using Azure.Identity;
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.Caching.Distributed;
+using Microsoft.Extensions.Caching.Memory;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.Options;
+using OpenAI;
+
+namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig;
+
+/// <summary>
+/// Creates <see cref="IChatClient"/> instances for the supported code completion backends.
+/// </summary>
+public static class ChatClientFactory
+{
+    /// <summary>
+    /// Creates a chat client for <paramref name="codeCompletionBackendType"/>, reading the backend
+    /// settings from <paramref name="config"/>. Except when running on WebAssembly, the client is
+    /// wrapped with an in-memory distributed cache (a new cache is created on every call).
+    /// </summary>
+    /// <param name="codeCompletionBackendType">Backend to create a client for.</param>
+    /// <param name="config">Configuration containing the backend's <c>CodingAssistant</c> section.</param>
+    /// <returns>The chat client, with caching applied where supported.</returns>
+    /// <exception cref="InvalidOperationException">
+    /// The backend type is not supported, or a required configuration value is missing.
+    /// </exception>
+    public static IChatClient CreateChatClient(CodeCompletionBackendType codeCompletionBackendType, IConfiguration config)
+    {
+        var chatClient = codeCompletionBackendType switch
+        {
+            CodeCompletionBackendType.OpenAI => CreateOpenAIChatClient(new OpenAIConfig(config)),
+            CodeCompletionBackendType.AzureOpenAI => CreateAzureOpenAIChatClient(new AzureOpenAIConfig(config)),
+            CodeCompletionBackendType.Ollama => CreateOllamaChatClient(new OllamaConfig(config)),
+            _ => throw new InvalidOperationException($"Invalid backend type: {codeCompletionBackendType}")
+        };
+
+        // Skip DistributedCache caching for chat client, it doesn't work in wasm.
+        if (RuntimeInformation.OSArchitecture == Architecture.Wasm)
+        {
+            return chatClient;
+        }
+
+        // Use DistributedCache caching
+        var options = Options.Create(new MemoryDistributedCacheOptions
+        {
+            SizeLimit = 30 * 1024 * 1024    // Size in bytes
+        });
+        IDistributedCache cache = new MemoryDistributedCache(options);
+
+        IChatClient client = new ChatClientBuilder()
+                        .UseDistributedCache(cache)
+                        .Use(chatClient);
+        return client;
+    }
+
+    /// <summary>Creates a chat client for the OpenAI API.</summary>
+    /// <param name="openAIConfig">API key and model name to use.</param>
+    /// <returns>A chat client bound to <see cref="OpenAIConfig.ModelName"/>.</returns>
+    public static IChatClient CreateOpenAIChatClient(OpenAIConfig openAIConfig)
+    {
+        ArgumentNullException.ThrowIfNull(openAIConfig);
+
+        return new OpenAIClient(openAIConfig.ApiKey)
+                    .AsChatClient(modelId: openAIConfig.ModelName);
+    }
+
+    /// <summary>
+    /// Creates a chat client for Azure OpenAI, authenticating with <see cref="DefaultAzureCredential"/>.
+    /// </summary>
+    /// <param name="azureOpenAIConfig">Endpoint and model name to use.</param>
+    /// <returns>A chat client bound to <see cref="AzureOpenAIConfig.ModelName"/>.</returns>
+    public static IChatClient CreateAzureOpenAIChatClient(AzureOpenAIConfig azureOpenAIConfig)
+    {
+        ArgumentNullException.ThrowIfNull(azureOpenAIConfig);
+
+        return new AzureOpenAIClient(
+               azureOpenAIConfig.Endpoint,
+               new DefaultAzureCredential())
+                   .AsChatClient(modelId: azureOpenAIConfig.ModelName);
+    }
+
+    /// <summary>
+    /// Creates a chat client for a self-hosted Ollama endpoint. When
+    /// <see cref="OllamaConfig.ApiKey"/> is set it is sent as a bearer token (needed when a proxy
+    /// such as Open WebUI sits in front of Ollama).
+    /// </summary>
+    /// <param name="ollamaConfig">Endpoint, model name and optional API key to use.</param>
+    /// <returns>A chat client bound to <see cref="OllamaConfig.ModelName"/>.</returns>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="ollamaConfig"/> is null, or its <see cref="OllamaConfig.Endpoint"/> is not set.
+    /// </exception>
+    public static IChatClient CreateOllamaChatClient(OllamaConfig ollamaConfig)
+    {
+        ArgumentNullException.ThrowIfNull(ollamaConfig);
+
+        // Endpoint is nullable on the config object; fail with a message that names the setting.
+        var endpoint = ollamaConfig.Endpoint
+            ?? throw new ArgumentNullException(nameof(ollamaConfig), $"Missing required configuration value: {OllamaConfig.CONFIG_SECTION}:Endpoint.");
+
+        // No API key: let OllamaChatClient use its default HttpClient.
+        if (string.IsNullOrWhiteSpace(ollamaConfig.ApiKey))
+        {
+            return new OllamaChatClient(endpoint, ollamaConfig.ModelName);
+        }
+
+        // TODO: Is a DisableActivityHandler also needed here (CORS fix for web client) when using Microsoft.Extensions.AI.Abstractions?
+        var httpClient = new HttpClient();
+        httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", ollamaConfig.ApiKey);
+        return new OllamaChatClient(endpoint, ollamaConfig.ModelName, httpClient);
+    }
+}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/CodeCompletionBackendType.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/CodeCompletionBackendType.cs
new file mode 100644
index 00000000..49b2042b
--- /dev/null
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/CodeCompletionBackendType.cs
@@ -0,0 +1,8 @@
+namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig;
+
+public enum CodeCompletionBackendType // Chat backends that ChatClientFactory.CreateChatClient can build a client for
+{
+    OpenAI, // Settings come from OpenAIConfig
+    AzureOpenAI, // Settings come from AzureOpenAIConfig
+    Ollama // Settings come from OllamaConfig
+}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/DisableActivityHandler.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/DisableActivityHandler.cs
similarity index 94%
rename from src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/DisableActivityHandler.cs
rename to src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/DisableActivityHandler.cs
index 5bd5797f..d68aafbb 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/DisableActivityHandler.cs
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/DisableActivityHandler.cs
@@ -1,6 +1,6 @@
 using System.Diagnostics;
 
-namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
+namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig;
 public class DisableActivityHandler : DelegatingHandler
 {
     /// <summary>
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/OllamaConfig.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/OllamaConfig.cs
new file mode 100644
index 00000000..bb74be6f
--- /dev/null
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/OllamaConfig.cs
@@ -0,0 +1,63 @@
+using Microsoft.Extensions.Configuration;
+
+namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig;
+
+/// <summary>
+/// Settings for the self-hosted Ollama code completion backend.
+/// </summary>
+public class OllamaConfig
+{
+    /// <summary>Optional API key. Only used if there is a proxy like Open WebUI in front of the Ollama API.</summary>
+    public string? ApiKey { get; set; }
+
+    /// <summary>Name of the model to use.</summary>
+    public string? ModelName { get; set; }
+
+    /// <summary>
+    /// String view of <see cref="Endpoint"/>. Reads as an empty string when no endpoint is set.
+    /// Assigning a value that is not an absolute URI (including null, empty or whitespace) clears
+    /// <see cref="Endpoint"/> instead of throwing.
+    /// </summary>
+    public string EndpointString
+    {
+        get
+        {
+            return Endpoint?.ToString() ?? string.Empty;
+        }
+        set
+        {
+            // TryCreate rather than new Uri(value): a malformed value must not throw out of a property setter.
+            Endpoint = Uri.TryCreate(value, UriKind.Absolute, out var endpoint) ? endpoint : null;
+        }
+    }
+
+    /// <summary>Ollama API endpoint, or null when not set.</summary>
+    public Uri? Endpoint { get; set; }
+
+    /// <summary>Configuration section the settings are read from.</summary>
+    public const string CONFIG_SECTION = "CodingAssistant:OpenAISelfHostedCodeLlama";
+
+    /// <summary>Creates an empty configuration; the caller sets the properties.</summary>
+    public OllamaConfig()
+    {
+    }
+
+    /// <summary>
+    /// Reads the settings from the <see cref="CONFIG_SECTION"/> section of <paramref name="config"/>.
+    /// </summary>
+    /// <param name="config">Application configuration.</param>
+    /// <exception cref="InvalidOperationException">
+    /// The section, <c>Endpoint</c> or <c>ModelName</c> is missing.
+    /// </exception>
+    public OllamaConfig(IConfiguration config)
+    {
+        var configSection = config.GetRequiredSection(CONFIG_SECTION);
+
+        Endpoint = configSection.GetValue<Uri>("Endpoint")
+            ?? throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION}:Endpoint. This is required for SelfHosted Ollama inference.");
+        ModelName = configSection.GetValue<string>("ModelName")
+            ?? throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION}:ModelName. This is required for SelfHosted Ollama inference.");
+        // ApiKey is optional. Only used if there is a proxy like Open WebUI in front of Ollama API.
+        ApiKey = configSection.GetValue<string>("ApiKey");
+    }
+}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/OpenAIConfig.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/OpenAIConfig.cs
new file mode 100644
index 00000000..cf722121
--- /dev/null
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/BackendConfig/OpenAIConfig.cs
@@ -0,0 +1,41 @@
+using Microsoft.Extensions.Configuration;
+
+namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig;
+
+/// <summary>
+/// Settings for the OpenAI code completion backend.
+/// </summary>
+public class OpenAIConfig
+{
+    /// <summary>OpenAI API key. Empty until set.</summary>
+    public string ApiKey { get; set; } = string.Empty;
+
+    /// <summary>Name of the OpenAI model to use. Empty until set.</summary>
+    public string ModelName { get; set; } = string.Empty;
+
+    /// <summary>Configuration section the settings are read from.</summary>
+    public const string CONFIG_SECTION = "CodingAssistant:OpenAI";
+
+    /// <summary>
+    /// Creates a configuration with empty values; the caller sets the properties.
+    /// </summary>
+    public OpenAIConfig()
+    {
+    }
+
+    /// <summary>
+    /// Reads the settings from the <see cref="CONFIG_SECTION"/> section of <paramref name="config"/>.
+    /// </summary>
+    /// <param name="config">Application configuration.</param>
+    /// <exception cref="InvalidOperationException">
+    /// The section, <c>ModelName</c> or <c>ApiKey</c> is missing.
+    /// </exception>
+    public OpenAIConfig(IConfiguration config)
+    {
+        var configSection = config.GetRequiredSection(CONFIG_SECTION);
+        ModelName = configSection.GetValue<string>("ModelName")
+            ?? throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION}:ModelName");
+        ApiKey = configSection.GetValue<string>("ApiKey")
+            ?? throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION}:ApiKey");
+    }
+}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/ChatParameters.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/ChatParameters.cs
deleted file mode 100644
index cb913698..00000000
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/ChatParameters.cs
+++ /dev/null
@@ -1,27 +0,0 @@
-// Based on https://github.com/dotnet/smartcomponents
-
-namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference;
-
-public class ChatParameters
-{
-    public IList<ChatMessage>? Messages { get; set; }
-    public float? Temperature { get; set; }
-    public float? TopP { get; set; }
-    public int? MaxTokens { get; set; }
-    public float? FrequencyPenalty { get; set; }
-    public float? PresencePenalty { get; set; }
-    public IList<string>? StopSequences { get; set; }
-}
-
-public class ChatMessage(ChatMessageRole role, string text)
-{
-    public ChatMessageRole Role => role;
-    public string Text => text;
-}
-
-public enum ChatMessageRole
-{
-    System,
-    User,
-    Assistant,
-}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionConfig.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionConfig.cs
index dfd3c921..d03a05f1 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionConfig.cs
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionConfig.cs
@@ -1,18 +1,13 @@
-// Based on https://github.com/dotnet/smartcomponents
+using Microsoft.Extensions.AI;
 
 namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference;
 
 public class CodeCompletionConfig
 {
-    public string ProgrammingLanguage { get; set; } = "Basic";
     public string SystemInstruction { get; set; } = "Insert the code that appears at the location indicated by ^^^";
     public List<ChatMessage> Examples { get; set; } = new();
     public string UserMessageFormat { get; set; } = "{0}^^^{1}";
     public List<string> StopSequences { get; set; } = new();
 
     public Func<string, string, string, string> ParseResponse { get; set; } = (response, textBefore, textAfter) => response;
-
-    //public string? Parameters { get; set; }
-    //public string? UserRole { get; set; }
-    //public string[]? UserPhrases { get; set; }
 }
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionInference.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionInference.cs
index 71b22616..285acf4f 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionInference.cs
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionInference.cs
@@ -1,20 +1,35 @@
 // Based on https://github.com/dotnet/smartcomponents
+// and modified for Microsoft.Extensions.AI
 using System.Text;
+using Microsoft.Extensions.AI;
 
 namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference;
 
-public class CodeCompletionInference
-
+public class CodeCompletionInference(IChatClient chatClient)
 {
-    public virtual ChatParameters BuildPrompt(CodeCompletionConfig config, string textBefore, string textAfter)
+    private readonly IChatClient _chatClient = chatClient;
+
+    public virtual ChatOptions BuildChatOptions(CodeCompletionConfig config)
     {
+        return new ChatOptions
+        {
+            Temperature = 0,
+            TopP = 1,
+            MaxOutputTokens = 400,
+            StopSequences = config.StopSequences,
+            FrequencyPenalty = 0,
+            PresencePenalty = 0,
+        };
+    }
+
+    public virtual IList<ChatMessage> BuildChatMessages(CodeCompletionConfig config, string textBefore, string textAfter) // Builds the message list sent with a completion request.
+    {
+        // Add system instruction (it is the first message in the list)
         var systemMessageBuilder = new StringBuilder();
         systemMessageBuilder.Append(config.SystemInstruction);
-
         List<ChatMessage> messages =
         [
-            // System instruction
-            new(ChatMessageRole.System, systemMessageBuilder.ToString()),
+            new(ChatRole.System, systemMessageBuilder.ToString()),
         ];
 
         // Add examples
@@ -22,23 +37,18 @@ public virtual ChatParameters BuildPrompt(CodeCompletionConfig config, string te
             messages.AddRange(config.Examples);
 
         // Add user-entered text
-        messages.Add(new(ChatMessageRole.User, string.Format(config.UserMessageFormat, textBefore, textAfter)));
+        messages.Add(new(ChatRole.User, string.Format(config.UserMessageFormat, textBefore, textAfter))); // In UserMessageFormat, {0} is textBefore and {1} is textAfter.
 
-        return new ChatParameters
-        {
-            Messages = messages,
-            Temperature = 0,
-            MaxTokens = 400,
-            StopSequences = config.StopSequences,
-            FrequencyPenalty = 0,
-            PresencePenalty = 0,
-        };
+        return messages; // Only the messages are returned; request options come from BuildChatOptions.
     }
 
-    public virtual async Task<string> GetInsertionSuggestionAsync(IInferenceBackend inference, CodeCompletionConfig config, string textBefore, string textAfter)
+    public virtual async Task<string> GetInsertionSuggestionAsync(CodeCompletionConfig config, string textBefore, string textAfter)
     {
-        var chatOptions = BuildPrompt(config, textBefore, textAfter);
-        var response = await inference.GetChatResponseAsync(chatOptions);
+        var chatOptions = BuildChatOptions(config);
+        var chatMessages = BuildChatMessages(config, textBefore, textAfter);
+        ChatCompletion completionsResponse = await _chatClient.CompleteAsync(chatMessages, chatOptions);
+
+        var response = completionsResponse.Choices.FirstOrDefault()?.Text ?? string.Empty;
 
         return config.ParseResponse(response, textBefore, textAfter);
 
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/IInferenceBackend.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/IInferenceBackend.cs
deleted file mode 100644
index 44f088d1..00000000
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/IInferenceBackend.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-// Based on https://github.com/dotnet/smartcomponents
-
-namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference;
-
-public interface IInferenceBackend
-{
-    Task<string> GetChatResponseAsync(ChatParameters options);
-}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ApiConfig.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ApiConfig.cs
deleted file mode 100644
index f3f58865..00000000
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ApiConfig.cs
+++ /dev/null
@@ -1,70 +0,0 @@
-// Based on https://github.com/dotnet/smartcomponents
-
-using Microsoft.Extensions.Configuration;
-
-namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
-
-public class ApiConfig
-{
-    public string? ApiKey { get; set; }
-    public string? DeploymentName { get; set; }
-    public string EndpointString
-    {
-        get
-        {
-            return Endpoint?.ToString() ?? string.Empty;
-        }
-        set
-        {
-            Endpoint = string.IsNullOrWhiteSpace(value) ? null : new Uri(value);
-        }
-    }
-    public Uri? Endpoint { get; set; }
-
-    public bool SelfHosted { get; set; }
-
-    public const string CONFIG_SECTION = "CodingAssistant:OpenAI";
-    public const string CONFIG_SECTION_SELF_HOSTED = "CodingAssistant:OpenAISelfHostedCodeLlama";
-
-    public ApiConfig()
-    {
-    }
-
-    public ApiConfig(IConfiguration config, bool selfHosted)
-    {
-        // Using OpenAI API
-        if (selfHosted)
-        {
-            //Self-hosted API compatible with OpenAI (with CodeLllama-code model),
-            SelfHosted = true;
-
-            var configSection = config.GetRequiredSection(CONFIG_SECTION_SELF_HOSTED);
-
-            Endpoint = configSection.GetValue<Uri>("Endpoint")
-                ?? throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION_SELF_HOSTED}:Endpoint. This is required for SelfHosted inference.");
-
-            // Ollama uses this, but other self-hosted backends might not, so it's optional.
-            DeploymentName = configSection.GetValue<string>("DeploymentName");
-
-            // Ollama doesn't use this, but other self-hosted backends might do, so it's optional.
-            ApiKey = configSection.GetValue<string>("ApiKey");
-        }
-        else
-        {
-            // OpenAI or Azure OpenAI
-            SelfHosted = false;
-
-            var configSection = config.GetRequiredSection(CONFIG_SECTION);
-
-            // If set, we assume Azure OpenAI. If not, we assume OpenAI.
-            Endpoint = configSection.GetValue<Uri>("Endpoint");
-
-            // For Azure OpenAI, it's your deployment name. For OpenAI, it's the model name.
-            DeploymentName = configSection.GetValue<string>("DeploymentName")
-                ?? throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION}:DeploymentName");
-
-            ApiKey = configSection.GetValue<string>("ApiKey")
-                ?? throw new InvalidOperationException($"Missing required configuration value: {CONFIG_SECTION}:ApiKey");
-        }
-    }
-}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/OpenAIInferenceBackend.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/OpenAIInferenceBackend.cs
deleted file mode 100644
index 9b129961..00000000
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/OpenAIInferenceBackend.cs
+++ /dev/null
@@ -1,94 +0,0 @@
-// Based on https://github.com/dotnet/smartcomponents
-
-using System.Net;
-using System.Runtime.InteropServices;
-using Azure;
-using Azure.AI.OpenAI;
-using Microsoft.Extensions.Configuration;
-
-namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
-
-public class OpenAIInferenceBackend(ApiConfig apiConfig)
-    : IInferenceBackend
-{
-    public async Task<string> GetChatResponseAsync(ChatParameters options)
-    {
-#if DEBUG
-        if (RuntimeInformation.OSArchitecture != Architecture.Wasm)
-        {
-            if (ResponseCache.TryGetCachedResponse(options, out var cachedResponse))
-                return cachedResponse!;
-        }
-#endif
-
-        var client = CreateClient(apiConfig);
-        var chatCompletionsOptions = new ChatCompletionsOptions
-        {
-            DeploymentName = apiConfig.DeploymentName,
-            Temperature = options.Temperature ?? 0f,
-            NucleusSamplingFactor = options.TopP ?? 1,
-            MaxTokens = options.MaxTokens ?? 200,
-            FrequencyPenalty = options.FrequencyPenalty ?? 0,
-            PresencePenalty = options.PresencePenalty ?? 0,
-        };
-
-        foreach (var message in options.Messages ?? Enumerable.Empty<ChatMessage>())
-        {
-            chatCompletionsOptions.Messages.Add(message.Role switch
-            {
-                ChatMessageRole.System => new ChatRequestSystemMessage(message.Text),
-                ChatMessageRole.User => new ChatRequestUserMessage(message.Text),
-                ChatMessageRole.Assistant => new ChatRequestAssistantMessage(message.Text),
-                _ => throw new InvalidOperationException($"Unknown chat message role: {message.Role}")
-            });
-        }
-
-        if (options.StopSequences is { } stopSequences)
-        {
-            foreach (var stopSequence in stopSequences)
-            {
-                chatCompletionsOptions.StopSequences.Add(stopSequence);
-            }
-        }
-
-        var completionsResponse = await client.GetChatCompletionsAsync(chatCompletionsOptions);
-
-        var response = completionsResponse.Value.Choices.FirstOrDefault()?.Message.Content ?? string.Empty;
-
-#if DEBUG
-        if (RuntimeInformation.OSArchitecture != Architecture.Wasm)
-        {
-            ResponseCache.SetCachedResponse(options, response);
-        }
-#endif
-
-        return response;
-    }
-
-    private static OpenAIClient CreateClient(ApiConfig apiConfig)
-    {
-        if (apiConfig.SelfHosted)
-        {
-            //var transport = new SelfHostedLlmTransport(apiConfig.Endpoint!);
-
-            var httpClientHandler = new HttpClientHandler();
-            var disableActivityHandler = new DisableActivityHandler(httpClientHandler);
-            var httpClient = new HttpClient(disableActivityHandler);
-            var transport = new SelfHostedLlmTransport(apiConfig.Endpoint!, httpClient);
-
-            return new OpenAIClient(apiConfig.ApiKey, new() { Transport = transport });
-        }
-        else if (apiConfig.Endpoint is null)
-        {
-            // OpenAI
-            return new OpenAIClient(apiConfig.ApiKey);
-        }
-        else
-        {
-            // Azure OpenAI
-            return new OpenAIClient(
-                apiConfig.Endpoint,
-                new AzureKeyCredential(apiConfig.ApiKey!));
-        }
-    }
-}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ResponseCache.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ResponseCache.cs
deleted file mode 100644
index c846653b..00000000
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ResponseCache.cs
+++ /dev/null
@@ -1,124 +0,0 @@
-// Based on https://github.com/dotnet/smartcomponents
-
-#if DEBUG
-using System.Reflection;
-using System.Security.Cryptography;
-using System.Text;
-using System.Text.Json;
-using Console = System.Console;
-
-namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
-
-// This is primarily so that E2E tests running in CI don't have to call OpenAI for real, so that:
-// [1] We don't have to make the API keys available to CI
-// [2] There's no risk of random failures due to network issues or the nondeterminism of the AI responses
-// It will not be used in real apps in production. Its other benefit is reducing OpenAI usage during local development.
-
-internal static class ResponseCache
-{
-    static bool IsEnabled = Environment.GetEnvironmentVariable("CODECOMPLETION_E2E_TEST") == "true";
-
-    readonly static Lazy<string> CacheDir = new(() =>
-    {
-        var dir = Path.Combine(GetSolutionDirectory(), "test", "CachedResponses");
-        Directory.CreateDirectory(dir);
-        return dir;
-    });
-
-    public static bool TryGetCachedResponse(ChatParameters request, out string? response)
-    {
-        if (!IsEnabled)
-        {
-            response = null;
-            return false;
-        }
-
-        var filePath = GetCacheFilePath(request);
-        if (File.Exists(filePath))
-        {
-            Console.WriteLine("Using cached response for " + Path.GetFileName(filePath));
-            response = File.ReadAllText(filePath);
-            return true;
-        }
-        else
-        {
-            Console.WriteLine("Did not find cached response for " + Path.GetFileName(filePath));
-            response = null;
-            return false;
-        }
-    }
-
-    public static void SetCachedResponse(ChatParameters request, string response)
-    {
-        if (IsEnabled)
-        {
-            var filePath = GetCacheFilePath(request);
-            File.WriteAllText(filePath, response);
-            File.WriteAllText(filePath.Replace(".response.txt", ".request.json"), GetCacheKeyInput(request));
-        }
-    }
-
-    private static string GetCacheFilePath(ChatParameters request)
-        => GetCacheFilePath(request, request.Messages.LastOrDefault()?.Text ?? "no_messages");
-
-    private static string GetCacheFilePath<T>(T request, string summary)
-        => Path.Combine(CacheDir.Value, $"{GetCacheKey(request, summary)}.response.txt");
-
-    private static string GetSolutionDirectory()
-    {
-        const string filename = "CodeCompletion.sln";
-        var dir = new DirectoryInfo(Path.GetDirectoryName(Assembly.GetEntryAssembly()!.Location)!);
-        while (dir != null)
-        {
-            if (dir.EnumerateFiles(filename).Any())
-                return dir.FullName;
-
-            dir = dir.Parent;
-        }
-
-        throw new InvalidOperationException($"Could not find directory containing {filename}");
-    }
-
-    private static string GetCacheKeyInput<T>(T request)
-    {
-        return JsonSerializer.Serialize(request).Replace("\\r", "");
-    }
-
-    private static string GetCacheKey<T>(T request, string summary)
-    {
-        var json = GetCacheKeyInput(request);
-        var sha256 = SHA256.Create();
-        var hash = sha256.ComputeHash(Encoding.UTF8.GetBytes(json));
-
-        var sb = new StringBuilder();
-        for (var i = 0; i < 8; i++)
-        {
-            sb.Append(hash[i].ToString("x2"));
-        }
-
-        sb.Append("_");
-        sb.Append(ToShortSafeString(summary));
-
-        return sb.ToString();
-    }
-
-    private static string ToShortSafeString(string summary)
-    {
-        // This is just to make the cache filenames more recognizable. Won't help much if there's a common long prefix.
-        var sb = new StringBuilder();
-        foreach (var c in summary)
-        {
-            if (char.IsLetterOrDigit(c))
-                sb.Append(c);
-            else if (c == ' ')
-            {
-                sb.Append('_');
-            }
-
-            if (sb.Length >= 30)
-                break;
-        }
-        return sb.ToString();
-    }
-}
-#endif
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/SelfHostedLlmTransport.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/SelfHostedLlmTransport.cs
deleted file mode 100644
index a01638fc..00000000
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/SelfHostedLlmTransport.cs
+++ /dev/null
@@ -1,32 +0,0 @@
-// Based on https://github.com/dotnet/smartcomponents
-
-using Azure.Core;
-using Azure.Core.Pipeline;
-
-namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
-
-/// <summary>
-/// Used to resolve queries using Ollama or anything else that exposes an OpenAI-compatible
-/// endpoint with a scheme/host/port set of your choice.
-/// </summary>
-internal class SelfHostedLlmTransport : HttpClientTransport
-{
-    private readonly Uri _endpoint;
-
-    internal SelfHostedLlmTransport(Uri endpoint) : base()
-    {
-        _endpoint = endpoint;
-    }
-    internal SelfHostedLlmTransport(Uri endpoint, HttpClient httpClient) : base(httpClient)
-    {
-        _endpoint = endpoint;
-    }
-
-    public override ValueTask ProcessAsync(HttpMessage message)
-    {
-        message.Request.Uri.Scheme = _endpoint.Scheme;
-        message.Request.Uri.Host = _endpoint.Host;
-        message.Request.Uri.Port = _endpoint.Port;
-        return base.ProcessAsync(message);
-    }
-}
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/OpenAICodeSuggestion.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/OpenAICodeSuggestion.cs
index 9c835842..01c2fb1b 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/OpenAICodeSuggestion.cs
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/OpenAICodeSuggestion.cs
@@ -1,5 +1,6 @@
-using Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
 using Highbyte.DotNet6502.AI.CodingAssistant.Inference;
+using Highbyte.DotNet6502.AI.CodingAssistant.Inference.BackendConfig;
+using Microsoft.Extensions.AI;
 using Microsoft.Extensions.Configuration;
 
 namespace Highbyte.DotNet6502.AI.CodingAssistant;
@@ -7,35 +8,33 @@ public class OpenAICodeSuggestion : ICodeSuggestion
 {
     private bool _isAvailable;
     private string? _lastError;
-    private readonly OpenAIInferenceBackend _inferenceBackend;
     private readonly CodeCompletionConfig _codeCompletionConfig;
     private readonly CodeCompletionInference _codeCompletionInference;
 
     // OpenAI
     public static OpenAICodeSuggestion CreateOpenAICodeSuggestion(IConfiguration configuration, string programmingLanguage, string additionalSystemInstruction = "")
-    => CreateOpenAICodeSuggestion(new ApiConfig(configuration, selfHosted: false), programmingLanguage, additionalSystemInstruction);
-    public static OpenAICodeSuggestion CreateOpenAICodeSuggestion(ApiConfig apiConfig, string programmingLanguage, string additionalSystemInstruction)
+        => CreateOpenAICodeSuggestion(ChatClientFactory.CreateChatClient(CodeCompletionBackendType.OpenAI, configuration), programmingLanguage, additionalSystemInstruction); // Creates the OpenAI chat client from configuration, then delegates to the IChatClient overload.
+    public static OpenAICodeSuggestion CreateOpenAICodeSuggestion(IChatClient chatClient, string programmingLanguage, string additionalSystemInstruction) // Overload for callers that supply their own IChatClient.
     {
         var codeCompletionConfig = CodeSuggestionSystemInstructions.GetOpenAICodeCompletionConfig(programmingLanguage, additionalSystemInstruction);
-        return new OpenAICodeSuggestion(apiConfig, codeCompletionConfig);
+        return new OpenAICodeSuggestion(chatClient, codeCompletionConfig);
     }
 
     // CodeLlama via self-hosted OpenAI compatible API (Ollama)
     public static OpenAICodeSuggestion CreateOpenAICodeSuggestionForCodeLlama(IConfiguration configuration, string programmingLanguage, string additionalSystemInstruction)
-            => CreateOpenAICodeSuggestionForCodeLlama(new ApiConfig(configuration, selfHosted: true), programmingLanguage, additionalSystemInstruction);
-    public static OpenAICodeSuggestion CreateOpenAICodeSuggestionForCodeLlama(ApiConfig apiConfig, string programmingLanguage, string additionalSystemInstruction)
+            => CreateOpenAICodeSuggestionForCodeLlama(ChatClientFactory.CreateChatClient(CodeCompletionBackendType.Ollama, configuration), programmingLanguage, additionalSystemInstruction); // Creates the chat client for the Ollama backend type from configuration, then delegates to the IChatClient overload.
+    public static OpenAICodeSuggestion CreateOpenAICodeSuggestionForCodeLlama(IChatClient chatClient, string programmingLanguage, string additionalSystemInstruction) // Overload for callers that supply their own IChatClient; uses the CodeLlama completion config.
     {
         var codeCompletionConfig = CodeSuggestionSystemInstructions.GetCodeLlamaCodeCompletionConfig(programmingLanguage, additionalSystemInstruction);
-        return new OpenAICodeSuggestion(apiConfig, codeCompletionConfig);
+        return new OpenAICodeSuggestion(chatClient, codeCompletionConfig);
     }
 
-    private OpenAICodeSuggestion(ApiConfig apiConfig, CodeCompletionConfig codeCompletionConfig)
+    private OpenAICodeSuggestion(IChatClient chatClient, CodeCompletionConfig codeCompletionConfig) // Private: instances are created through the static factory methods.
     {
         _isAvailable = true;
         _lastError = null;
-        _inferenceBackend = new OpenAIInferenceBackend(apiConfig);
         _codeCompletionConfig = codeCompletionConfig;
-        _codeCompletionInference = new CodeCompletionInference();
+        _codeCompletionInference = new CodeCompletionInference(chatClient); // The chat client is passed to the inference helper; this class keeps no field for it.
     }
 
     public bool IsAvailable => _isAvailable;
@@ -51,7 +50,7 @@ public virtual async Task<string> GetInsertionSuggestionAsync(string textBefore,
         // Call OpenAI API directly
         try
         {
-            return await _codeCompletionInference.GetInsertionSuggestionAsync(_inferenceBackend, _codeCompletionConfig, textBefore, textAfter);
+            return await _codeCompletionInference.GetInsertionSuggestionAsync(_codeCompletionConfig, textBefore, textAfter);
         }
         catch (Exception ex)
         {
diff --git a/src/libraries/Highbyte.DotNet6502.AI/Highbyte.DotNet6502.AI.csproj b/src/libraries/Highbyte.DotNet6502.AI/Highbyte.DotNet6502.AI.csproj
index 1aa7b82d..6bb4d1d4 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/Highbyte.DotNet6502.AI.csproj
+++ b/src/libraries/Highbyte.DotNet6502.AI/Highbyte.DotNet6502.AI.csproj
@@ -25,7 +25,14 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Azure.AI.OpenAI" Version="1.0.0-beta.11" />
+    <PackageReference Include="Azure.AI.OpenAI" Version="2.0.0" />
+    <PackageReference Include="Azure.Identity" Version="1.13.0" />
+    <PackageReference Include="Microsoft.ApplicationInsights.AspNetCore" Version="2.22.0" /> <!-- NOTE(review): ASP.NET Core telemetry package in a class library; confirm code in this project actually uses it. -->
+    <PackageReference Include="Microsoft.Extensions.AI" Version="9.0.0-preview.9.24507.7" /> <!-- Preview build; the four Microsoft.Extensions.AI* references here share this exact version. -->
+    <PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="9.0.0-preview.9.24507.7" />
+    <PackageReference Include="Microsoft.Extensions.AI.Ollama" Version="9.0.0-preview.9.24507.7" />
+    <PackageReference Include="Microsoft.Extensions.AI.OpenAI" Version="9.0.0-preview.9.24507.7" />
+    <PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="8.0.1" />
     <PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="8.0.2" />
 
   </ItemGroup>