diff --git a/src/DotNext.IO/IO/Pipelines/PipeExtensions.Readers.cs b/src/DotNext.IO/IO/Pipelines/PipeExtensions.Readers.cs index 009864c2f..3f93a1ad5 100644 --- a/src/DotNext.IO/IO/Pipelines/PipeExtensions.Readers.cs +++ b/src/DotNext.IO/IO/Pipelines/PipeExtensions.Readers.cs @@ -5,8 +5,8 @@ using System.Runtime.InteropServices; using System.Runtime.Versioning; using System.Security.Cryptography; +using System.Text; using static System.Buffers.Binary.BinaryPrimitives; -using static System.Text.EncodingExtensions; using Missing = System.Reflection.Missing; namespace DotNext.IO.Pipelines; @@ -757,10 +757,9 @@ public static async IAsyncEnumerable> ReadAllAsync(this Pip } /// - /// Decodes null-terminated string. + /// Decodes null-terminated UTF-8 encoded string. /// /// The pipe reader. - /// The text decoding context. /// The output buffer for decoded characters. /// The token that can be used to cancel the operation. /// The task representing asynchronous execution of this method. @@ -769,12 +768,12 @@ public static async IAsyncEnumerable> ReadAllAsync(this Pip /// or is . /// /// The operation has been canceled. - public static async ValueTask ReadStringAsync(this PipeReader reader, DecodingContext context, IBufferWriter output, CancellationToken token = default) + public static async ValueTask ReadUtf8Async(this PipeReader reader, IBufferWriter output, CancellationToken token = default) { ArgumentNullException.ThrowIfNull(reader); ArgumentNullException.ThrowIfNull(output); - var decoder = context.GetDecoder(); + var decoder = Encoding.UTF8.GetDecoder(); SequencePosition consumed; bool completed; @@ -783,7 +782,7 @@ public static async ValueTask ReadStringAsync(this PipeReader reader, DecodingCo var readResult = await reader.ReadAsync(token).ConfigureAwait(false); var buffer = readResult.Buffer; - if (buffer.PositionOf(DecodingContext.StringTerminationByte).TryGetValue(out consumed)) + if (buffer.PositionOf(DecodingContext.Utf8NullChar).TryGetValue(out consumed)) { buffer = buffer.Slice(0, consumed); completed = true; diff --git a/src/DotNext.IO/IO/StreamExtensions.cs b/src/DotNext.IO/IO/StreamExtensions.cs index c083ff940..50b95b2c9 100644 --- a/src/DotNext.IO/IO/StreamExtensions.cs +++ b/src/DotNext.IO/IO/StreamExtensions.cs @@ -4,6 +4,7 @@ using System.Runtime.InteropServices; using System.Runtime.Versioning; using System.Text; +using Utf8 = System.Text.Unicode.Utf8; namespace DotNext.IO; @@ -1317,10 +1318,9 @@ public static async IAsyncEnumerable> ReadAllAsync(this Str } /// - /// Decodes null-terminated string asynchronously. + /// Decodes null-terminated UTF-8 encoded string asynchronously. /// /// The stream containing encoded string. - /// The decoding context. /// The buffer used to read from stream. /// The output buffer for decoded characters. /// The token that can be used to cancel the operation. @@ -1331,47 +1331,30 @@ public static async IAsyncEnumerable> ReadAllAsync(this Str /// /// is too small to decode at least one character. /// The operation has been canceled. - public static async ValueTask ReadStringAsync(this Stream stream, DecodingContext context, Memory buffer, IBufferWriter output, CancellationToken token = default) + [AsyncMethodBuilder(typeof(PoolingAsyncValueTaskMethodBuilder<>))] + public static async ValueTask ReadUtf8Async(this Stream stream, Memory buffer, IBufferWriter output, CancellationToken token = default) { ArgumentNullException.ThrowIfNull(stream); ArgumentNullException.ThrowIfNull(output); - if (context.Encoding.GetMaxCharCount(buffer.Length) is 0) + if (Encoding.UTF8.GetMaxCharCount(buffer.Length) is 0) throw new ArgumentException(ExceptionMessages.BufferTooSmall, nameof(buffer)); - var decoder = context.GetDecoder(); - var result = 0; - bool completed; + int consumedBufferBytes, bytesRead, bufferOffset = 0; do { - var bytesRead = await stream.ReadAsync(buffer, token).ConfigureAwait(false); - var input = buffer.Slice(0, bytesRead); - - var nullCharIndex = input.Span.IndexOf(DecodingContext.StringTerminationByte); - if (nullCharIndex >= 0) - { - result = nullCharIndex + 1; - input = input.Slice(0, nullCharIndex); - completed = true; - } - else - { - completed = input.IsEmpty; - } - - decoder.Convert(input.Span, output, completed, out _, out _); + bytesRead = await stream.ReadAsync(buffer.Slice(bufferOffset), token).ConfigureAwait(false); } - while (!completed); + while (!ConvertToUtf8(buffer.Span.Slice(0, bufferOffset + bytesRead), output, out consumedBufferBytes, out bufferOffset)); - return result; + return consumedBufferBytes; } /// - /// Decodes null-terminated string synchronously. + /// Decodes null-terminated UTF-8 encoded string synchronously. /// /// The stream containing encoded string. - /// The decoding context. /// The buffer used to read from stream. /// The output buffer for decoded characters. /// The number of used bytes in . @@ -1380,39 +1363,60 @@ public static async ValueTask ReadStringAsync(this Stream stream, DecodingC /// or is . /// /// is too small to decode at least one character. - public static int ReadString(this Stream stream, in DecodingContext context, Span buffer, IBufferWriter output) + public static int ReadUtf8(this Stream stream, Span buffer, IBufferWriter output) { ArgumentNullException.ThrowIfNull(stream); ArgumentNullException.ThrowIfNull(output); - if (context.Encoding.GetMaxCharCount(buffer.Length) is 0) + if (Encoding.UTF8.GetMaxCharCount(buffer.Length) is 0) throw new ArgumentException(ExceptionMessages.BufferTooSmall, nameof(buffer)); - var decoder = context.GetDecoder(); - var result = 0; - bool completed; + int consumedBufferBytes, bytesRead, bufferOffset = 0; do { - var bytesRead = stream.Read(buffer); - var input = buffer.Slice(0, bytesRead); - - var nullCharIndex = input.IndexOf(DecodingContext.StringTerminationByte); - if (nullCharIndex >= 0) - { - result = nullCharIndex + 1; - input = input.Slice(0, nullCharIndex); - completed = true; - } - else - { - completed = input.IsEmpty; - } - - decoder.Convert(input, output, completed, out _, out _); + bytesRead = stream.Read(buffer.Slice(bufferOffset)); } - while (!completed); + while (!ConvertToUtf8(buffer.Slice(0, bufferOffset + bytesRead), output, out consumedBufferBytes, out bufferOffset)); - return result; + return consumedBufferBytes; + } + + private static bool ConvertToUtf8(Span buffer, IBufferWriter output, out int consumedCount, out int bufferOffset) + { + bool flush; + var nullCharIndex = buffer.IndexOf(DecodingContext.Utf8NullChar); + + if (nullCharIndex >= 0) + { + consumedCount = nullCharIndex + 1; + buffer = buffer.Slice(0, nullCharIndex); + flush = true; + } + else + { + consumedCount = buffer.Length; + flush = buffer.IsEmpty; + } + + var chars = output.GetSpan(Encoding.UTF8.GetMaxCharCount(buffer.Length)); + + switch (Utf8.ToUtf16(buffer, chars, out var bytesRead, out var charsWritten, replaceInvalidSequences: false, flush)) + { + case OperationStatus.NeedMoreData: + // we need more data, copy undecoded bytes to the beginning of the buffer + var bufferTail = buffer.Slice(bytesRead); + bufferOffset = bufferTail.Length; + bufferTail.CopyTo(buffer); + break; + case OperationStatus.Done: + bufferOffset = 0; + break; + default: + throw new DecoderFallbackException(); + } + + output.Advance(charsWritten); + return flush; } } \ No newline at end of file diff --git a/src/DotNext.IO/Text/DecodingContext.cs b/src/DotNext.IO/Text/DecodingContext.cs index 5899b6f2c..1562ec685 100644 --- a/src/DotNext.IO/Text/DecodingContext.cs +++ b/src/DotNext.IO/Text/DecodingContext.cs @@ -14,7 +14,7 @@ namespace DotNext.Text; [StructLayout(LayoutKind.Auto)] public readonly struct DecodingContext : ICloneable, IResettable { - internal const byte StringTerminationByte = 0; + internal const byte Utf8NullChar = 0; private readonly Encoding encoding; private readonly Decoder? decoder; diff --git a/src/DotNext.Tests/IO/Pipelines/PipeExtensionsTests.cs b/src/DotNext.Tests/IO/Pipelines/PipeExtensionsTests.cs index 6bf5c6ba5..da03a1886 100644 --- a/src/DotNext.Tests/IO/Pipelines/PipeExtensionsTests.cs +++ b/src/DotNext.Tests/IO/Pipelines/PipeExtensionsTests.cs @@ -367,7 +367,7 @@ public static async Task DecodeNullTerminatedStringAsync() pipe.Writer.Complete(); var writer = new ArrayBufferWriter(); - await pipe.Reader.ReadStringAsync(Encoding.UTF8, writer); + await pipe.Reader.ReadUtf8Async(writer); Equal("Привет, мир!", writer.WrittenSpan.ToString()); } } \ No newline at end of file diff --git a/src/DotNext.Tests/IO/StreamExtensionsTests.cs b/src/DotNext.Tests/IO/StreamExtensionsTests.cs index 586e692b1..f5f99a494 100644 --- a/src/DotNext.Tests/IO/StreamExtensionsTests.cs +++ b/src/DotNext.Tests/IO/StreamExtensionsTests.cs @@ -682,15 +682,15 @@ public static async Task ReadEntireStream() public static async Task DecodeNullTerminatedStringAsync(int bufferSize) { using var ms = new MemoryStream(); - ms.Write("Привет, мир!"u8); + ms.Write("Привет, \u263A!"u8); ms.WriteByte(0); ms.WriteByte(0); ms.Position = 0L; - var buffer = new byte[bufferSize]; + Memory buffer = new byte[bufferSize]; var writer = new ArrayBufferWriter(); - await ms.ReadStringAsync(Encoding.UTF8, buffer, writer); - Equal("Привет, мир!", writer.WrittenSpan.ToString()); + await ms.ReadUtf8Async(buffer, writer); + Equal("Привет, \u263A!", writer.WrittenSpan.ToString()); } [Theory] @@ -700,14 +700,29 @@ public static async Task DecodeNullTerminatedStringAsync(int bufferSize) public static void DecodeNullTerminatedString(int bufferSize) { using var ms = new MemoryStream(); - ms.Write("Привет, мир!"u8); + ms.Write("Привет, \u263A!"u8); ms.WriteByte(0); ms.WriteByte(0); ms.Position = 0L; - var buffer = new byte[bufferSize]; + Span buffer = stackalloc byte[bufferSize]; + var writer = new ArrayBufferWriter(); + ms.ReadUtf8(buffer, writer); + Equal("Привет, \u263A!", writer.WrittenSpan.ToString()); + } + + [Fact] + public static void DecodeNullTerminatedEmptyString() + { + using var ms = new MemoryStream(); + ms.Write("\0"u8); + ms.WriteByte(0); + ms.WriteByte(0); + ms.Position = 0L; + + Span buffer = stackalloc byte[8]; var writer = new ArrayBufferWriter(); - ms.ReadString(Encoding.UTF8, buffer, writer); - Equal("Привет, мир!", writer.WrittenSpan.ToString()); + ms.ReadUtf8(buffer, writer); + Equal(string.Empty, writer.WrittenSpan.ToString()); } } \ No newline at end of file