Skip to content

Commit

Permalink
Merge pull request #35 from sveinungf/dev/encoding
Browse files Browse the repository at this point in the history
New XML encode implementation
  • Loading branch information
sveinungf authored Jan 26, 2024
2 parents 2fe06e2 + f550b68 commit d7c085f
Show file tree
Hide file tree
Showing 24 changed files with 356 additions and 71 deletions.
4 changes: 3 additions & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ dotnet_diagnostic.CS9113.severity = none
[{*Test,*.Benchmark,*.Cmd}/**.cs]
# CA1305: Specify IFormatProvider
dotnet_diagnostic.CA1305.severity = none
# CA1062: Validate arguments of public methods
dotnet_diagnostic.CA1062.severity = none
# CA1707: Identifiers should not contain underscores
dotnet_diagnostic.CA1707.severity = none
# CA1720: Identifiers should not contain type names
Expand Down Expand Up @@ -201,7 +203,7 @@ MA0053.public_class_should_be_sealed = true
# MA0076: Do not use implicit culture-sensitive ToString in interpolated strings
dotnet_diagnostic.MA0076.severity = warning

# Disable MA0053 for types that have been publicly exposed in a previous version
# Disable MA0053 (sealing class) for types that have been publicly exposed in a previous version
[{AutoFilterOptions.cs,ColumnOptions.cs,RowOptions.cs,SpreadCheetahOptions.cs,WorksheetOptions.cs,WorksheetRowGenerator.cs}]
dotnet_diagnostic.MA0053.severity = none

Expand Down
1 change: 1 addition & 0 deletions BannedSymbols.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
M:System.IO.Compression.ZipArchive.CreateEntry(System.String);Pass CompressionLevel parameter
M:System.Net.WebUtility.HtmlEncode(System.String);Use XmlUtility.XmlEncode
P:System.Nullable`1.Value;GetValueOrDefault() or the coalesce operator do less work
8 changes: 0 additions & 8 deletions Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,6 @@
<AccelerateBuildsInVisualStudio>true</AccelerateBuildsInVisualStudio>
</PropertyGroup>

<ItemGroup Condition="'$(MSBuildProjectName)' != 'SpreadCheetah.SourceGenerator' And '$(MSBuildProjectName)' != 'SpreadCheetah.SourceGenerator.CSharp8Test'">
<PackageReference Include="Polyfill">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<NoWarn>MA0048</NoWarn>
</PackageReference>
</ItemGroup>

<ItemGroup>
<AdditionalFiles Include="$(MSBuildThisFileDirectory)/BannedSymbols.txt" Visible="false" />
</ItemGroup>
Expand Down
4 changes: 4 additions & 0 deletions SpreadCheetah.Benchmark/SpreadCheetah.Benchmark.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
<PackageReference Include="DocumentFormat.OpenXml" />
<PackageReference Include="EPPlusFree" />
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" PrivateAssets="all" />
<PackageReference Include="Polyfill">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="Polyfill">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="xunit" />
<PackageReference Include="xunit.runner.visualstudio">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
Expand Down
7 changes: 7 additions & 0 deletions SpreadCheetah.Test/SpreadCheetah.Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@
</PackageReference>
</ItemGroup>

<ItemGroup Condition="'$(Configuration)' != 'Debug'">
<PackageReference Include="Polyfill">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>

<ItemGroup Condition="'$(TargetFramework)' == 'net472'">
<PackageReference Include="System.Collections.Immutable" />
<PackageReference Include="System.IO.Compression" />
Expand Down
32 changes: 30 additions & 2 deletions SpreadCheetah.Test/Tests/SpreadsheetRowTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,14 @@ public async Task Spreadsheet_AddRow_CellWithoutValue(CellType type, RowCollecti
"With whitespace",
"With trailing whitespace ",
" With leading whitespace",
"With-Special-Characters!#¤%&",
"With-Special!#¤%Characters",
"With-Special<>&'\\Characters",
"With\"Quotation\"Marks",
"WithNorwegianCharactersÆØÅ",
"WithEmoji\ud83d\udc4d",
"With\ud83d\udc4dEmoji",
"With🌉Emoji",
"With\tValid\nControlCharacters",
"WithCharacters\u00a0\u00c9\u00ffBetween160And255",
"",
null);

Expand Down Expand Up @@ -144,6 +148,30 @@ public async Task Spreadsheet_AddRow_CellWithStringValue(string? value, CellType
Assert.Equal(value ?? string.Empty, actualCell.InnerText);
}

/// <summary>
/// Writes a single string cell whose value embeds the control characters U+0000, U+0010 and U+001F,
/// then verifies that the resulting spreadsheet passes validation and that the cell text read back
/// no longer contains those characters.
/// </summary>
[Theory]
[MemberData(nameof(TestData.CellTypes), MemberType = typeof(TestData))]
public async Task Spreadsheet_AddRow_CellWithInvalidControlCharacterStringValue(CellType type, RowCollectionType rowType)
{
    // Arrange
    const string input = "With\u0000Control\u0010\u001fCharacters";
    const string expectedText = "WithControlCharacters";
    using var stream = new MemoryStream();
    await using var spreadsheet = await Spreadsheet.CreateNewAsync(stream);
    await spreadsheet.StartWorksheetAsync("Sheet");
    var cell = CellFactory.Create(type, input);

    // Act
    await spreadsheet.AddRowAsync(cell, rowType);
    await spreadsheet.FinishAsync();

    // Assert
    SpreadsheetAssert.Valid(stream);
    using var document = SpreadsheetDocument.Open(stream, true);
    var worksheetPart = document.WorkbookPart!.WorksheetParts.Single();
    var writtenCell = worksheetPart.Worksheet.Descendants<OpenXmlCell>().Single();
    Assert.Equal(CellValues.InlineString, writtenCell.DataType?.Value);
    Assert.Equal(expectedText, writtenCell.InnerText);
}

public static IEnumerable<object?[]> StringLengths() => TestData.CombineWithCellTypes(
4095,
4096,
Expand Down
60 changes: 60 additions & 0 deletions SpreadCheetah.Test/Tests/XmlUtilityTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#if DEBUG
using SpreadCheetah.Helpers;
using System.Text;
using Xunit;

namespace SpreadCheetah.Test.Tests;

/// <summary>
/// Tests for <c>XmlUtility.TryXmlEncodeToUtf8</c> and <c>XmlUtility.XmlEncode</c>.
/// Both theories use the same input/expected pairs: the five characters &amp;, &lt;, &gt;, ' and "
/// are expected as named entities, tab/CR/LF are expected unchanged, and the control
/// characters U+0001 through U+0008 are expected to be absent from the output.
/// </summary>
public static class XmlUtilityTests
{
    [Theory]
    [InlineData("", "")]
    [InlineData("Just a regular string without any special characters", "Just a regular string without any special characters")]
    [InlineData("&", "&amp;")]
    [InlineData("&<>'\"", "&amp;&lt;&gt;&apos;&quot;")]
    [InlineData("&OnlyFirstCharacter", "&amp;OnlyFirstCharacter")]
    [InlineData("&This<string>has'special\"characters in between", "&amp;This&lt;string&gt;has&apos;special&quot;characters in between")]
    [InlineData("This<string>has'special\"characters in between&", "This&lt;string&gt;has&apos;special&quot;characters in between&amp;")]
    [InlineData("\tHandling\r\nAllowed\nControl\tCharacters\n", "\tHandling\r\nAllowed\nControl\tCharacters\n")]
    [InlineData("\u0001Handling\u0002Invalid\u0003Control\u0004Characters\u0005", "HandlingInvalidControlCharacters")]
    [InlineData("\u0006", "")]
    [InlineData("\u0007\u0008", "")]
    public static void XmlUtility_TryXmlEncodeToUtf8_Success(string value, string expected)
    {
        // Arrange
        // Six bytes per input character covers the longest replacements expected above
        // ("&apos;" and "&quot;" are six bytes each).
        var capacity = value.Length * 6;
        var destination = new byte[capacity];

        // Act
        var succeeded = XmlUtility.TryXmlEncodeToUtf8(value.AsSpan(), destination, out var written);

        // Assert
        Assert.True(succeeded);

        // Decode only the bytes that were reported as written.
        var decoded = Encoding.UTF8.GetString(destination, 0, written);
        Assert.Equal(expected, decoded);
    }

    [Theory]
    [InlineData("", "")]
    [InlineData("Just a regular string without any special characters", "Just a regular string without any special characters")]
    [InlineData("&", "&amp;")]
    [InlineData("&<>'\"", "&amp;&lt;&gt;&apos;&quot;")]
    [InlineData("&OnlyFirstCharacter", "&amp;OnlyFirstCharacter")]
    [InlineData("&This<string>has'special\"characters in between", "&amp;This&lt;string&gt;has&apos;special&quot;characters in between")]
    [InlineData("This<string>has'special\"characters in between&", "This&lt;string&gt;has&apos;special&quot;characters in between&amp;")]
    [InlineData("\tHandling\r\nAllowed\nControl\tCharacters\n", "\tHandling\r\nAllowed\nControl\tCharacters\n")]
    [InlineData("\u0001Handling\u0002Invalid\u0003Control\u0004Characters\u0005", "HandlingInvalidControlCharacters")]
    [InlineData("\u0006", "")]
    [InlineData("\u0007\u0008", "")]
    public static void XmlUtility_XmlEncode_Success(string value, string expected)
        => Assert.Equal(expected, XmlUtility.XmlEncode(value));
}
#endif
7 changes: 4 additions & 3 deletions SpreadCheetah/CellWriters/BaseCellWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,10 @@ protected bool FinishWritingFormulaCellValue(in Cell cell, string formulaText, r
// Write the "</f><v>" part
if (cellValueIndex < cachedValueStartIndex)
{
var separator = FormulaCellHelper.EndFormulaBeginCachedValue;
if (separator.Length > Buffer.FreeCapacity) return false;
Buffer.Advance(SpanHelper.GetBytes(separator, Buffer.GetSpan()));
if (!FormulaCellHelper.EndFormulaBeginCachedValue.TryCopyTo(Buffer.GetSpan()))
return false;

Buffer.Advance(FormulaCellHelper.EndFormulaBeginCachedValue.Length);
cellValueIndex = cachedValueStartIndex;
}

Expand Down
3 changes: 1 addition & 2 deletions SpreadCheetah/DataCell.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using SpreadCheetah.CellValueWriters;
using System.Net;

namespace SpreadCheetah;

Expand All @@ -18,7 +17,7 @@ public readonly record struct DataCell
/// </summary>
public DataCell(string? value)
{
StringValue = value != null ? WebUtility.HtmlEncode(value) : string.Empty;
StringValue = value;
Type = value != null ? CellWriterType.String : CellWriterType.Null;
}

Expand Down
4 changes: 1 addition & 3 deletions SpreadCheetah/Formula.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
using System.Net;

namespace SpreadCheetah;

/// <summary>
Expand All @@ -15,6 +13,6 @@ public readonly record struct Formula
/// </summary>
public Formula(string? formulaText)
{
FormulaText = formulaText != null ? WebUtility.HtmlEncode(formulaText) : string.Empty;
FormulaText = formulaText ?? "";
}
}
8 changes: 1 addition & 7 deletions SpreadCheetah/Helpers/SpanHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@ namespace SpreadCheetah.Helpers;

internal static class SpanHelper
{
public static int GetBytes(ReadOnlySpan<byte> source, Span<byte> destination)
{
source.CopyTo(destination);
return source.Length;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool TryCopyTo(this ReadOnlySpan<byte> source, Span<byte> bytes, ref int bytesWritten)
{
Expand Down Expand Up @@ -81,7 +75,7 @@ public static bool TryWrite(ReadOnlySpan<char> value, Span<byte> bytes, ref int
}

#if NETSTANDARD2_0
public static bool TryWriteLongString(string value, ref int valueIndex, Span<byte> bytes, ref int bytesWritten)
public static bool TryWriteLongString(string? value, ref int valueIndex, Span<byte> bytes, ref int bytesWritten)
=> TryWriteLongString(value.AsSpan(), ref valueIndex, bytes, ref bytesWritten);
#endif

Expand Down
19 changes: 0 additions & 19 deletions SpreadCheetah/Helpers/Utf8EncodingExtensions.cs

This file was deleted.

24 changes: 19 additions & 5 deletions SpreadCheetah/Helpers/Utf8Helper.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System.Runtime.CompilerServices;
using System.Text;

namespace SpreadCheetah.Helpers;
Expand All @@ -6,13 +7,26 @@ internal static class Utf8Helper
{
public const int MaxBytePerChar = 6;

private static readonly UTF8Encoding Utf8NoBom = new(false);
public static readonly UTF8Encoding Utf8NoBom = new(false);

public static int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes) => Utf8NoBom.GetBytes(chars, bytes);

#if NETSTANDARD2_0
public static bool TryGetBytes(string? chars, Span<byte> bytes, out int bytesWritten) => TryGetBytes(chars.AsSpan(), bytes, out bytesWritten);
#endif
public static bool TryGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int bytesWritten)
{
// Try first with an approximate value length, then try with a more accurate value length
if (DestinationCanFitTranscodedString(chars, bytes))
{
bytesWritten = Utf8NoBom.GetBytes(chars, bytes);
return true;
}

public static bool TryGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int bytesWritten) => Utf8NoBom.TryGetBytesInternal(chars, bytes, out bytesWritten);
bytesWritten = 0;
return false;
}

/// <summary>
/// Determines whether <paramref name="bytes"/> is long enough to receive the UTF-8 encoding of <paramref name="chars"/>.
/// </summary>
/// <param name="chars">The characters that are about to be encoded.</param>
/// <param name="bytes">The destination the encoded bytes would be written to.</param>
/// <returns>
/// <see langword="true"/> when the destination length is at least <c>chars.Length * MaxBytePerChar</c>,
/// or at least the exact byte count reported by the encoding; otherwise <see langword="false"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool DestinationCanFitTranscodedString(ReadOnlySpan<char> chars, Span<byte> bytes)
{
    // Cheap upper-bound check first. The product is computed as a long so that a very
    // large input cannot wrap around int and make a too-small destination look big enough.
    if (bytes.Length >= (long)chars.Length * MaxBytePerChar)
        return true;

    // Otherwise fall back to the exact encoded size.
    return bytes.Length >= Utf8NoBom.GetByteCount(chars);
}
}
Loading

0 comments on commit d7c085f

Please sign in to comment.