Skip to content

Commit

Permalink
Changed TensorSpan and ReadOnlyTensorSpan layout for better performan…
Browse files Browse the repository at this point in the history
…ce. (dotnet#103244)

* TensorShape

* fixed TensorShape issue

* removed extra inline buffer type. Fixed tests. Added large dimension testing

* removed typo

* adding unsaved files

* changes to use const for stack alloc comparison

* changes from pr comments
  • Loading branch information
michaelgsharp authored Jun 20, 2024
1 parent 7377eda commit 4cfc93b
Show file tree
Hide file tree
Showing 11 changed files with 475 additions and 296 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
</ItemGroup>

<ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETCoreApp'">
<Compile Include="System\Numerics\Tensors\netcore\TensorShape.cs" />
<Compile Include="System\Numerics\Tensors\netcore\TensorHelpers.cs" />
<Compile Include="System\Numerics\Tensors\netcore\TensorExtensions.cs" />
<Compile Include="System\Numerics\Tensors\netcore\Tensor.Factory.cs" />
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -153,16 +153,16 @@ public static Tensor<T> CreateUninitialized<T>(scoped ReadOnlySpan<nint> lengths

/// <summary>
/// Builds a <see cref="Span{T}"/> that starts at <c>destination._reference</c>, hands it to
/// <c>GaussianDistribution&lt;T&gt;</c> together with the same length, and returns
/// <paramref name="destination"/> by reference.
/// </summary>
/// <param name="destination">The tensor span whose memory is passed to <c>GaussianDistribution&lt;T&gt;</c>.</param>
/// <returns>The same <paramref name="destination"/> reference that was passed in.</returns>
/// <remarks>
/// This is a diff hunk: in each adjacent pair of near-identical statements below, the first line is
/// the one removed by the commit (it reads <c>_flattenedLength</c>) and the second is its replacement
/// (it reads <c>_shape._memoryLength</c>).
/// NOTE(review): GaussianDistribution is not visible here; presumably it writes normally distributed
/// values into the span - confirm against its definition.
/// </remarks>
public static ref readonly TensorSpan<T> FillGaussianNormalDistribution<T>(in TensorSpan<T> destination) where T : IFloatingPoint<T>
{
// Removed line, then its replacement: the span length now comes from the shape's memory length.
// NOTE(review): the length is narrowed to int by the cast - confirm lengths above int.MaxValue cannot reach here.
Span<T> span = MemoryMarshal.CreateSpan<T>(ref destination._reference, (int)destination._flattenedLength);
Span<T> span = MemoryMarshal.CreateSpan<T>(ref destination._reference, (int)destination._shape._memoryLength);

// Removed line, then its replacement: the same length source is passed alongside the span.
GaussianDistribution<T>(span, destination._flattenedLength);
GaussianDistribution<T>(span, destination._shape._memoryLength);

return ref destination;
}

public static ref readonly TensorSpan<T> FillUniformDistribution<T>(in TensorSpan<T> destination) where T : IFloatingPoint<T>
{
Span<T> span = MemoryMarshal.CreateSpan<T>(ref destination._reference, (int)destination._flattenedLength);
Span<T> span = MemoryMarshal.CreateSpan<T>(ref destination._reference, (int)destination._shape._memoryLength);

for (int i = 0; i < span.Length; i++)
span[i] = T.CreateChecked(Random.Shared.NextDouble());
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ internal static class TensorHelpers
/// <returns>How many boolean values are true.</returns>
public static nint CountTrueElements(scoped in ReadOnlyTensorSpan<bool> filter)
{
Span<bool> filterSpan = MemoryMarshal.CreateSpan(ref filter._reference, (int)filter._flattenedLength);
Span<bool> filterSpan = MemoryMarshal.CreateSpan(ref filter._reference, (int)filter._shape._memoryLength);
nint count = 0;
for (int i = 0; i < filterSpan.Length; i++)
{
Expand Down Expand Up @@ -83,11 +83,14 @@ internal static nint[] GetIntermediateShape(ReadOnlySpan<nint> shape1, int shape
return newShape;
}

/// <summary>
/// Reports whether two tensor spans record the same memory length in their shapes.
/// </summary>
/// <param name="tensor1">The first span to compare.</param>
/// <param name="tensor2">The second span to compare.</param>
/// <returns>
/// <see langword="true"/> when <c>_shape._memoryLength</c> is equal for both spans; otherwise <see langword="false"/>.
/// </returns>
internal static bool IsUnderlyingStorageSameSize<T>(scoped in ReadOnlyTensorSpan<T> tensor1, scoped in ReadOnlyTensorSpan<T> tensor2)
{
    nint firstMemoryLength = tensor1._shape._memoryLength;
    nint secondMemoryLength = tensor2._shape._memoryLength;

    return firstMemoryLength == secondMemoryLength;
}

/// <summary>
/// Reports whether two tensors have the same size of underlying storage.
/// </summary>
/// <param name="tensor1">The first tensor to compare.</param>
/// <param name="tensor2">The second tensor to compare.</param>
/// <remarks>
/// This is a diff hunk: the first expression body below is the line removed by the commit and the
/// second is the line that replaces it.
/// </remarks>
internal static bool IsUnderlyingStorageSameSize<T>(Tensor<T> tensor1, Tensor<T> tensor2)
// Removed: compared the length of each tensor's Lengths (presumably the number of dimensions, not the storage size - confirm).
=> tensor1.Lengths.Length == tensor2.Lengths.Length;
// Added: compares the length of each tensor's _values (presumably the backing array - confirm against Tensor<T>).
=> tensor1._values.Length == tensor2._values.Length;

// Diff hunk: the first definition below is the one removed by the commit, the second is its replacement.

// Removed version: took both spans by value and compared their _lengths fields element by element.
internal static bool AreLengthsTheSame<T>(ReadOnlyTensorSpan<T> tensor1, ReadOnlyTensorSpan<T> tensor2)
=> tensor1._lengths.SequenceEqual(tensor2._lengths);
/// <summary>
/// Reports whether two tensor spans expose element-wise equal <c>Lengths</c>.
/// </summary>
/// <param name="tensor1">The first span to compare; passed as <c>scoped in</c>.</param>
/// <param name="tensor2">The second span to compare; passed as <c>scoped in</c>.</param>
/// <returns>The result of <c>SequenceEqual</c> over the two <c>Lengths</c> sequences.</returns>
internal static bool AreLengthsTheSame<T>(scoped in ReadOnlyTensorSpan<T> tensor1, scoped in ReadOnlyTensorSpan<T> tensor2)
=> tensor1.Lengths.SequenceEqual(tensor2.Lengths);

/// <summary>
/// Reports whether two length lists are element-wise equal.
/// </summary>
/// <param name="lengths1">The first list of lengths.</param>
/// <param name="lengths2">The second list of lengths.</param>
/// <returns>
/// <see langword="true"/> when both spans have the same number of elements and every pair of
/// elements is equal; otherwise <see langword="false"/>.
/// </returns>
internal static bool AreLengthsTheSame(ReadOnlySpan<nint> lengths1, ReadOnlySpan<nint> lengths2)
{
    return MemoryExtensions.SequenceEqual(lengths1, lengths2);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Numerics.Tensors
{
/// <summary>
/// Immutable record of a tensor's rank, per-dimension lengths and strides, and the length of the
/// memory it is laid over.
/// </summary>
/// <remarks>
/// Shapes with at most <see cref="MaxInlineArraySize"/> dimensions keep their lengths and strides in
/// two inline buffers inside the struct. Larger shapes allocate a single <c>nint[]</c> that holds the
/// lengths in its first <c>_rank</c> slots and the strides in the slots that follow.
/// </remarks>
internal readonly struct TensorShape
{
    // Used to determine when we need to allocate a metadata array
    public const int MaxInlineArraySize = 5;

    // Used to determine when we can stack alloc for indexing vs when we need to allocate
    public const int MaxInlineRank = 8;

    // Heap storage for lengths followed by strides; null whenever the inline buffers are in use.
    internal readonly nint[]? _metadata; // 8 bytes

    internal readonly nint _memoryLength; // 8 bytes
    internal readonly int _rank; // 4 bytes

    private readonly NintBuffer _lengths;
    private readonly NintBuffer _strides;

    /// <summary>
    /// Captures a copy of the supplied lengths and strides.
    /// </summary>
    /// <param name="memoryLength">Stored unchanged in <c>_memoryLength</c>; not validated here.</param>
    /// <param name="lengths">Length of each dimension; its element count becomes the rank.</param>
    /// <param name="strides">
    /// Stride of each dimension. Expected to have one entry per dimension; when fewer are supplied
    /// the missing trailing strides read back as zero.
    /// </param>
    internal TensorShape(nint memoryLength, ReadOnlySpan<nint> lengths, ReadOnlySpan<nint> strides)
    {
        int rank = lengths.Length;

        _memoryLength = memoryLength;
        _rank = rank;

        if (rank > MaxInlineArraySize)
        {
            // The Lengths/Strides getters read 2 * rank elements without bounds checks, so the
            // stride half must always span at least rank slots, even if the caller supplied
            // fewer strides than lengths (or none at all).
            nint[] metadata = new nint[rank + Math.Max(rank, strides.Length)];
            lengths.CopyTo(metadata);
            strides.CopyTo(metadata.AsSpan(rank));
            _metadata = metadata;
        }
        else
        {
            // Small ranks live entirely inside the struct; unused inline slots stay zero.
            lengths.CopyTo(_lengths);
            strides.CopyTo(_strides);
        }
    }

    // Fixed-size inline storage for up to MaxInlineArraySize nint values.
    [InlineArray(MaxInlineArraySize)] // 5x8 bytes (40)
    private struct NintBuffer
    {
        public nint e0;
    }

    /// <summary>
    /// The length of each dimension, <c>_rank</c> entries long.
    /// </summary>
    [UnscopedRef]
    public ReadOnlySpan<nint> Lengths => (_metadata is null)
        ? ((ReadOnlySpan<nint>)_lengths).Slice(0, _rank)
        : MemoryMarshal.CreateReadOnlySpan(ref MemoryMarshal.GetArrayDataReference(_metadata), _rank);

    /// <summary>
    /// The stride of each dimension, <c>_rank</c> entries long.
    /// </summary>
    [UnscopedRef]
    public ReadOnlySpan<nint> Strides => (_metadata is null)
        ? ((ReadOnlySpan<nint>)_strides).Slice(0, _rank)
        // Unchecked view over [0, 2 * rank); the constructor guarantees the array is at least that long.
        : MemoryMarshal.CreateReadOnlySpan(ref MemoryMarshal.GetArrayDataReference(_metadata), _rank * 2).Slice(_rank);

    /// <summary>
    /// The value <c>TensorSpanHelpers.CalculateTotalLength</c> returns for <see cref="Lengths"/>
    /// (presumably the total element count - confirm against that helper). Recomputed on every access.
    /// </summary>
    public nint FlattenedLength => TensorSpanHelpers.CalculateTotalLength(Lengths);

    /// <summary>
    /// <see langword="true"/> when <see cref="FlattenedLength"/> is zero.
    /// </summary>
    public bool IsEmpty => FlattenedLength == 0;
}
}
Loading

0 comments on commit 4cfc93b

Please sign in to comment.