Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose the TarEntry start of the data stream #105007

Merged
merged 11 commits into from
Jul 22, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ public Stream? DataStream
/// </summary>
/// <remarks>
/// If the entry does not come from an archive stream or if the archive stream is not seekable, returns -1.
/// The position value returned by this property is relative to the absolute start of the archive stream, regardless of where the tar archive begins.
carlossanlop marked this conversation as resolved.
Show resolved Hide resolved
/// </remarks>
public long DataOffset => _header._dataOffset;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ private void WriteWithUnseekableDataStream(TarEntryFormat format, Stream destina
long dataStartPosition = headerStartPosition + dataLocation;

// Before writing, update the offset field now that the entry belongs to an archive
_dataOffset = dataStartPosition + 1;
_dataOffset = dataStartPosition;

// Move to the data start location and write the data
destinationStream.Seek(dataLocation, SeekOrigin.Current);
Expand Down Expand Up @@ -136,7 +136,7 @@ private async Task WriteWithUnseekableDataStreamAsync(TarEntryFormat format, Str
long dataStartPosition = headerStartPosition + dataLocation;

// Before writing, update the offset field now that the entry belongs to an archive
_dataOffset = dataStartPosition + 1;
_dataOffset = dataStartPosition;

// Move to the data start location and write the data
destinationStream.Seek(dataLocation, SeekOrigin.Current);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ internal void InitializeExtendedAttributesWithExisting(IEnumerable<KeyValuePair<
private static void SetDataOffset(TarHeader header, Stream archiveStream) => header._dataOffset =
archiveStream.CanSeek
// The stream is positioned right after the header, so the current position is where the data section starts
carlossanlop marked this conversation as resolved.
Show resolved Hide resolved
? archiveStream.Position + 1
? archiveStream.Position
: -1;
}
}
168 changes: 147 additions & 21 deletions src/libraries/System.Formats.Tar/tests/TarEntry/GnuTarEntry.Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ public void SupportedEntryType_Fifo()
[InlineData(true)]
public void DataOffset_RegularFile(bool canSeek)
{
byte expectedData = 5;
using MemoryStream ms = new();
using (TarWriter writer = new(ms, leaveOpen: true))
{
Expand All @@ -120,15 +121,23 @@ public void DataOffset_RegularFile(bool canSeek)
// * 512 bytes of the regular tar header
// Totalling 512.
// The regular file data section starts on the next byte.
long expectedDataOffset = canSeek ? 513 : -1;
long expectedDataOffset = canSeek ? 512 : -1;
Assert.Equal(expectedDataOffset, actualEntry.DataOffset);
carlossanlop marked this conversation as resolved.
Show resolved Hide resolved

if (canSeek)
{
ms.Position = actualEntry.DataOffset;
byte actualData = (byte)ms.ReadByte();
Assert.Equal(expectedData, actualData);
}
}

[Theory]
[InlineData(false)]
[InlineData(true)]
public async Task DataOffset_RegularFile_Async(bool canSeek)
{
byte expectedData = 5;
await using MemoryStream ms = new();
await using (TarWriter writer = new(ms, leaveOpen: true))
{
Expand All @@ -149,8 +158,15 @@ public async Task DataOffset_RegularFile_Async(bool canSeek)
// * 512 bytes of the regular tar header
// Totalling 512.
// The regular file data section starts on the next byte.
long expectedDataOffset = canSeek ? 513 : -1;
long expectedDataOffset = canSeek ? 512 : -1;
Assert.Equal(expectedDataOffset, actualEntry.DataOffset);

if (canSeek)
{
ms.Position = actualEntry.DataOffset;
byte actualData = (byte)ms.ReadByte();
Assert.Equal(expectedData, actualData);
}
}

[Theory]
Expand Down Expand Up @@ -183,7 +199,7 @@ public void DataOffset_RegularFile_LongPath(bool canSeek)
// * 512 bytes of the regular tar header
// Totalling 2560.
// The regular file data section starts on the next byte.
long expectedDataOffset = canSeek ? 2561 : -1;
long expectedDataOffset = canSeek ? 2560 : -1;
Assert.Equal(expectedDataOffset, actualEntry.DataOffset);
}

Expand Down Expand Up @@ -217,7 +233,7 @@ public async Task DataOffset_RegularFile_LongPath_Async(bool canSeek)
// * 512 bytes of the regular tar header
// Totalling 2560.
// The regular file data section starts on the next byte.
long expectedDataOffset = canSeek ? 2561 : -1;
long expectedDataOffset = canSeek ? 2560 : -1;
Assert.Equal(expectedDataOffset, actualEntry.DataOffset);
}

Expand Down Expand Up @@ -248,7 +264,7 @@ public void DataOffset_RegularFile_LongLink(bool canSeek)
// * 512 bytes of the regular tar header
// Totalling 2560.
// The regular file data section starts on the next byte.
long expectedDataOffset = canSeek ? 2561 : -1;
long expectedDataOffset = canSeek ? 2560 : -1;
Assert.Equal(expectedDataOffset, actualEntry.DataOffset);
}
[Theory]
Expand All @@ -260,7 +276,7 @@ public async Task DataOffset_RegularFile_LongLink_Async(bool canSeek)
await using (TarWriter writer = new(ms, leaveOpen: true))
{
GnuTarEntry entry = new GnuTarEntry(TarEntryType.SymbolicLink, InitialEntryName);
entry.LinkName = new string('a', 1234); // Forces using a GNU LongLink entry
entry.LinkName = new string('b', 1234); // Forces using a GNU LongLink entry
await writer.WriteEntryAsync(entry);
}
ms.Position = 0;
Expand All @@ -278,21 +294,21 @@ public async Task DataOffset_RegularFile_LongLink_Async(bool canSeek)
// * 512 bytes of the regular tar header
// Totalling 2560.
// The data section starts on the next byte.
long expectedDataOffset = canSeek ? 2561 : -1;
long expectedDataOffset = canSeek ? 2560 : -1;
Assert.Equal(expectedDataOffset, actualEntry.DataOffset);
}

[Theory]
[InlineData(false)]
[InlineData(true)]
public void DataOffset_RegularFile_LongLin_LongPath(bool canSeek)
public void DataOffset_RegularFile_LongLink_LongPath(bool canSeek)
{
using MemoryStream ms = new();
using (TarWriter writer = new(ms, leaveOpen: true))
{
string veryLongName = new string('a', 1234); // Forces using a GNU LongPath entry
GnuTarEntry entry = new GnuTarEntry(TarEntryType.SymbolicLink, veryLongName);
entry.LinkName = new string('a', 1234); // Forces using a GNU LongLink entry
entry.LinkName = new string('b', 1234); // Forces using a GNU LongLink entry
writer.WriteEntry(entry);
}
ms.Position = 0;
Expand All @@ -309,25 +325,25 @@ public void DataOffset_RegularFile_LongLin_LongPath(bool canSeek)
// * 512 bytes of the regular long path tar header
// * 1234 bytes for the data section containing the full long path
// * 302 bytes of padding
// Then it writes the actual regular file entry, containing:
// Then it writes the actual entry, containing:
// * 512 bytes of the regular tar header
// Totalling 4608.
// The regular file data section starts on the next byte.
long expectedDataOffset = canSeek ? 4609 : -1;
// The data section starts on the next byte.
long expectedDataOffset = canSeek ? 4608 : -1;
Assert.Equal(expectedDataOffset, actualEntry.DataOffset);
}

[Theory]
[InlineData(false)]
[InlineData(true)]
public async Task DataOffset_RegularFile_LongLin_LongPath_Async(bool canSeek)
public async Task DataOffset_RegularFile_LongLink_LongPath_Async(bool canSeek)
{
await using MemoryStream ms = new();
await using (TarWriter writer = new(ms, leaveOpen: true))
{
string veryLongName = new string('a', 1234); // Forces using a GNU LongPath entry
GnuTarEntry entry = new GnuTarEntry(TarEntryType.SymbolicLink, veryLongName);
entry.LinkName = new string('a', 1234); // Forces using a GNU LongLink entry
entry.LinkName = new string('b', 1234); // Forces using a GNU LongLink entry
await writer.WriteEntryAsync(entry);
}
ms.Position = 0;
Expand All @@ -344,11 +360,11 @@ public async Task DataOffset_RegularFile_LongLin_LongPath_Async(bool canSeek)
// * 512 bytes of the regular long path tar header
// * 1234 bytes for the data section containing the full long path
// * 302 bytes of padding
// Then it writes the actual regular file entry, containing:
// Then it writes the actual entry, containing:
// * 512 bytes of the regular tar header
// Totalling 4608.
// The regular file data section starts on the next byte.
long expectedDataOffset = canSeek ? 4609 : -1;
// The data section starts on the next byte.
long expectedDataOffset = canSeek ? 4608 : -1;
Assert.Equal(expectedDataOffset, actualEntry.DataOffset);
}

Expand All @@ -359,11 +375,19 @@ public void DataOffset_BeforeAndAfterArchive()
Assert.Equal(-1, entry.DataOffset);
entry.DataStream = new MemoryStream();
entry.DataStream.WriteByte(5);
entry.DataStream.Position = 0; // The data stream is written to the archive from the current position

using MemoryStream ms = new();
using TarWriter writer = new(ms);
writer.WriteEntry(entry);
Assert.Equal(513, entry.DataOffset);
Assert.Equal(512, entry.DataOffset);

// Write it again, the offset should now point to the second written entry
// First entry 512 (header) + 1 (data) + 511 (padding)
// Second entry 512 (header)
// 512 + 512 + 512 = 1536
writer.WriteEntry(entry);
Assert.Equal(1536, entry.DataOffset);
}

[Fact]
Expand All @@ -374,11 +398,19 @@ public async Task DataOffset_BeforeAndAfterArchive_Async()

entry.DataStream = new MemoryStream();
entry.DataStream.WriteByte(5);
entry.DataStream.Position = 0; // The data stream is written to the archive from the current position

await using MemoryStream ms = new();
await using TarWriter writer = new(ms);
await writer.WriteEntryAsync(entry);
Assert.Equal(513, entry.DataOffset);
Assert.Equal(512, entry.DataOffset);

// Write it again, the offset should now point to the second written entry
// First entry 512 (header) + 1 (data) + 511 (padding)
// Second entry 512 (header)
// 512 + 512 + 512 = 1536
await writer.WriteEntryAsync(entry);
Assert.Equal(1536, entry.DataOffset);
}

[Fact]
Expand All @@ -404,7 +436,7 @@ public void DataOffset_UnseekableDataStream()
TarEntry actualEntry = reader.GetNextEntry();
Assert.NotNull(actualEntry);
// Gnu header length is 512, data starts in the next position
Assert.Equal(513, actualEntry.DataOffset);
Assert.Equal(512, actualEntry.DataOffset);
}

[Fact]
Expand All @@ -430,7 +462,101 @@ public async Task DataOffset_UnseekableDataStream_Async()
TarEntry actualEntry = await reader.GetNextEntryAsync();
Assert.NotNull(actualEntry);
// Gnu header length is 512, data starts in the next position
Assert.Equal(513, actualEntry.DataOffset);
Assert.Equal(512, actualEntry.DataOffset);
}

// Verifies DataOffset for two consecutive GNU symbolic link entries that each
// require both a LongLink and a LongPath metadata entry.
[Theory]
[InlineData(false)]
[InlineData(true)]
public void DataOffset_LongPath_LongLink_SecondEntry(bool canSeek)
{
    string longPath = new string('a', 1234); // Forces using a GNU LongPath entry
    string longLink = new string('b', 1234); // Forces using a GNU LongLink entry

    using MemoryStream ms = new();
    using (TarWriter writer = new(ms, leaveOpen: true))
    {
        // Write the same kind of entry twice, back to back.
        for (int written = 0; written < 2; written++)
        {
            GnuTarEntry link = new GnuTarEntry(TarEntryType.SymbolicLink, longPath)
            {
                LinkName = longLink
            };
            writer.WriteEntry(link);
        }
    }
    ms.Position = 0;

    // Layout of each written entry:
    // * 512 bytes of the long link tar header
    // * 1234 bytes for the data section containing the full long link
    // * 302 bytes of padding
    // * 512 bytes of the long path tar header
    // * 1234 bytes for the data section containing the full long path
    // * 302 bytes of padding
    // * 512 bytes of the actual entry's tar header
    // Totalling 4608, so the first entry's data section starts at offset 4608.
    // A symbolic link has no data and therefore no padding, so the second entry
    // begins at 4608 and its data section starts at offset 4608 + 4608 = 9216.
    long expectedFirstOffset = -1;
    long expectedSecondOffset = -1;
    if (canSeek)
    {
        expectedFirstOffset = 4608;
        expectedSecondOffset = 9216;
    }

    using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek);
    using TarReader reader = new(streamToRead);

    TarEntry firstEntry = reader.GetNextEntry();
    Assert.NotNull(firstEntry);
    Assert.Equal(expectedFirstOffset, firstEntry.DataOffset);

    TarEntry secondEntry = reader.GetNextEntry();
    Assert.NotNull(secondEntry);
    Assert.Equal(expectedSecondOffset, secondEntry.DataOffset);
}

// Async counterpart: verifies DataOffset for two consecutive GNU symbolic link
// entries that each require both a LongLink and a LongPath metadata entry.
[Theory]
[InlineData(false)]
[InlineData(true)]
public async Task DataOffset_LongPath_LongLink_SecondEntry_Async(bool canSeek)
{
    string longPath = new string('a', 1234); // Forces using a GNU LongPath entry
    string longLink = new string('b', 1234); // Forces using a GNU LongLink entry

    await using MemoryStream ms = new();
    await using (TarWriter writer = new(ms, leaveOpen: true))
    {
        // Write the same kind of entry twice, back to back.
        for (int written = 0; written < 2; written++)
        {
            GnuTarEntry link = new GnuTarEntry(TarEntryType.SymbolicLink, longPath)
            {
                LinkName = longLink
            };
            await writer.WriteEntryAsync(link);
        }
    }
    ms.Position = 0;

    // Layout of each written entry:
    // * 512 bytes of the long link tar header
    // * 1234 bytes for the data section containing the full long link
    // * 302 bytes of padding
    // * 512 bytes of the long path tar header
    // * 1234 bytes for the data section containing the full long path
    // * 302 bytes of padding
    // * 512 bytes of the actual entry's tar header
    // Totalling 4608, so the first entry's data section starts at offset 4608.
    // A symbolic link has no data and therefore no padding, so the second entry
    // begins at 4608 and its data section starts at offset 4608 + 4608 = 9216.
    long expectedFirstOffset = -1;
    long expectedSecondOffset = -1;
    if (canSeek)
    {
        expectedFirstOffset = 4608;
        expectedSecondOffset = 9216;
    }

    await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek);
    await using TarReader reader = new(streamToRead);

    TarEntry firstEntry = await reader.GetNextEntryAsync();
    Assert.NotNull(firstEntry);
    Assert.Equal(expectedFirstOffset, firstEntry.DataOffset);

    TarEntry secondEntry = await reader.GetNextEntryAsync();
    Assert.NotNull(secondEntry);
    Assert.Equal(expectedSecondOffset, secondEntry.DataOffset);
}
}
}
Loading
Loading