diff --git a/src/libraries/System.Formats.Tar/ref/System.Formats.Tar.cs b/src/libraries/System.Formats.Tar/ref/System.Formats.Tar.cs index d26411cc965b7..66210cdcaeca4 100644 --- a/src/libraries/System.Formats.Tar/ref/System.Formats.Tar.cs +++ b/src/libraries/System.Formats.Tar/ref/System.Formats.Tar.cs @@ -46,6 +46,7 @@ internal TarEntry() { } public System.IO.UnixFileMode Mode { get { throw null; } set { } } public System.DateTimeOffset ModificationTime { get { throw null; } set { } } public string Name { get { throw null; } set { } } + public long DataOffset { get { throw null; } } public int Uid { get { throw null; } set { } } public void ExtractToFile(string destinationFileName, bool overwrite) { } public System.Threading.Tasks.Task ExtractToFileAsync(string destinationFileName, bool overwrite, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs index 73b158b10957b..d4727db5510ab 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs @@ -281,6 +281,15 @@ public Stream? DataStream } } + /// + /// Gets the starting position of the data stream respective to the archive stream. + /// + /// + /// If the entry does not come from an archive stream or if the archive stream is not seekable, returns -1. + /// The position value returned by this property is relative to the absolute start of the archive stream, independent of where the tar archive begins. + /// + public long DataOffset => _header._dataOffset; + /// /// A string that represents the current entry. 
/// diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index e6e7e8aece00a..281812c11f6f2 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -25,7 +25,7 @@ internal sealed partial class TarHeader archiveStream.ReadExactly(buffer); - TarHeader? header = TryReadAttributes(initialFormat, buffer); + TarHeader? header = TryReadAttributes(initialFormat, buffer, archiveStream); if (header != null && processDataBlock) { header.ProcessDataBlock(archiveStream, copyData); @@ -47,7 +47,7 @@ internal sealed partial class TarHeader await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - TarHeader? header = TryReadAttributes(initialFormat, buffer.Span); + TarHeader? header = TryReadAttributes(initialFormat, buffer.Span, archiveStream); if (header != null && processDataBlock) { await header.ProcessDataBlockAsync(archiveStream, copyData, cancellationToken).ConfigureAwait(false); @@ -58,7 +58,7 @@ internal sealed partial class TarHeader return header; } - private static TarHeader? TryReadAttributes(TarEntryFormat initialFormat, ReadOnlySpan buffer) + private static TarHeader? TryReadAttributes(TarEntryFormat initialFormat, ReadOnlySpan buffer, Stream archiveStream) { // Confirms if v7 or pax, or tentatively selects ustar TarHeader? 
header = TryReadCommonAttributes(buffer, initialFormat); @@ -86,6 +86,8 @@ internal sealed partial class TarHeader } // In PAX, there is nothing to read in this section (empty space) } + // Finished reading the header metadata, next byte belongs to the data section, save the position + SetDataOffset(header, archiveStream); } return header; } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index e0d003a657bc8..81d90e1d7be21 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -83,6 +83,9 @@ private void WriteWithUnseekableDataStream(TarEntryFormat format, Stream destina // We know the exact location where the data starts depending on the format long dataStartPosition = headerStartPosition + dataLocation; + // Before writing, update the offset field now that the entry belongs to an archive + _dataOffset = dataStartPosition; + // Move to the data start location and write the data destinationStream.Seek(dataLocation, SeekOrigin.Current); _dataStream.CopyTo(destinationStream); // The data gets copied from the current position @@ -132,6 +135,9 @@ private async Task WriteWithUnseekableDataStreamAsync(TarEntryFormat format, Str // We know the exact location where the data starts depending on the format long dataStartPosition = headerStartPosition + dataLocation; + // Before writing, update the offset field now that the entry belongs to an archive + _dataOffset = dataStartPosition; + // Move to the data start location and write the data destinationStream.Seek(dataLocation, SeekOrigin.Current); await _dataStream.CopyToAsync(destinationStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position @@ -758,6 +764,9 @@ private int WriteGnuFields(Span buffer) // Writes the current header's data stream into the 
archive stream. private void WriteData(Stream archiveStream, Stream dataStream) { + // Before writing, update the offset field now that the entry belongs to an archive + SetDataOffset(this, archiveStream); + dataStream.CopyTo(archiveStream); // The data gets copied from the current position WriteEmptyPadding(archiveStream); } @@ -798,6 +807,9 @@ private async Task WriteDataAsync(Stream archiveStream, Stream dataStream, Cance { cancellationToken.ThrowIfCancellationRequested(); + // Before writing, update the offset field now that the entry belongs to an archive + SetDataOffset(this, archiveStream); + await dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position int paddingAfterData = TarHelpers.CalculatePadding(_size); diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.cs index f72a69d5dcc13..9306ef1997c88 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.cs @@ -40,6 +40,7 @@ internal sealed partial class TarHeader private const string PaxEaDevMinor = "devminor"; internal Stream? _dataStream; + internal long _dataOffset; // Position in the stream where the data ends in this header. internal long _endOfHeaderAndDataAndBlockAlignment; @@ -95,6 +96,7 @@ internal TarHeader(TarEntryFormat format, string name = "", int mode = 0, DateTi _typeFlag = typeFlag; _magic = GetMagicForFormat(format); _version = GetVersionForFormat(format); + _dataOffset = -1; } // Constructor called when creating an entry using the common fields from another entry. @@ -149,5 +151,10 @@ internal void InitializeExtendedAttributesWithExisting(IEnumerable GnuVersion, _ => string.Empty, }; + + // Stores the archive stream's position where we know the current entry's data section begins, + // if the archive stream is seekable. 
Otherwise, -1. + private static void SetDataOffset(TarHeader header, Stream archiveStream) => + header._dataOffset = archiveStream.CanSeek ? archiveStream.Position : -1; } } diff --git a/src/libraries/System.Formats.Tar/tests/TarEntry/GnuTarEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarEntry/GnuTarEntry.Tests.cs index 4c13273a30cea..bae49e5680c03 100644 --- a/src/libraries/System.Formats.Tar/tests/TarEntry/GnuTarEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarEntry/GnuTarEntry.Tests.cs @@ -3,6 +3,7 @@ using System.IO; using System.Linq; +using System.Threading.Tasks; using Xunit; namespace System.Formats.Tar.Tests @@ -93,5 +94,467 @@ public void SupportedEntryType_Fifo() SetFifo(fifo); VerifyFifo(fifo); } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + GnuTarEntry entry = new GnuTarEntry(TarEntryType.RegularFile, InitialEntryName); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + writer.WriteEntry(entry); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + + // No long path or long link entry is expected before this one, so the archive starts with the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 512. + // The regular file data section starts on the next byte. + long expectedDataOffset = canSeek ? 
512 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + + if (canSeek) + { + ms.Position = actualEntry.DataOffset; + byte actualData = (byte)ms.ReadByte(); + Assert.Equal(ExpectedOffsetDataSingleByte, actualData); + } + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + GnuTarEntry entry = new GnuTarEntry(TarEntryType.RegularFile, InitialEntryName); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + + // No long path or long link entry is expected before this one, so the archive starts with the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 512. + // The regular file data section starts on the next byte. + long expectedDataOffset = canSeek ? 
512 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + + if (canSeek) + { + ms.Position = actualEntry.DataOffset; + byte actualData = (byte)ms.ReadByte(); + Assert.Equal(ExpectedOffsetDataSingleByte, actualData); + } + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile_LongPath(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + string veryLongName = new string('a', 1234); // Forces using a GNU LongPath entry + GnuTarEntry entry = new GnuTarEntry(TarEntryType.RegularFile, veryLongName); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + writer.WriteEntry(entry); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + + // GNU first writes the long path entry, containing: + // * 512 bytes of the regular tar header + // * 1234 bytes for the data section containing the full long path + // * 302 bytes of padding + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 2560. + // The regular file data section starts on the next byte. + long expectedDataOffset = canSeek ? 
2560 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_LongPath_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + string veryLongName = new string('a', 1234); // Forces using a GNU LongPath entry + GnuTarEntry entry = new GnuTarEntry(TarEntryType.RegularFile, veryLongName); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + + // GNU first writes the long path entry, containing: + // * 512 bytes of the regular tar header + // * 1234 bytes for the data section containing the full long path + // * 302 bytes of padding + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 2560. + // The regular file data section starts on the next byte. + long expectedDataOffset = canSeek ? 
2560 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile_LongLink(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + GnuTarEntry entry = new GnuTarEntry(TarEntryType.SymbolicLink, InitialEntryName); + entry.LinkName = new string('a', 1234); // Forces using a GNU LongLink entry + writer.WriteEntry(entry); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + + // GNU first writes the long link entry, containing: + // * 512 bytes of the regular tar header + // * 1234 bytes for the data section containing the full long link + // * 302 bytes of padding + // Then it writes the actual symbolic link entry, containing: + // * 512 bytes of the regular tar header + // Totalling 2560. + // The symbolic link entry's data section position is the next byte. + long expectedDataOffset = canSeek ? 
2560 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + } + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_LongLink_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + GnuTarEntry entry = new GnuTarEntry(TarEntryType.SymbolicLink, InitialEntryName); + entry.LinkName = new string('b', 1234); // Forces using a GNU LongLink entry + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + + // GNU first writes the long link entry, containing: + // * 512 bytes of the regular tar header + // * 1234 bytes for the data section containing the full long link + // * 302 bytes of padding + // Then it writes the actual symbolic link entry, containing: + // * 512 bytes of the regular tar header + // Totalling 2560. + // The symbolic link entry's data section position is the next byte. + long expectedDataOffset = canSeek ? 
2560 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile_LongLink_LongPath(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + string veryLongName = new string('a', 1234); // Forces using a GNU LongPath entry + GnuTarEntry entry = new GnuTarEntry(TarEntryType.SymbolicLink, veryLongName); + entry.LinkName = new string('b', 1234); // Forces using a GNU LongLink entry + writer.WriteEntry(entry); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + + // GNU first writes the long link and long path entries, containing: + // * 512 bytes of the regular long link tar header + // * 1234 bytes for the data section containing the full long link + // * 302 bytes of padding + // * 512 bytes of the regular long path tar header + // * 1234 bytes for the data section containing the full long path + // * 302 bytes of padding + // Then it writes the actual entry, containing: + // * 512 bytes of the regular tar header + // Totalling 4608. + // The data section starts on the next byte. + long expectedDataOffset = canSeek ? 
4608 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_LongLink_LongPath_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + string veryLongName = new string('a', 1234); // Forces using a GNU LongPath entry + GnuTarEntry entry = new GnuTarEntry(TarEntryType.SymbolicLink, veryLongName); + entry.LinkName = new string('b', 1234); // Forces using a GNU LongLink entry + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + + // GNU first writes the long link and long path entries, containing: + // * 512 bytes of the regular long link tar header + // * 1234 bytes for the data section containing the full long link + // * 302 bytes of padding + // * 512 bytes of the regular long path tar header + // * 1234 bytes for the data section containing the full long path + // * 302 bytes of padding + // Then it writes the actual entry, containing: + // * 512 bytes of the regular tar header + // Totalling 4608. + // The data section starts on the next byte. + long expectedDataOffset = canSeek ? 
4608 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + } + + [Fact] + public void DataOffset_BeforeAndAfterArchive() + { + GnuTarEntry entry = new GnuTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; // The data stream is written to the archive from the current position + + using MemoryStream ms = new(); + using TarWriter writer = new(ms); + writer.WriteEntry(entry); + Assert.Equal(512, entry.DataOffset); + + // Write it again, the offset should now point to the second written entry + // First entry 512 (header) + 1 (data) + 511 (padding) + // Second entry 512 (header) + // 512 + 512 + 512 = 1536 + writer.WriteEntry(entry); + Assert.Equal(1536, entry.DataOffset); + } + + [Fact] + public async Task DataOffset_BeforeAndAfterArchive_Async() + { + GnuTarEntry entry = new GnuTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; // The data stream is written to the archive from the current position + + await using MemoryStream ms = new(); + await using TarWriter writer = new(ms); + await writer.WriteEntryAsync(entry); + Assert.Equal(512, entry.DataOffset); + + // Write it again, the offset should now point to the second written entry + // First entry 512 (header) + 1 (data) + 511 (padding) + // Second entry 512 (header) + // 512 + 512 + 512 = 1536 + await writer.WriteEntryAsync(entry); + Assert.Equal(1536, entry.DataOffset); + } + + [Fact] + public void DataOffset_UnseekableDataStream() + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + GnuTarEntry entry = new GnuTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + using 
MemoryStream dataStream = new(); + dataStream.WriteByte(ExpectedOffsetDataSingleByte); + dataStream.Position = 0; + using WrappedStream wds = new(dataStream, canWrite: true, canRead: true, canSeek: false); + entry.DataStream = wds; + + writer.WriteEntry(entry); + } + ms.Position = 0; + + using TarReader reader = new(ms); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + // Gnu header length is 512, data starts in the next position + Assert.Equal(512, actualEntry.DataOffset); + } + + [Fact] + public async Task DataOffset_UnseekableDataStream_Async() + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + GnuTarEntry entry = new GnuTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + await using MemoryStream dataStream = new(); + dataStream.WriteByte(ExpectedOffsetDataSingleByte); + dataStream.Position = 0; + await using WrappedStream wds = new(dataStream, canWrite: true, canRead: true, canSeek: false); + entry.DataStream = wds; + + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using TarReader reader = new(ms); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + // Gnu header length is 512, data starts in the next position + Assert.Equal(512, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_LongPath_LongLink_SecondEntry(bool canSeek) + { + string veryLongPathName = new string('a', 1234); // Forces using a GNU LongPath entry + string veryLongLinkName = new string('b', 1234); // Forces using a GNU LongLink entry + + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + GnuTarEntry entry1 = new GnuTarEntry(TarEntryType.SymbolicLink, veryLongPathName); + entry1.LinkName = veryLongLinkName; + writer.WriteEntry(entry1); + + GnuTarEntry entry2 = new GnuTarEntry(TarEntryType.SymbolicLink, 
veryLongPathName); + entry2.LinkName = veryLongLinkName; + writer.WriteEntry(entry2); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry firstEntry = reader.GetNextEntry(); + Assert.NotNull(firstEntry); + // GNU first writes the long link and long path entries, containing: + // * 512 bytes of the regular long link tar header + // * 1234 bytes for the data section containing the full long link + // * 302 bytes of padding + // * 512 bytes of the regular long path tar header + // * 1234 bytes for the data section containing the full long path + // * 302 bytes of padding + // Then it writes the actual symbolic link entry, containing: + // * 512 bytes of the regular tar header + // Totalling 4608. + // The symbolic link entry's data section position is the next byte. + long firstExpectedDataOffset = canSeek ? 4608 : -1; + Assert.Equal(firstExpectedDataOffset, firstEntry.DataOffset); + + TarEntry secondEntry = reader.GetNextEntry(); + Assert.NotNull(secondEntry); + // First entry (including its long link and long path entries) ends at 4608 (no padding, empty, as symlink has no data) + // Second entry (including its long link and long path entries) data section starts at 4608 + 4608 = 9216 + long secondExpectedDataOffset = canSeek ? 
9216 : -1; + Assert.Equal(secondExpectedDataOffset, secondEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_LongPath_LongLink_SecondEntry_Async(bool canSeek) + { + string veryLongPathName = new string('a', 1234); // Forces using a GNU LongPath entry + string veryLongLinkName = new string('b', 1234); // Forces using a GNU LongLink entry + + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + GnuTarEntry entry1 = new GnuTarEntry(TarEntryType.SymbolicLink, veryLongPathName); + entry1.LinkName = veryLongLinkName; + await writer.WriteEntryAsync(entry1); + + GnuTarEntry entry2 = new GnuTarEntry(TarEntryType.SymbolicLink, veryLongPathName); + entry2.LinkName = veryLongLinkName; + await writer.WriteEntryAsync(entry2); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry firstEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(firstEntry); + // GNU first writes the long link and long path entries, containing: + // * 512 bytes of the regular long link tar header + // * 1234 bytes for the data section containing the full long link + // * 302 bytes of padding + // * 512 bytes of the regular long path tar header + // * 1234 bytes for the data section containing the full long path + // * 302 bytes of padding + // Then it writes the actual symbolic link entry, containing: + // * 512 bytes of the regular tar header + // Totalling 4608. + // The symbolic link entry's data section position is the next byte. + long firstExpectedDataOffset = canSeek ? 
4608 : -1; + Assert.Equal(firstExpectedDataOffset, firstEntry.DataOffset); + + TarEntry secondEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(secondEntry); + // First entry (including its long link and long path entries) end at 4608 (no padding, empty, as symlink has no data) + // Second entry (including its long link and long path entries) data section starts one byte after 4608 + 4608 = 9216 + long secondExpectedDataOffset = canSeek ? 9216 : -1; + Assert.Equal(secondExpectedDataOffset, secondEntry.DataOffset); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarEntry/PaxTarEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarEntry/PaxTarEntry.Tests.cs index 19613acce4621..023584af183c6 100644 --- a/src/libraries/System.Formats.Tar/tests/TarEntry/PaxTarEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarEntry/PaxTarEntry.Tests.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Generic; +using System.IO; +using System.Threading.Tasks; using Xunit; namespace System.Formats.Tar.Tests @@ -114,5 +116,373 @@ public void SupportedEntryType_Fifo() SetFifo(fifo); VerifyFifo(fifo); } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + PaxTarEntry entry = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + writer.WriteEntry(entry); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + // PAX first writes the extended attributes entry, containing: + 
// * 512 bytes of the regular tar header + // * 113 bytes of the default extended attributes in the data section (mdata) + // * 399 bytes of padding after the data + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 1536. + // The regular file data section starts on the next byte. + long expectedDataOffset = canSeek ? 1536 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + + if (canSeek) + { + ms.Position = actualEntry.DataOffset; + byte actualData = (byte)ms.ReadByte(); + Assert.Equal(ExpectedOffsetDataSingleByte, actualData); + } + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + PaxTarEntry entry = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + // PAX first writes the extended attributes entry, containing: + // * 512 bytes of the regular tar header + // * 113 bytes of the default extended attributes in the data section (mdata) + // * 399 bytes of padding after the data + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 1536. + // The regular file data section starts on the next byte. + long expectedDataOffset = canSeek ? 
1536 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + + if (canSeek) + { + ms.Position = actualEntry.DataOffset; + byte actualData = (byte)ms.ReadByte(); + Assert.Equal(ExpectedOffsetDataSingleByte, actualData); + } + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_GlobalExtendedAttributes(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + PaxGlobalExtendedAttributesTarEntry entry = new PaxGlobalExtendedAttributesTarEntry(new Dictionary<string, string>()); + Assert.Equal(-1, entry.DataOffset); + writer.WriteEntry(entry); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + Assert.Equal(TarEntryType.GlobalExtendedAttributes, actualEntry.EntryType); + // The PAX global extended attributes header length is 512, data starts in the next position + long expectedDataOffset = canSeek ? 
512 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_GlobalExtendedAttributes_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + PaxGlobalExtendedAttributesTarEntry entry = new PaxGlobalExtendedAttributesTarEntry(new Dictionary<string, string>()); + Assert.Equal(-1, entry.DataOffset); + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + Assert.Equal(TarEntryType.GlobalExtendedAttributes, actualEntry.EntryType); + // The PAX global extended attributes header length is 512, data starts in the next position + long expectedDataOffset = canSeek ? 512 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + } + + [Fact] + public void DataOffset_BeforeAndAfterArchive() + { + PaxTarEntry entry = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; // The data stream is written to the archive from the current position + + using MemoryStream ms = new(); + using TarWriter writer = new(ms); + writer.WriteEntry(entry); + // PAX first writes the extended attributes entry, containing: + // * 512 bytes of the regular tar header + // * 113 bytes of the default extended attributes in the data section (mdata) + // * 399 bytes of padding after the data + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 1536. + // The regular file data section starts on the next byte. 
+ Assert.Equal(1536, entry.DataOffset); + + // Write it again, the offset should now point to the second written entry + // First entry 1536 + 1 (data) + 511 (padding) = 2048 + // Second entry 1536 + // 2048 + 1536 = 3584 + writer.WriteEntry(entry); + Assert.Equal(3584, entry.DataOffset); + } + + [Fact] + public async Task DataOffset_BeforeAndAfterArchive_Async() + { + PaxTarEntry entry = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; // The data stream is written to the archive from the current position + + await using MemoryStream ms = new(); + await using TarWriter writer = new(ms); + await writer.WriteEntryAsync(entry); + // PAX first writes the extended attributes entry, containing: + // * 512 bytes of the regular tar header + // * 113 bytes of the default extended attributes in the data section (mdata) + // * 399 bytes of padding after the data + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 1536. + // The regular file data section starts on the next byte. 
+ Assert.Equal(1536, entry.DataOffset); + + // Write it again, the offset should now point to the second written entry + // First entry 1536 + 1 (data) + 511 (padding) = 2048 + // Second entry 1536 + // 2048 + 1536 = 3584 + await writer.WriteEntryAsync(entry); + Assert.Equal(3584, entry.DataOffset); + } + + [Fact] + public void DataOffset_UnseekableDataStream() + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + PaxTarEntry entry = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + using MemoryStream dataStream = new(); + dataStream.WriteByte(ExpectedOffsetDataSingleByte); + dataStream.Position = 0; + using WrappedStream wds = new(dataStream, canWrite: true, canRead: true, canSeek: false); + entry.DataStream = wds; + + writer.WriteEntry(entry); + } + ms.Position = 0; + + using TarReader reader = new(ms); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + // PAX first writes the extended attributes entry, containing: + // * 512 bytes of the regular tar header + // * 113 bytes of the default extended attributes in the data section (mdata) + // * 399 bytes of padding after the data + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 1536. + // The regular file data section starts on the next byte. 
+ Assert.Equal(1536, actualEntry.DataOffset); + } + + [Fact] + public async Task DataOffset_UnseekableDataStream_Async() + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + PaxTarEntry entry = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + await using MemoryStream dataStream = new(); + dataStream.WriteByte(ExpectedOffsetDataSingleByte); + dataStream.Position = 0; + await using WrappedStream wds = new(dataStream, canWrite: true, canRead: true, canSeek: false); + entry.DataStream = wds; + + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using TarReader reader = new(ms); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + // PAX first writes the extended attributes entry, containing: + // * 512 bytes of the regular tar header + // * 113 bytes of the default extended attributes in the data section (mdata) + // * 399 bytes of padding after the data + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 1536. + // The regular file data section starts on the next byte. 
+ Assert.Equal(1536, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile_SecondEntry_MultiByte(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + PaxTarEntry entry1 = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry1.DataOffset); + entry1.DataStream = new MemoryStream(); + entry1.DataStream.Write(ExpectedOffsetDataMultiByte); + entry1.DataStream.Position = 0; + writer.WriteEntry(entry1); + + PaxTarEntry entry2 = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry2.DataOffset); + entry2.DataStream = new MemoryStream(); + entry2.DataStream.Write(ExpectedOffsetDataMultiByte); + entry2.DataStream.Position = 0; + writer.WriteEntry(entry2); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry firstEntry = reader.GetNextEntry(); + Assert.NotNull(firstEntry); + // PAX first writes the extended attributes entry, containing: + // * 512 bytes of the regular tar header + // * 113 bytes of the default extended attributes in the data section (mdata) + // * 399 bytes of padding after the data + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 1536. + // The regular file data section starts on the next byte. + long firstExpectedDataOffset = canSeek ? 
1536 : -1; + Assert.Equal(firstExpectedDataOffset, firstEntry.DataOffset); + + if (canSeek) + { + byte[] actualData = new byte[ExpectedOffsetDataMultiByte.Length]; + ms.Position = firstEntry.DataOffset; // Reposition the archive stream to confirm the reader will autorestore its position later + ms.ReadExactly(actualData); + AssertExtensions.SequenceEqual(ExpectedOffsetDataMultiByte, actualData); + } + + // If the archive stream is seekable, this should autorestore archive stream position internally + TarEntry secondEntry = reader.GetNextEntry(); + Assert.NotNull(secondEntry); + // The first entry (including its extended attribute entry) end at 1536 + 4 (data) + 508 (padding) = 2048 + // Second entry's data also starts one byte after the 1536 bytes after the beginning of its header, so 2048 + 1536 = 3584 + long secondExpectedDataOffset = canSeek ? 3584 : -1; + Assert.Equal(secondExpectedDataOffset, secondEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_SecondEntry_MultiByte_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + PaxTarEntry entry1 = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry1.DataOffset); + entry1.DataStream = new MemoryStream(); + entry1.DataStream.Write(ExpectedOffsetDataMultiByte); + entry1.DataStream.Position = 0; + await writer.WriteEntryAsync(entry1); + + PaxTarEntry entry2 = new PaxTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry2.DataOffset); + entry2.DataStream = new MemoryStream(); + entry2.DataStream.Write(ExpectedOffsetDataMultiByte); + entry2.DataStream.Position = 0; + await writer.WriteEntryAsync(entry2); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry firstEntry = await 
reader.GetNextEntryAsync(); + Assert.NotNull(firstEntry); + // PAX first writes the extended attributes entry, containing: + // * 512 bytes of the regular tar header + // * 113 bytes of the default extended attributes in the data section (mdata) + // * 399 bytes of padding after the data + // Then it writes the actual regular file entry, containing: + // * 512 bytes of the regular tar header + // Totalling 1536. + // The regular file data section starts on the next byte. + long firstExpectedDataOffset = canSeek ? 1536 : -1; + Assert.Equal(firstExpectedDataOffset, firstEntry.DataOffset); + + if (canSeek) + { + byte[] actualData = new byte[ExpectedOffsetDataMultiByte.Length]; + ms.Position = firstEntry.DataOffset; // Reposition the archive stream to confirm the reader will autorestore its position later + await ms.ReadExactlyAsync(actualData); + AssertExtensions.SequenceEqual(ExpectedOffsetDataMultiByte, actualData); + } + + // If the archive stream is seekable, this should autorestore archive stream position internally + TarEntry secondEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(secondEntry); + // The first entry (including its extended attribute entry) end at 1536 + 4 (data) + 508 (padding) = 2048 + // Second entry's data also starts one byte after the 1536 bytes after the beginning of its header, so 2048 + 1536 = 3584 + long secondExpectedDataOffset = canSeek ? 
3584 : -1; + Assert.Equal(secondExpectedDataOffset, secondEntry.DataOffset); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarEntry/UstarTarEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarEntry/UstarTarEntry.Tests.cs index 2fdcb34069e50..d3affbfcce3a3 100644 --- a/src/libraries/System.Formats.Tar/tests/TarEntry/UstarTarEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarEntry/UstarTarEntry.Tests.cs @@ -3,6 +3,7 @@ using System.IO; using System.Linq; +using System.Threading.Tasks; using Xunit; namespace System.Formats.Tar.Tests @@ -89,5 +90,267 @@ public void SupportedEntryType_Fifo() SetFifo(fifo); VerifyFifo(fifo); } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + UstarTarEntry entry = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + writer.WriteEntry(entry); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + // Ustar header length is 512, data starts in the next position + long expectedDataOffset = canSeek ? 
512 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + + if (canSeek) + { + ms.Position = actualEntry.DataOffset; + byte actualData = (byte)ms.ReadByte(); + Assert.Equal(ExpectedOffsetDataSingleByte, actualData); + } + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + UstarTarEntry entry = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + // Ustar header length is 512, data starts in the next position + long expectedDataOffset = canSeek ? 
512 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + + if (canSeek) + { + ms.Position = actualEntry.DataOffset; + byte actualData = (byte)ms.ReadByte(); + Assert.Equal(ExpectedOffsetDataSingleByte, actualData); + } + } + + [Fact] + public void DataOffset_BeforeAndAfterArchive() + { + UstarTarEntry entry = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; // The data stream is written to the archive from the current position + + using MemoryStream ms = new(); + using TarWriter writer = new(ms); + writer.WriteEntry(entry); + Assert.Equal(512, entry.DataOffset); + + // Write it again, the offset should now point to the second written entry + // First entry 512 (header) + 1 (data) + 511 (padding) + // Second entry 512 (header) + // 512 + 512 + 512 = 1536 + writer.WriteEntry(entry); + Assert.Equal(1536, entry.DataOffset); + } + + [Fact] + public async Task DataOffset_BeforeAndAfterArchive_Async() + { + UstarTarEntry entry = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; // The data stream is written to the archive from the current position + + await using MemoryStream ms = new(); + await using TarWriter writer = new(ms); + await writer.WriteEntryAsync(entry); + Assert.Equal(512, entry.DataOffset); + + // Write it again, the offset should now point to the second written entry + // First entry 512 (header) + 1 (data) + 511 (padding) + // Second entry 512 (header) + // 512 + 512 + 512 = 1536 + await writer.WriteEntryAsync(entry); + Assert.Equal(1536, entry.DataOffset); + } + + [Fact] + public void DataOffset_UnseekableDataStream() + { + using MemoryStream ms = new(); + using 
(TarWriter writer = new(ms, leaveOpen: true)) + { + UstarTarEntry entry = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + using MemoryStream dataStream = new(); + dataStream.WriteByte(ExpectedOffsetDataSingleByte); + dataStream.Position = 0; + using WrappedStream wds = new(dataStream, canWrite: true, canRead: true, canSeek: false); + entry.DataStream = wds; + + writer.WriteEntry(entry); + } + ms.Position = 0; + + using TarReader reader = new(ms); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + // Ustar header length is 512, data starts in the next position + Assert.Equal(512, actualEntry.DataOffset); + } + + [Fact] + public async Task DataOffset_UnseekableDataStream_Async() + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + UstarTarEntry entry = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + await using MemoryStream dataStream = new(); + dataStream.WriteByte(ExpectedOffsetDataSingleByte); + dataStream.Position = 0; + await using WrappedStream wds = new(dataStream, canWrite: true, canRead: true, canSeek: false); + entry.DataStream = wds; + + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using TarReader reader = new(ms); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + // Ustar header length is 512, data starts in the next position + Assert.Equal(512, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile_SecondEntry_MultiByte(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + UstarTarEntry entry1 = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry1.DataOffset); + entry1.DataStream = new MemoryStream(); + 
entry1.DataStream.Write(ExpectedOffsetDataMultiByte); + entry1.DataStream.Position = 0; + writer.WriteEntry(entry1); + + UstarTarEntry entry2 = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry2.DataOffset); + entry2.DataStream = new MemoryStream(); + entry2.DataStream.Write(ExpectedOffsetDataMultiByte); + entry2.DataStream.Position = 0; + writer.WriteEntry(entry2); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry firstEntry = reader.GetNextEntry(); + Assert.NotNull(firstEntry); + // Ustar header length is 512, data starts in the next position + long firstExpectedDataOffset = canSeek ? 512 : -1; + Assert.Equal(firstExpectedDataOffset, firstEntry.DataOffset); + + if (canSeek) + { + byte[] actualData = new byte[ExpectedOffsetDataMultiByte.Length]; + ms.Position = firstEntry.DataOffset; // Reposition the archive stream to confirm the reader will autorestore its position later + ms.ReadExactly(actualData); + AssertExtensions.SequenceEqual(ExpectedOffsetDataMultiByte, actualData); + } + + // If the archive stream is seekable, this should autorestore archive stream position internally + TarEntry secondEntry = reader.GetNextEntry(); + Assert.NotNull(secondEntry); + // First entry ends at 512 (header) + 4 (data) + 508 (padding) = 1024 + // Second entry also has 512 header, so data starts one byte after 1536 + long secondExpectedDataOffset = canSeek ? 
1536 : -1; + Assert.Equal(secondExpectedDataOffset, secondEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_SecondEntry_MultiByte_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + UstarTarEntry entry1 = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry1.DataOffset); + entry1.DataStream = new MemoryStream(); + entry1.DataStream.Write(ExpectedOffsetDataMultiByte); + entry1.DataStream.Position = 0; + await writer.WriteEntryAsync(entry1); + + UstarTarEntry entry2 = new UstarTarEntry(TarEntryType.RegularFile, InitialEntryName); + Assert.Equal(-1, entry2.DataOffset); + entry2.DataStream = new MemoryStream(); + entry2.DataStream.Write(ExpectedOffsetDataMultiByte); + entry2.DataStream.Position = 0; + await writer.WriteEntryAsync(entry2); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry firstEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(firstEntry); + // Ustar header length is 512, data starts in the next position + long firstExpectedDataOffset = canSeek ? 
512 : -1; + Assert.Equal(firstExpectedDataOffset, firstEntry.DataOffset); + + if (canSeek) + { + byte[] actualData = new byte[ExpectedOffsetDataMultiByte.Length]; + ms.Position = firstEntry.DataOffset; // Reposition the archive stream to confirm the reader will autorestore its position later + await ms.ReadExactlyAsync(actualData); + AssertExtensions.SequenceEqual(ExpectedOffsetDataMultiByte, actualData); + } + + // If the archive stream is seekable, this should autorestore archive stream position internally + TarEntry secondEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(secondEntry); + // First entry ends at 512 (header) + 4 (data) + 508 (padding) = 1024 + // Second entry also has 512 header, so data starts one byte after 1536 + long secondExpectedDataOffset = canSeek ? 1536 : -1; + Assert.Equal(secondExpectedDataOffset, secondEntry.DataOffset); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarEntry/V7TarEntry.Tests.cs b/src/libraries/System.Formats.Tar/tests/TarEntry/V7TarEntry.Tests.cs index 2bf5471d4fa48..da5e40e715a3d 100644 --- a/src/libraries/System.Formats.Tar/tests/TarEntry/V7TarEntry.Tests.cs +++ b/src/libraries/System.Formats.Tar/tests/TarEntry/V7TarEntry.Tests.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Threading.Tasks; using Xunit; namespace System.Formats.Tar.Tests @@ -69,5 +70,245 @@ public void SupportedEntryType_SymbolicLink() SetSymbolicLink(symbolicLink); VerifySymbolicLink(symbolicLink); } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + V7TarEntry entry = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + 
writer.WriteEntry(entry); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + // V7 header length is 512, data starts in the next position + long expectedDataOffset = canSeek ? 512 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + + if (canSeek) + { + ms.Position = actualEntry.DataOffset; + byte actualData = (byte)ms.ReadByte(); + Assert.Equal(ExpectedOffsetDataSingleByte, actualData); + } + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_Async(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + V7TarEntry entry = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader reader = new(streamToRead); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + // V7 header length is 512, data starts in the next position + long expectedDataOffset = canSeek ? 
512 : -1; + Assert.Equal(expectedDataOffset, actualEntry.DataOffset); + + if (canSeek) + { + ms.Position = actualEntry.DataOffset; + byte actualData = (byte)ms.ReadByte(); + Assert.Equal(ExpectedOffsetDataSingleByte, actualData); + } + } + + [Fact] + public void DataOffset_BeforeAndAfterArchive() + { + V7TarEntry entry = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; // The data stream is written to the archive from the current position + + using MemoryStream ms = new(); + using TarWriter writer = new(ms); + writer.WriteEntry(entry); + Assert.Equal(512, entry.DataOffset); + + // Write it again, the offset should now point to the second written entry + // First entry 512 (header) + 1 (data) + 511 (padding) + // Second entry 512 (header) + // 512 + 512 + 512 = 1536 + writer.WriteEntry(entry); + Assert.Equal(1536, entry.DataOffset); + } + + [Fact] + public async Task DataOffset_BeforeAndAfterArchive_Async() + { + V7TarEntry entry = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + entry.DataStream = new MemoryStream(); + entry.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry.DataStream.Position = 0; // The data stream is written to the archive from the current position + + await using MemoryStream ms = new(); + await using TarWriter writer = new(ms); + await writer.WriteEntryAsync(entry); + Assert.Equal(512, entry.DataOffset); + + // Write it again, the offset should now point to the second written entry + // First entry 512 (header) + 1 (data) + 511 (padding) + // Second entry 512 (header) + // 512 + 512 + 512 = 1536 + await writer.WriteEntryAsync(entry); + Assert.Equal(1536, entry.DataOffset); + } + + [Fact] + public void DataOffset_UnseekableDataStream() + { + using MemoryStream ms = new(); + using (TarWriter writer = 
new(ms, leaveOpen: true)) + { + V7TarEntry entry = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + using MemoryStream dataStream = new(); + dataStream.WriteByte(ExpectedOffsetDataSingleByte); + dataStream.Position = 0; + using WrappedStream wds = new(dataStream, canWrite: true, canRead: true, canSeek: false); + entry.DataStream = wds; + + writer.WriteEntry(entry); + } + ms.Position = 0; + + using TarReader reader = new(ms); + TarEntry actualEntry = reader.GetNextEntry(); + Assert.NotNull(actualEntry); + // V7 header length is 512, data starts in the next position + Assert.Equal(512, actualEntry.DataOffset); + } + + [Fact] + public async Task DataOffset_UnseekableDataStream_Async() + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + V7TarEntry entry = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry.DataOffset); + + await using MemoryStream dataStream = new(); + dataStream.WriteByte(ExpectedOffsetDataSingleByte); + dataStream.Position = 0; + await using WrappedStream wds = new(dataStream, canWrite: true, canRead: true, canSeek: false); + entry.DataStream = wds; + + await writer.WriteEntryAsync(entry); + } + ms.Position = 0; + + await using TarReader reader = new(ms); + TarEntry actualEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(actualEntry); + // V7 header length is 512, data starts in the next position + Assert.Equal(512, actualEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void DataOffset_RegularFile_SecondEntry(bool canSeek) + { + using MemoryStream ms = new(); + using (TarWriter writer = new(ms, leaveOpen: true)) + { + V7TarEntry entry1 = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry1.DataOffset); + entry1.DataStream = new MemoryStream(); + entry1.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry1.DataStream.Position = 0; + writer.WriteEntry(entry1); + + V7TarEntry entry2 = new V7TarEntry(TarEntryType.V7RegularFile, 
InitialEntryName); + Assert.Equal(-1, entry2.DataOffset); + entry2.DataStream = new MemoryStream(); + entry2.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry2.DataStream.Position = 0; + writer.WriteEntry(entry2); + } + ms.Position = 0; + + using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + using TarReader reader = new(streamToRead); + TarEntry firstEntry = reader.GetNextEntry(); + Assert.NotNull(firstEntry); + // V7 header length is 512, data starts in the next position + long firstExpectedDataOffset = canSeek ? 512 : -1; + Assert.Equal(firstExpectedDataOffset, firstEntry.DataOffset); + + TarEntry secondEntry = reader.GetNextEntry(); + Assert.NotNull(secondEntry); + // First entry ends at 512 (header) + 1 (data) + 511 (padding) = 1024 + // Second entry also has a 512-byte header, so its data starts at 1024 + 512 = 1536 + long secondExpectedDataOffset = canSeek ? 1536 : -1; + Assert.Equal(secondExpectedDataOffset, secondEntry.DataOffset); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task DataOffset_RegularFile_SecondEntryAsync(bool canSeek) + { + await using MemoryStream ms = new(); + await using (TarWriter writer = new(ms, leaveOpen: true)) + { + V7TarEntry entry1 = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry1.DataOffset); + entry1.DataStream = new MemoryStream(); + entry1.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry1.DataStream.Position = 0; + await writer.WriteEntryAsync(entry1); + + V7TarEntry entry2 = new V7TarEntry(TarEntryType.V7RegularFile, InitialEntryName); + Assert.Equal(-1, entry2.DataOffset); + entry2.DataStream = new MemoryStream(); + entry2.DataStream.WriteByte(ExpectedOffsetDataSingleByte); + entry2.DataStream.Position = 0; + await writer.WriteEntryAsync(entry2); + } + ms.Position = 0; + + await using Stream streamToRead = new WrappedStream(ms, canWrite: true, canRead: true, canSeek: canSeek); + await using TarReader 
reader = new(streamToRead); + TarEntry firstEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(firstEntry); + // V7 header length is 512, data starts in the next position + long firstExpectedDataOffset = canSeek ? 512 : -1; + Assert.Equal(firstExpectedDataOffset, firstEntry.DataOffset); + + TarEntry secondEntry = await reader.GetNextEntryAsync(); + Assert.NotNull(secondEntry); + // First entry ends at 512 (header) + 1 (data) + 511 (padding) = 1024 + // Second entry also has 512 header, so data starts one byte after 1536 + long secondExpectedDataOffset = canSeek ? 1536 : -1; + Assert.Equal(secondExpectedDataOffset, secondEntry.DataOffset); + } } } diff --git a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs index 29a15217879ee..5412d03f6f29a 100644 --- a/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs +++ b/src/libraries/System.Formats.Tar/tests/TarTestsBase.cs @@ -97,6 +97,8 @@ public abstract partial class TarTestsBase : FileCleanupTestBase internal const char Separator = '/'; internal const int MaxPathComponent = 255; internal const long LegacyMaxFileSize = (1L << 33) - 1; // Max value of 11 octal digits = 2^33 - 1 or 8 Gb. + internal const byte ExpectedOffsetDataSingleByte = 5; + internal readonly byte[] ExpectedOffsetDataMultiByte = [9, 8, 7, 6]; private static readonly string[] V7TestCaseNames = new[] {