From 612969e574fd9d922314f24923792e343871f102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?nils=20m=C3=A5s=C3=A9n?= Date: Sun, 10 Oct 2021 02:21:22 +0200 Subject: [PATCH] feat(zip): better string encoding handling (#592) This replaces the global static ZipStrings singleton with instances of StringCodec, which will: - Remove encoding configuration from a shared global state - Allow for different defaults for input and output - Explicitly override the encodings used for ZipCrypto and zip archive comments (the one in the Central Directory, not the individual entry comments). - Use "Unicode" for new entries (unless overridden) - Make it much more clear (hopefully) how and why different encodings are used. --- .../Streams/DeflaterOutputStream.cs | 4 + src/ICSharpCode.SharpZipLib/Zip/FastZip.cs | 30 +- .../Zip/ZipConstants.cs | 43 --- src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs | 10 +- .../Zip/ZipEntryFactory.cs | 4 +- src/ICSharpCode.SharpZipLib/Zip/ZipFile.cs | 82 +++-- src/ICSharpCode.SharpZipLib/Zip/ZipFormat.cs | 10 +- .../Zip/ZipInputStream.cs | 9 +- .../Zip/ZipOutputStream.cs | 18 +- src/ICSharpCode.SharpZipLib/Zip/ZipStrings.cs | 299 ++++++++++-------- .../Zip/FastZipHandling.cs | 64 ++-- .../Zip/GeneralHandling.cs | 27 +- .../Zip/ZipStreamAsyncTests.cs | 5 +- 13 files changed, 319 insertions(+), 286 deletions(-) diff --git a/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/DeflaterOutputStream.cs b/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/DeflaterOutputStream.cs index fd4bb47af..1c54b6848 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/DeflaterOutputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/DeflaterOutputStream.cs @@ -2,6 +2,7 @@ using System; using System.IO; using System.Security.Cryptography; +using System.Text; using System.Threading; using System.Threading.Tasks; @@ -203,6 +204,9 @@ public bool CanPatchEntries /// protected byte[] AESAuthCode; + /// + public Encoding ZipCryptoEncoding { get; set; } = 
StringCodec.DefaultZipCryptoEncoding; + /// /// Encrypt a block of data /// diff --git a/src/ICSharpCode.SharpZipLib/Zip/FastZip.cs b/src/ICSharpCode.SharpZipLib/Zip/FastZip.cs index 01725f4c3..13aedb021 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/FastZip.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/FastZip.cs @@ -345,6 +345,29 @@ public Deflater.CompressionLevel CompressionLevel set { compressionLevel_ = value; } } + /// + /// Reflects the opposite of the internal , setting it to false overrides the encoding used for reading and writing zip entries + /// + public bool UseUnicode + { + get => !_stringCodec.ForceZipLegacyEncoding; + set => _stringCodec.ForceZipLegacyEncoding = !value; + } + + /// Gets or sets the code page used for reading/writing zip file entries when unicode is disabled + public int LegacyCodePage + { + get => _stringCodec.CodePage; + set => _stringCodec.CodePage = value; + } + + /// + public StringCodec StringCodec + { + get => _stringCodec; + set => _stringCodec = value; + } + #endregion Properties #region Delegates @@ -456,7 +479,7 @@ private void CreateZip(Stream outputStream, string sourceDirectory, bool recurse NameTransform = new ZipNameTransform(sourceDirectory); sourceDirectory_ = sourceDirectory; - using (outputStream_ = new ZipOutputStream(outputStream)) + using (outputStream_ = new ZipOutputStream(outputStream, _stringCodec)) { outputStream_.SetLevel((int)CompressionLevel); outputStream_.IsStreamOwner = !leaveOpen; @@ -631,6 +654,10 @@ private void ProcessFile(object sender, ScanEventArgs e) using (FileStream stream = File.Open(e.Name, FileMode.Open, FileAccess.Read, FileShare.Read)) { ZipEntry entry = entryFactory_.MakeFileEntry(e.Name); + if (_stringCodec.ForceZipLegacyEncoding) + { + entry.IsUnicodeText = false; + } // Set up AES encryption for the entry if required. 
ConfigureEntryEncryption(entry); @@ -967,6 +994,7 @@ private static bool NameIsValid(string name) private INameTransform extractNameTransform_; private UseZip64 useZip64_ = UseZip64.Dynamic; private CompressionLevel compressionLevel_ = CompressionLevel.DEFAULT_COMPRESSION; + private StringCodec _stringCodec = new StringCodec(); private string password_; diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipConstants.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipConstants.cs index eadf33901..6d4892d55 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipConstants.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipConstants.cs @@ -471,48 +471,5 @@ public static class ZipConstants public const int ENDSIG = 'P' | ('K' << 8) | (5 << 16) | (6 << 24); #endregion Header Signatures - - /// - /// Default encoding used for string conversion. 0 gives the default system OEM code page. - /// Using the default code page isnt the full solution necessarily - /// there are many variable factors, codepage 850 is often a good choice for - /// European users, however be careful about compatability. 
- /// - [Obsolete("Use ZipStrings instead")] - public static int DefaultCodePage - { - get => ZipStrings.CodePage; - set => ZipStrings.CodePage = value; - } - - /// Deprecated wrapper for - [Obsolete("Use ZipStrings.ConvertToString instead")] - public static string ConvertToString(byte[] data, int count) - => ZipStrings.ConvertToString(data, count); - - /// Deprecated wrapper for - [Obsolete("Use ZipStrings.ConvertToString instead")] - public static string ConvertToString(byte[] data) - => ZipStrings.ConvertToString(data); - - /// Deprecated wrapper for - [Obsolete("Use ZipStrings.ConvertToStringExt instead")] - public static string ConvertToStringExt(int flags, byte[] data, int count) - => ZipStrings.ConvertToStringExt(flags, data, count); - - /// Deprecated wrapper for - [Obsolete("Use ZipStrings.ConvertToStringExt instead")] - public static string ConvertToStringExt(int flags, byte[] data) - => ZipStrings.ConvertToStringExt(flags, data); - - /// Deprecated wrapper for - [Obsolete("Use ZipStrings.ConvertToArray instead")] - public static byte[] ConvertToArray(string str) - => ZipStrings.ConvertToArray(str); - - /// Deprecated wrapper for - [Obsolete("Use ZipStrings.ConvertToArray instead")] - public static byte[] ConvertToArray(int flags, string str) - => ZipStrings.ConvertToArray(flags, str); } } diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs index ffeee1883..b0bf15821 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipEntry.cs @@ -1,5 +1,6 @@ using System; using System.IO; +using System.Text; namespace ICSharpCode.SharpZipLib.Zip { @@ -150,7 +151,7 @@ private enum Known : byte /// The name passed is null /// public ZipEntry(string name) - : this(name, 0, ZipConstants.VersionMadeBy, CompressionMethod.Deflated) + : this(name, 0, ZipConstants.VersionMadeBy, CompressionMethod.Deflated, true) { } @@ -171,7 +172,7 @@ public ZipEntry(string name) /// internal 
ZipEntry(string name, int versionRequiredToExtract) : this(name, versionRequiredToExtract, ZipConstants.VersionMadeBy, - CompressionMethod.Deflated) + CompressionMethod.Deflated, true) { } @@ -182,6 +183,7 @@ internal ZipEntry(string name, int versionRequiredToExtract) /// Version and HostSystem Information /// Minimum required zip feature version required to extract this entry /// Compression method for this entry. + /// Whether the entry uses unicode for name and comment /// /// The name passed is null /// @@ -193,7 +195,7 @@ internal ZipEntry(string name, int versionRequiredToExtract) /// It is not generally useful, use the constructor specifying the name only. /// internal ZipEntry(string name, int versionRequiredToExtract, int madeByInfo, - CompressionMethod method) + CompressionMethod method, bool unicode) { if (name == null) { @@ -216,7 +218,7 @@ internal ZipEntry(string name, int versionRequiredToExtract, int madeByInfo, this.versionToExtract = (ushort)versionRequiredToExtract; this.method = method; - IsUnicodeText = ZipStrings.UseUnicode; + IsUnicodeText = unicode; } /// diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipEntryFactory.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipEntryFactory.cs index 1e40baaff..ccbb26968 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipEntryFactory.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipEntryFactory.cs @@ -68,7 +68,7 @@ public enum TimeSetting public ZipEntryFactory() { nameTransform_ = new ZipNameTransform(); - isUnicodeText_ = ZipStrings.UseUnicode; + isUnicodeText_ = true; } /// @@ -162,7 +162,7 @@ public int SetAttributes } /// - /// Get set a value indicating whether unidoce text should be set on. + /// Get set a value indicating whether unicode text should be set on. 
/// public bool IsUnicodeText { diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipFile.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipFile.cs index a07b19f0c..0a844916e 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipFile.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipFile.cs @@ -367,7 +367,7 @@ public string Password } else { - key = PkzipClassic.GenerateKeys(ZipStrings.ConvertToArray(value)); + key = PkzipClassic.GenerateKeys(ZipCryptoEncoding.GetBytes(value)); } rawPassword_ = value; @@ -390,6 +390,7 @@ private bool HaveKeys /// Opens a Zip file with the given name for reading. /// /// The name of the file to open. + /// /// The argument supplied is null. /// /// An i/o error occurs @@ -397,13 +398,18 @@ private bool HaveKeys /// /// The file doesn't contain a valid zip archive. /// - public ZipFile(string name) + public ZipFile(string name, StringCodec stringCodec = null) { name_ = name ?? throw new ArgumentNullException(nameof(name)); baseStream_ = File.Open(name, FileMode.Open, FileAccess.Read, FileShare.Read); isStreamOwner = true; + if (stringCodec != null) + { + _stringCodec = stringCodec; + } + try { ReadEntries(); @@ -725,6 +731,21 @@ public ZipEntry this[int index] } } + + /// + public Encoding ZipCryptoEncoding + { + get => _stringCodec.ZipCryptoEncoding; + set => _stringCodec.ZipCryptoEncoding = value; + } + + /// + public StringCodec StringCodec + { + get => _stringCodec; + set => _stringCodec = value; + } + #endregion Properties #region Input Handling @@ -1189,6 +1210,8 @@ private long TestLocalHeader(ZipEntry entry, HeaderTest tests) throw new ZipException(string.Format("Version required to extract this entry is invalid ({0})", extractVersion)); } + var localEncoding = _stringCodec.ZipInputEncoding(localFlags); + // Local entry flags dont have reserved bit set on. 
if ((localFlags & (int)(GeneralBitFlags.ReservedPKware4 | GeneralBitFlags.ReservedPkware14 | GeneralBitFlags.ReservedPkware15)) != 0) { @@ -1281,7 +1304,7 @@ private long TestLocalHeader(ZipEntry entry, HeaderTest tests) } // Name data has already been read convert it and compare. - string localName = ZipStrings.ConvertToStringExt(localFlags, nameData); + string localName = localEncoding.GetString(nameData); // Central directory and local entry name match if (localName != entry.Name) @@ -1577,11 +1600,11 @@ public void CommitUpdate() else { // Create an empty archive if none existed originally. - if (entries_.Length == 0) - { - byte[] theComment = (newComment_ != null) ? newComment_.RawComment : ZipStrings.ConvertToArray(comment_); - ZipFormat.WriteEndOfCentralDirectory(baseStream_, 0, 0, 0, theComment); - } + if (entries_.Length != 0) return; + byte[] theComment = (newComment_ != null) + ? newComment_.RawComment + : _stringCodec.ZipArchiveCommentEncoding.GetBytes(comment_); + ZipFormat.WriteEndOfCentralDirectory(baseStream_, 0, 0, 0, theComment); } } finally @@ -1614,7 +1637,7 @@ public void SetComment(string comment) CheckUpdating(); - newComment_ = new ZipString(comment); + newComment_ = new ZipString(comment, _stringCodec.ZipArchiveCommentEncoding); if (newComment_.RawLength > 0xffff) { @@ -2142,7 +2165,8 @@ private void WriteLocalEntryHeader(ZipUpdate update) WriteLEInt((int)entry.Size); } - byte[] name = ZipStrings.ConvertToArray(entry.Flags, entry.Name); + var entryEncoding = _stringCodec.ZipInputEncoding(entry.Flags); + byte[] name = entryEncoding.GetBytes(entry.Name); if (name.Length > 0xFFFF) { @@ -2249,7 +2273,8 @@ private int WriteCentralDirectoryHeader(ZipEntry entry) WriteLEInt((int)entry.Size); } - byte[] name = ZipStrings.ConvertToArray(entry.Flags, entry.Name); + var entryEncoding = _stringCodec.ZipInputEncoding(entry.Flags); + byte[] name = entryEncoding.GetBytes(entry.Name); if (name.Length > 0xFFFF) { @@ -3076,7 +3101,7 @@ private void 
RunUpdates() } } - byte[] theComment = (newComment_ != null) ? newComment_.RawComment : ZipStrings.ConvertToArray(comment_); + byte[] theComment = newComment_?.RawComment ?? _stringCodec.ZipArchiveCommentEncoding.GetBytes(comment_); ZipFormat.WriteEndOfCentralDirectory(workFile.baseStream_, updateCount_, sizeEntries, centralDirOffset, theComment); @@ -3469,7 +3494,7 @@ private void ReadEntries() byte[] comment = new byte[commentSize]; StreamUtils.ReadFully(baseStream_, comment); - comment_ = ZipStrings.ConvertToString(comment); + comment_ = _stringCodec.ZipArchiveCommentEncoding.GetString(comment); } else { @@ -3586,11 +3611,13 @@ private void ReadEntries() long offset = ReadLEUint(); byte[] buffer = new byte[Math.Max(nameLen, commentLen)]; + var entryEncoding = _stringCodec.ZipInputEncoding(bitFlags); StreamUtils.ReadFully(baseStream_, buffer, 0, nameLen); - string name = ZipStrings.ConvertToStringExt(bitFlags, buffer, nameLen); + string name = entryEncoding.GetString(buffer, 0, nameLen); + var unicode = entryEncoding.IsZipUnicode(); - var entry = new ZipEntry(name, versionToExtract, versionMadeBy, (CompressionMethod)method) + var entry = new ZipEntry(name, versionToExtract, versionMadeBy, (CompressionMethod)method, unicode) { Crc = crc & 0xffffffffL, Size = size & 0xffffffffL, @@ -3623,7 +3650,7 @@ private void ReadEntries() if (commentLen > 0) { StreamUtils.ReadFully(baseStream_, buffer, 0, commentLen); - entry.Comment = ZipStrings.ConvertToStringExt(bitFlags, buffer, commentLen); + entry.Comment = entryEncoding.GetString(buffer, 0, commentLen); } entries_[i] = entry; @@ -3767,7 +3794,7 @@ private static void WriteEncryptionHeader(Stream stream, long crcValue) private bool isDisposed_; private string name_; - private string comment_; + private string comment_ = string.Empty; private string rawPassword_; private Stream baseStream_; private bool isStreamOwner; @@ -3775,6 +3802,7 @@ private static void WriteEncryptionHeader(Stream stream, long crcValue) private 
ZipEntry[] entries_; private byte[] key; private bool isNewArchive_; + private StringCodec _stringCodec = ZipStrings.GetStringCodec(); // Default is dynamic which is not backwards compatible and can cause problems // with XP's built in compression which cant read Zip64 archives. @@ -3813,19 +3841,23 @@ private class ZipString /// Initialise a with a string. /// /// The textual string form. - public ZipString(string comment) + /// + public ZipString(string comment, Encoding encoding) { comment_ = comment; isSourceString_ = true; + _encoding = encoding; } /// /// Initialise a using a string in its binary 'raw' form. /// /// - public ZipString(byte[] rawString) + /// + public ZipString(byte[] rawString, Encoding encoding) { rawComment_ = rawString; + _encoding = encoding; } #endregion Constructors @@ -3834,10 +3866,7 @@ public ZipString(byte[] rawString) /// Get a value indicating the original source of data for this instance. /// True if the source was a string; false if the source was binary data. /// - public bool IsSourceString - { - get { return isSourceString_; } - } + public bool IsSourceString => isSourceString_; /// /// Get the length of the comment when represented as raw bytes. @@ -3882,7 +3911,7 @@ private void MakeTextAvailable() { if (comment_ == null) { - comment_ = ZipStrings.ConvertToString(rawComment_); + comment_ = _encoding.GetString(rawComment_); } } @@ -3890,7 +3919,7 @@ private void MakeBytesAvailable() { if (rawComment_ == null) { - rawComment_ = ZipStrings.ConvertToArray(comment_); + rawComment_ = _encoding.GetBytes(comment_); } } @@ -3899,7 +3928,7 @@ private void MakeBytesAvailable() /// /// The to convert to a string. /// The textual equivalent for the input value. 
- static public implicit operator string(ZipString zipString) + public static implicit operator string(ZipString zipString) { zipString.MakeTextAvailable(); return zipString.comment_; @@ -3910,6 +3939,7 @@ static public implicit operator string(ZipString zipString) private string comment_; private byte[] rawComment_; private readonly bool isSourceString_; + private readonly Encoding _encoding; #endregion Instance Fields } diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipFormat.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipFormat.cs index 75f6b72d7..a37ab3031 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipFormat.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipFormat.cs @@ -48,7 +48,7 @@ internal static class ZipFormat // Write the local file header // TODO: ZipFormat.WriteLocalHeader is not yet used and needs checking for ZipFile and ZipOuptutStream usage internal static int WriteLocalHeader(Stream stream, ZipEntry entry, out EntryPatchData patchData, - bool headerInfoAvailable, bool patchEntryHeader, long streamOffset) + bool headerInfoAvailable, bool patchEntryHeader, long streamOffset, StringCodec stringCodec) { patchData = new EntryPatchData(); @@ -95,7 +95,7 @@ internal static int WriteLocalHeader(Stream stream, ZipEntry entry, out EntryPat } } - byte[] name = ZipStrings.ConvertToArray(entry.Flags, entry.Name); + byte[] name = stringCodec.ZipOutputEncoding.GetBytes(entry.Name); if (name.Length > 0xFFFF) { @@ -385,7 +385,7 @@ internal static void ReadDataDescriptor(Stream stream, bool zip64, DescriptorDat } } - internal static int WriteEndEntry(Stream stream, ZipEntry entry) + internal static int WriteEndEntry(Stream stream, ZipEntry entry, StringCodec stringCodec) { stream.WriteLEInt(ZipConstants.CentralHeaderSignature); stream.WriteLEShort((entry.HostSystem << 8) | entry.VersionMadeBy); @@ -415,7 +415,7 @@ internal static int WriteEndEntry(Stream stream, ZipEntry entry) stream.WriteLEInt((int)entry.Size); } - byte[] name = ZipStrings.ConvertToArray(entry.Flags, 
entry.Name); + byte[] name = stringCodec.ZipOutputEncoding.GetBytes(entry.Name); if (name.Length > 0xffff) { @@ -458,7 +458,7 @@ internal static int WriteEndEntry(Stream stream, ZipEntry entry) byte[] extra = ed.GetEntryData(); byte[] entryComment = !(entry.Comment is null) - ? ZipStrings.ConvertToArray(entry.Flags, entry.Comment) + ? stringCodec.ZipOutputEncoding.GetBytes(entry.Comment) : Empty.Array(); if (entryComment.Length > 0xffff) diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipInputStream.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipInputStream.cs index cccac6639..1b5b0ad53 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipInputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipInputStream.cs @@ -76,6 +76,7 @@ public class ZipInputStream : InflaterInputStream private CompressionMethod method; private int flags; private string password; + private readonly StringCodec _stringCodec = ZipStrings.GetStringCodec(); #endregion Instance Fields @@ -221,9 +222,11 @@ public ZipEntry GetNextEntry() byte[] buffer = new byte[nameLen]; inputBuffer.ReadRawBuffer(buffer); - string name = ZipStrings.ConvertToStringExt(flags, buffer); + var entryEncoding = _stringCodec.ZipInputEncoding(flags); + string name = entryEncoding.GetString(buffer); + var unicode = entryEncoding.IsZipUnicode(); - entry = new ZipEntry(name, versionRequiredToExtract, ZipConstants.VersionMadeBy, method) + entry = new ZipEntry(name, versionRequiredToExtract, ZipConstants.VersionMadeBy, method, unicode) { Flags = flags, }; @@ -524,7 +527,7 @@ private int InitialRead(byte[] destination, int offset, int count) // Generate and set crypto transform... 
var managed = new PkzipClassicManaged(); - byte[] key = PkzipClassic.GenerateKeys(ZipStrings.ConvertToArray(password)); + byte[] key = PkzipClassic.GenerateKeys(_stringCodec.ZipCryptoEncoding.GetBytes(password)); inputBuffer.CryptoTransform = managed.CreateDecryptor(key, null); diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipOutputStream.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipOutputStream.cs index 7aa3295fe..0b292fb3f 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipOutputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipOutputStream.cs @@ -80,6 +80,11 @@ public ZipOutputStream(Stream baseOutputStream, int bufferSize) { } + internal ZipOutputStream(Stream baseOutputStream, StringCodec stringCodec) : this(baseOutputStream) + { + _stringCodec = stringCodec; + } + #endregion Constructors /// @@ -105,8 +110,7 @@ public bool IsFinished /// public void SetComment(string comment) { - // TODO: Its not yet clear how to handle unicode comments here. - byte[] commentBytes = ZipStrings.ConvertToArray(comment); + byte[] commentBytes = _stringCodec.ZipArchiveCommentEncoding.GetBytes(comment); if (commentBytes.Length > 0xffff) { throw new ArgumentOutOfRangeException(nameof(comment)); @@ -392,7 +396,7 @@ internal void PutNextEntry(Stream stream, ZipEntry entry, long streamOffset = 0) // Write the local file header offset += ZipFormat.WriteLocalHeader(stream, entry, out var entryPatchData, - headerInfoAvailable, patchEntryHeader, streamOffset); + headerInfoAvailable, patchEntryHeader, streamOffset, _stringCodec); patchData = entryPatchData; @@ -652,7 +656,7 @@ private byte[] CreateZipCryptoHeader(long crcValue) private void InitializeZipCryptoPassword(string password) { var pkManaged = new PkzipClassicManaged(); - byte[] key = PkzipClassic.GenerateKeys(ZipStrings.ConvertToArray(password)); + byte[] key = PkzipClassic.GenerateKeys(ZipCryptoEncoding.GetBytes(password)); cryptoTransform_ = pkManaged.CreateEncryptor(key, null); } @@ -765,7 +769,7 @@ public override void Finish() 
foreach (var entry in entries) { - sizeEntries += ZipFormat.WriteEndEntry(baseOutputStream_, entry); + sizeEntries += ZipFormat.WriteEndEntry(baseOutputStream_, entry, _stringCodec); } ZipFormat.WriteEndOfCentralDirectory(baseOutputStream_, numEntries, sizeEntries, offset, zipComment); @@ -795,7 +799,7 @@ public override async Task FinishAsync(CancellationToken ct) { await baseOutputStream_.WriteProcToStreamAsync(ms, s => { - sizeEntries += ZipFormat.WriteEndEntry(s, entry); + sizeEntries += ZipFormat.WriteEndEntry(s, entry, _stringCodec); }, ct); } @@ -880,6 +884,8 @@ public override void Flush() /// private string password; + private readonly StringCodec _stringCodec = ZipStrings.GetStringCodec(); + #endregion Instance Fields #region Static Fields diff --git a/src/ICSharpCode.SharpZipLib/Zip/ZipStrings.cs b/src/ICSharpCode.SharpZipLib/Zip/ZipStrings.cs index 2d0c4cff4..29fa98014 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/ZipStrings.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/ZipStrings.cs @@ -4,191 +4,210 @@ namespace ICSharpCode.SharpZipLib.Zip { + internal static class EncodingExtensions + { + public static bool IsZipUnicode(this Encoding e) + => e.Equals(StringCodec.UnicodeZipEncoding); + } + /// - /// This static class contains functions for encoding and decoding zip file strings + /// Deprecated way of setting zip encoding provided for backwards compatibility. + /// Use when possible. /// + /// + /// If any ZipStrings properties are being modified, it will enter a backwards compatibility mode, mimicking the + /// old behaviour where a single instance was shared between all Zip* instances. + /// public static class ZipStrings { - static ZipStrings() + static readonly StringCodec CompatCodec = new StringCodec(); + + private static bool compatibilityMode; + + /// + /// Returns a new instance or the shared backwards compatible instance. + /// + /// + public static StringCodec GetStringCodec() + => compatibilityMode ? 
CompatCodec : new StringCodec(); + + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static int CodePage { - try + get => CompatCodec.CodePage; + set { - var platformCodepage = Encoding.GetEncoding(0).CodePage; - SystemDefaultCodePage = (platformCodepage == 1 || platformCodepage == 2 || platformCodepage == 3 || platformCodepage == 42) ? FallbackCodePage : platformCodepage; + CompatCodec.CodePage = value; + compatibilityMode = true; } - catch + } + + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static int SystemDefaultCodePage => StringCodec.SystemDefaultCodePage; + + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static bool UseUnicode + { + get => !CompatCodec.ForceZipLegacyEncoding; + set { - SystemDefaultCodePage = FallbackCodePage; + CompatCodec.ForceZipLegacyEncoding = !value; + compatibilityMode = true; } } - /// Code page backing field - /// - /// The original Zip specification (https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) states - /// that file names should only be encoded with IBM Code Page 437 or UTF-8. - /// In practice, most zip apps use OEM or system encoding (typically cp437 on Windows). 
- /// Let's be good citizens and default to UTF-8 http://utf8everywhere.org/ - /// - private static int codePage = AutomaticCodePage; + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + private static bool HasUnicodeFlag(int flags) + => ((GeneralBitFlags)flags).HasFlag(GeneralBitFlags.UnicodeText); + + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static string ConvertToString(byte[] data, int count) + => CompatCodec.ZipOutputEncoding.GetString(data, 0, count); - /// Automatically select codepage while opening archive - /// see https://github.com/icsharpcode/SharpZipLib/pull/280#issuecomment-433608324 - /// - private const int AutomaticCodePage = -1; + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static string ConvertToString(byte[] data) + => CompatCodec.ZipOutputEncoding.GetString(data); + + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static string ConvertToStringExt(int flags, byte[] data, int count) + => CompatCodec.ZipEncoding(HasUnicodeFlag(flags)).GetString(data, 0, count); - /// - /// Encoding used for string conversion. Setting this to 65001 (UTF-8) will - /// also set the Language encoding flag to indicate UTF-8 encoded file names. - /// - public static int CodePage + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static string ConvertToStringExt(int flags, byte[] data) + => CompatCodec.ZipEncoding(HasUnicodeFlag(flags)).GetString(data); + + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static byte[] ConvertToArray(string str) + => ConvertToArray(0, str); + + /// + [Obsolete("Use ZipFile/Zip*Stream StringCodec instead")] + public static byte[] ConvertToArray(int flags, string str) + => (string.IsNullOrEmpty(str)) + ? 
Empty.Array() + : CompatCodec.ZipEncoding(HasUnicodeFlag(flags)).GetBytes(str); + } + + /// + /// Utility class for resolving the encoding used for reading and writing strings + /// + public class StringCodec + { + static StringCodec() { - get + try { - return codePage == AutomaticCodePage? Encoding.UTF8.CodePage:codePage; + var platformCodepage = Encoding.Default.CodePage; + SystemDefaultCodePage = (platformCodepage == 1 || platformCodepage == 2 || platformCodepage == 3 || platformCodepage == 42) ? FallbackCodePage : platformCodepage; } - set + catch { - if ((value < 0) || (value > 65535) || - (value == 1) || (value == 2) || (value == 3) || (value == 42)) - { - throw new ArgumentOutOfRangeException(nameof(value)); - } - - codePage = value; + SystemDefaultCodePage = FallbackCodePage; } + + SystemDefaultEncoding = Encoding.GetEncoding(SystemDefaultCodePage); } - private const int FallbackCodePage = 437; + /// + /// If set, use the encoding set by for zip entries instead of the defaults + /// + public bool ForceZipLegacyEncoding { get; set; } /// - /// Attempt to get the operating system default codepage, or failing that, to - /// the fallback code page IBM 437. + /// The default encoding used for ZipCrypto passwords in zip files, set to + /// for greatest compatibility. /// - public static int SystemDefaultCodePage { get; } + public static Encoding DefaultZipCryptoEncoding => SystemDefaultEncoding; + + /// + /// Returns the encoding for an output . + /// Unless overridden by it returns . + /// + public Encoding ZipOutputEncoding => ZipEncoding(!ForceZipLegacyEncoding); /// - /// Get whether the default codepage is set to UTF-8. Setting this property to false will - /// set the to + /// Returns if is set, otherwise it returns the encoding indicated by /// + public Encoding ZipEncoding(bool unicode) => unicode ? UnicodeZipEncoding : _legacyEncoding; + + /// + /// Returns the appropriate encoding for an input according to . 
+ /// If overridden by , it always returns the encoding indicated by . + /// + /// + /// + public Encoding ZipInputEncoding(GeneralBitFlags flags) => ZipInputEncoding((int)flags); + + /// + public Encoding ZipInputEncoding(int flags) => ZipEncoding(!ForceZipLegacyEncoding && (flags & (int)GeneralBitFlags.UnicodeText) != 0); + + /// Code page encoding, used for non-unicode strings /// - /// Get OEM codepage from NetFX, which parses the NLP file with culture info table etc etc. - /// But sometimes it yields the special value of 1 which is nicknamed CodePageNoOEM in sources (might also mean CP_OEMCP, but Encoding puts it so). - /// This was observed on Ukranian and Hindu systems. - /// Given this value, throws an . - /// So replace it with , (IBM 437 which is the default code page in a default Windows installation console. + /// The original Zip specification (https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) states + /// that file names should only be encoded with IBM Code Page 437 or UTF-8. + /// In practice, most zip apps use OEM or system encoding (typically cp437 on Windows). + /// Let's be good citizens and default to UTF-8 http://utf8everywhere.org/ /// - public static bool UseUnicode - { - get - { - return codePage == Encoding.UTF8.CodePage; - } - set - { - if (value) - { - codePage = Encoding.UTF8.CodePage; - } - else - { - codePage = SystemDefaultCodePage; - } - } - } + private Encoding _legacyEncoding = SystemDefaultEncoding; + + private Encoding _zipArchiveCommentEncoding; + private Encoding _zipCryptoEncoding; /// - /// Convert a portion of a byte array to a string using + /// Returns the UTF-8 code page (65001) used for zip entries with unicode flag set /// - /// - /// Data to convert to string - /// - /// - /// Number of bytes to convert starting from index 0 - /// - /// - /// data[0]..data[count - 1] converted to a string - /// - public static string ConvertToString(byte[] data, int count) - => data == null - ? 
string.Empty - : Encoding.GetEncoding(CodePage).GetString(data, 0, count); + public static readonly Encoding UnicodeZipEncoding = Encoding.UTF8; /// - /// Convert a byte array to a string using + /// Code page used for non-unicode strings and legacy zip encoding (if is set). + /// Default value is /// - /// - /// Byte array to convert - /// - /// - /// dataconverted to a string - /// - public static string ConvertToString(byte[] data) - => ConvertToString(data, data.Length); - - private static Encoding EncodingFromFlag(int flags) - => ((flags & (int)GeneralBitFlags.UnicodeText) != 0) - ? Encoding.UTF8 - : Encoding.GetEncoding( - // if CodePage wasn't set manually and no utf flag present - // then we must use SystemDefault (old behavior) - // otherwise, CodePage should be preferred over SystemDefault - // see https://github.com/icsharpcode/SharpZipLib/issues/274 - codePage == AutomaticCodePage? - SystemDefaultCodePage: - codePage); + public int CodePage + { + get => _legacyEncoding.CodePage; + set => _legacyEncoding = (value < 4 || value > 65535 || value == 42) + ? throw new ArgumentOutOfRangeException(nameof(value)) + : Encoding.GetEncoding(value); + } + + private const int FallbackCodePage = 437; /// - /// Convert a byte array to a string using + /// Operating system default codepage, or if it could not be retrieved, the fallback code page IBM 437. /// - /// The applicable general purpose bits flags - /// - /// Byte array to convert - /// - /// The number of bytes to convert. - /// - /// dataconverted to a string - /// - public static string ConvertToStringExt(int flags, byte[] data, int count) - => (data == null) - ? 
string.Empty - : EncodingFromFlag(flags).GetString(data, 0, count); + public static int SystemDefaultCodePage { get; } /// - /// Convert a byte array to a string using + /// The system default encoding, based on /// - /// - /// Byte array to convert - /// - /// The applicable general purpose bits flags - /// - /// dataconverted to a string - /// - public static string ConvertToStringExt(int flags, byte[] data) - => ConvertToStringExt(flags, data, data.Length); + public static Encoding SystemDefaultEncoding { get; } /// - /// Convert a string to a byte array using + /// The encoding used for the zip archive comment. Defaults to the encoding for , since + /// no unicode flag can be set for it in the files. /// - /// - /// String to convert to an array - /// - /// Converted array - public static byte[] ConvertToArray(string str) - => str == null - ? Empty.Array() - : Encoding.GetEncoding(CodePage).GetBytes(str); + public Encoding ZipArchiveCommentEncoding + { + get => _zipArchiveCommentEncoding ?? _legacyEncoding; + set => _zipArchiveCommentEncoding = value; + } /// - /// Convert a string to a byte array using + /// The encoding used for the ZipCrypto passwords. Defaults to . /// - /// The applicable general purpose bits flags - /// - /// String to convert to an array - /// - /// Converted array - public static byte[] ConvertToArray(int flags, string str) - => (string.IsNullOrEmpty(str)) - ? Empty.Array() - : EncodingFromFlag(flags).GetBytes(str); + public Encoding ZipCryptoEncoding + { + get => _zipCryptoEncoding ?? 
DefaultZipCryptoEncoding; + set => _zipCryptoEncoding = value; + } } } diff --git a/test/ICSharpCode.SharpZipLib.Tests/Zip/FastZipHandling.cs b/test/ICSharpCode.SharpZipLib.Tests/Zip/FastZipHandling.cs index f1c9863da..90b5784ff 100644 --- a/test/ICSharpCode.SharpZipLib.Tests/Zip/FastZipHandling.cs +++ b/test/ICSharpCode.SharpZipLib.Tests/Zip/FastZipHandling.cs @@ -239,9 +239,14 @@ public void CreateExceptions() #region String testing helper - private void TestFileNames(IReadOnlyList names) + private void TestFileNames(int codePage, IReadOnlyList names) { var zippy = new FastZip(); + if (codePage > 0) + { + zippy.UseUnicode = false; + zippy.LegacyCodePage = codePage; + } using var tempDir = Utils.GetTempDir(); using var tempZip = Utils.GetTempFile(); @@ -254,7 +259,7 @@ private void TestFileNames(IReadOnlyList names) zippy.CreateZip(tempZip, tempDir, recurse: true, fileFilter: null); - using var zf = new ZipFile(tempZip); + using var zf = new ZipFile(tempZip, zippy.StringCodec); Assert.AreEqual(nameCount, zf.Count); foreach (var name in names) { @@ -264,7 +269,7 @@ private void TestFileNames(IReadOnlyList names) var entry = zf[index]; - if (ZipStrings.UseUnicode) + if (zippy.UseUnicode) { Assert.IsTrue(entry.IsUnicodeText, "Zip entry #{0} not marked as unicode", index); } @@ -288,15 +293,7 @@ private void TestFileNames(IReadOnlyList names) [Category("Unicode")] public void UnicodeText() { - var preCp = ZipStrings.CodePage; - try - { - TestFileNames(StringTesting.Filenames.ToArray()); - } - finally - { - ZipStrings.CodePage = preCp; - } + TestFileNames(0, StringTesting.Filenames.ToArray()); } [Test] @@ -304,35 +301,26 @@ public void UnicodeText() [Category("Unicode")] public void NonUnicodeText() { - var preCp = ZipStrings.CodePage; - try - { - Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); - foreach (var (language, filename, encoding) in StringTesting.TestSamples) + foreach (var 
(language, filename, encoding) in StringTesting.TestSamples) + { + Console.WriteLine($"{language} filename \"{filename}\" using \"{encoding}\":"); + + // TODO: samples of this test must be reversible + // Some samples can't be restored back with their encoding. + // test wasn't failing only because SystemDefaultCodepage is 65001 on Net.Core and + // old behaviour actually was using Unicode instead of user's passed codepage + var encoder = Encoding.GetEncoding(encoding); + var bytes = encoder.GetBytes(filename); + var restoredString = encoder.GetString(bytes); + if(string.CompareOrdinal(filename, restoredString) != 0) { - Console.WriteLine($"{language} filename \"{filename}\" using \"{encoding}\":"); - - // TODO: samples of this test must be reversible - // Some samples can't be restored back with their encoding. - // test wasn't failing only because SystemDefaultCodepage is 65001 on Net.Core and - // old behaviour actually was using Unicode instead of user's passed codepage - var encoder = Encoding.GetEncoding(encoding); - var bytes = encoder.GetBytes(filename); - var restoredString = encoder.GetString(bytes); - if(string.CompareOrdinal(filename, restoredString) != 0) - { - Console.WriteLine($"Sample for language {language} with value of {filename} is skipped, because it's irreversable"); - continue; - } - - ZipStrings.CodePage = Encoding.GetEncoding(encoding).CodePage; - TestFileNames(new []{filename}); + Console.WriteLine($"Sample for language {language} with value of {filename} is skipped, because it's irreversable"); + continue; } - } - finally - { - ZipStrings.CodePage = preCp; + + TestFileNames(Encoding.GetEncoding(encoding).CodePage, new [] { filename }); } } diff --git a/test/ICSharpCode.SharpZipLib.Tests/Zip/GeneralHandling.cs b/test/ICSharpCode.SharpZipLib.Tests/Zip/GeneralHandling.cs index c3e32064c..ad97563aa 100644 --- a/test/ICSharpCode.SharpZipLib.Tests/Zip/GeneralHandling.cs +++ b/test/ICSharpCode.SharpZipLib.Tests/Zip/GeneralHandling.cs @@ -856,20 
+856,17 @@ private object UnZipZeroLength(byte[] zipped) return result; } - private void CheckNameConversion(string toCheck) - { - byte[] intermediate = ZipStrings.ConvertToArray(toCheck); - string final = ZipStrings.ConvertToString(intermediate); - - Assert.AreEqual(toCheck, final, "Expected identical result"); - } - [Test] [Category("Zip")] - public void NameConversion() + [TestCase("Hello")] + [TestCase("a/b/c/d/e/f/g/h/SomethingLikeAnArchiveName.txt")] + public void LegacyNameConversion(string name) { - CheckNameConversion("Hello"); - CheckNameConversion("a/b/c/d/e/f/g/h/SomethingLikeAnArchiveName.txt"); + var encoding = new StringCodec().ZipEncoding(false); + byte[] intermediate = encoding.GetBytes(name); + string final = encoding.GetString(intermediate); + + Assert.AreEqual(name, final, "Expected identical result"); } [Test] @@ -878,22 +875,22 @@ public void UnicodeNameConversion() { Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); - ZipStrings.CodePage = 850; + var codec = new StringCodec() {CodePage = 850}; string sample = "Hello world"; byte[] rawData = Encoding.ASCII.GetBytes(sample); - string converted = ZipStrings.ConvertToStringExt(0, rawData); + var converted = codec.ZipInputEncoding(0).GetString(rawData); Assert.AreEqual(sample, converted); - converted = ZipStrings.ConvertToStringExt((int)GeneralBitFlags.UnicodeText, rawData); + converted = codec.ZipInputEncoding((int)GeneralBitFlags.UnicodeText).GetString(rawData); Assert.AreEqual(sample, converted); // This time use some greek characters sample = "\u03A5\u03d5\u03a3"; rawData = Encoding.UTF8.GetBytes(sample); - converted = ZipStrings.ConvertToStringExt((int)GeneralBitFlags.UnicodeText, rawData); + converted = codec.ZipInputEncoding((int)GeneralBitFlags.UnicodeText).GetString(rawData); Assert.AreEqual(sample, converted); } diff --git a/test/ICSharpCode.SharpZipLib.Tests/Zip/ZipStreamAsyncTests.cs b/test/ICSharpCode.SharpZipLib.Tests/Zip/ZipStreamAsyncTests.cs index b693f205d..5eb33c063 
100644 --- a/test/ICSharpCode.SharpZipLib.Tests/Zip/ZipStreamAsyncTests.cs +++ b/test/ICSharpCode.SharpZipLib.Tests/Zip/ZipStreamAsyncTests.cs @@ -10,13 +10,12 @@ namespace ICSharpCode.SharpZipLib.Tests.Zip [TestFixture] public class ZipStreamAsyncTests { - +#if NETCOREAPP3_1_OR_GREATER [Test] [Category("Zip")] [Category("Async")] public async Task WriteZipStreamUsingAsync() { -#if NETCOREAPP3_1_OR_GREATER await using var ms = new MemoryStream(); await using (var outStream = new ZipOutputStream(ms){IsStreamOwner = false}) @@ -29,8 +28,8 @@ public async Task WriteZipStreamUsingAsync() } ZipTesting.AssertValidZip(ms); -#endif } +#endif [Test] [Category("Zip")]