| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193 |
- using System;
- using System.Text;
namespace ICSharpCode.SharpZipLib.Zip
{
    /// <summary>
    /// This static class contains functions for encoding and decoding zip file strings
    /// </summary>
    public static class ZipStrings
    {
        /// <summary>
        /// Determines <see cref="SystemDefaultCodePage"/> once, when the class is first used.
        /// </summary>
        static ZipStrings()
        {
            try
            {
                // Code page 0 asks the platform for its default encoding.
                var platformCodepage = Encoding.GetEncoding(0).CodePage;
                SystemDefaultCodePage = IsReservedCodePage(platformCodepage)
                    ? FallbackCodePage
                    : platformCodepage;
            }
            catch
            {
                // Deliberate best effort: whatever goes wrong while querying the platform,
                // fall back instead of letting the type initializer fail.
                SystemDefaultCodePage = FallbackCodePage;
            }
        }

        /// <summary>Code page backing field</summary>
        /// <remarks>
        /// The original Zip specification (https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) states
        /// that file names should only be encoded with IBM Code Page 437 or UTF-8.
        /// In practice, most zip apps use OEM or system encoding (typically cp437 on Windows).
        /// Let's be good citizens and default to UTF-8 http://utf8everywhere.org/
        /// </remarks>
        private static int codePage = AutomaticCodePage;

        /// <summary>
        /// Marker value meaning that no code page has been set explicitly, so the code page
        /// is selected automatically while opening an archive.
        /// </summary>
        /// <remarks>
        /// See https://github.com/icsharpcode/SharpZipLib/pull/280#issuecomment-433608324
        /// </remarks>
        private const int AutomaticCodePage = -1;

        /// <summary>
        /// Encoding used for string conversion. Setting this to 65001 (UTF-8) will
        /// also set the Language encoding flag to indicate UTF-8 encoded file names.
        /// </summary>
        /// <remarks>
        /// While no code page has been set explicitly, the getter reports the UTF-8 code page.
        /// </remarks>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The value being set is negative, greater than 65535, or one of the
        /// reserved code pages 1, 2, 3 or 42.
        /// </exception>
        public static int CodePage
        {
            get
            {
                return codePage == AutomaticCodePage ? Encoding.UTF8.CodePage : codePage;
            }
            set
            {
                if ((value < 0) || (value > 65535) || IsReservedCodePage(value))
                {
                    throw new ArgumentOutOfRangeException(nameof(value));
                }
                codePage = value;
            }
        }

        /// <summary>
        /// Code page used when the operating system default is unavailable or unusable (IBM 437).
        /// </summary>
        private const int FallbackCodePage = 437;

        /// <summary>
        /// Attempt to get the operating system default codepage, or failing that, to
        /// the fallback code page IBM 437.
        /// </summary>
        /// <remarks>
        /// The OEM codepage is obtained from NetFX, which parses the NLP file with the culture info table.
        /// Sometimes it yields the special value of 1, which is nicknamed <c>CodePageNoOEM</c> in the
        /// <see cref="Encoding"/> sources (it might also mean <c>CP_OEMCP</c>, but Encoding puts it so).
        /// This was observed on Ukrainian and Hindi systems.
        /// Given this value, <see cref="Encoding.GetEncoding(int)"/> throws an <see cref="ArgumentException"/>,
        /// so it is replaced with <see cref="FallbackCodePage"/> (IBM 437, which is the default code page
        /// in a default Windows installation console).
        /// </remarks>
        public static int SystemDefaultCodePage { get; }

        /// <summary>
        /// Get whether the codepage has been explicitly set to UTF-8. Setting this property to true
        /// sets the <see cref="CodePage"/> to UTF-8; setting it to false sets the
        /// <see cref="CodePage"/> to <see cref="SystemDefaultCodePage"/>.
        /// </summary>
        /// <remarks>
        /// While no code page has been set explicitly this returns false, even though
        /// <see cref="CodePage"/> reports UTF-8 in that state.
        /// </remarks>
        public static bool UseUnicode
        {
            get
            {
                return codePage == Encoding.UTF8.CodePage;
            }
            set
            {
                codePage = value ? Encoding.UTF8.CodePage : SystemDefaultCodePage;
            }
        }

        /// <summary>
        /// Tells whether a code page identifier is one of the special values (1, 2, 3, 42)
        /// that are never accepted as an actual code page by this class.
        /// </summary>
        /// <param name="candidate">The code page identifier to check</param>
        /// <returns>true if the identifier is one of the reserved values</returns>
        private static bool IsReservedCodePage(int candidate)
            => (candidate == 1) || (candidate == 2) || (candidate == 3) || (candidate == 42);

        /// <summary>
        /// Convert a portion of a byte array to a string using <see cref="CodePage"/>
        /// </summary>
        /// <param name="data">
        /// Data to convert to string
        /// </param>
        /// <param name="count">
        /// Number of bytes to convert starting from index 0
        /// </param>
        /// <returns>
        /// data[0]..data[count - 1] converted to a string, or an empty string if
        /// <paramref name="data"/> is null
        /// </returns>
        public static string ConvertToString(byte[] data, int count)
            => data == null
                ? string.Empty
                : Encoding.GetEncoding(CodePage).GetString(data, 0, count);

        /// <summary>
        /// Convert a byte array to a string using <see cref="CodePage"/>
        /// </summary>
        /// <param name="data">
        /// Byte array to convert
        /// </param>
        /// <returns>
        /// <paramref name="data"/> converted to a string, or an empty string if
        /// <paramref name="data"/> is null
        /// </returns>
        public static string ConvertToString(byte[] data)
            // Guard before reading Length so null is treated like in the (data, count) overload.
            => data == null
                ? string.Empty
                : ConvertToString(data, data.Length);

        /// <summary>
        /// Select the encoding that applies to an entry with the given general purpose bit flags.
        /// </summary>
        /// <param name="flags">The applicable general purpose bits flags</param>
        /// <returns>
        /// UTF-8 when the <see cref="GeneralBitFlags.UnicodeText"/> bit is set; otherwise the
        /// explicitly set code page, or <see cref="SystemDefaultCodePage"/> if none was set.
        /// </returns>
        private static Encoding EncodingFromFlag(int flags)
            => ((flags & (int)GeneralBitFlags.UnicodeText) != 0)
                ? Encoding.UTF8
                : Encoding.GetEncoding(
                    // if CodePage wasn't set manually and no utf flag present
                    // then we must use SystemDefault (old behavior)
                    // otherwise, CodePage should be preferred over SystemDefault
                    // see https://github.com/icsharpcode/SharpZipLib/issues/274
                    codePage == AutomaticCodePage
                        ? SystemDefaultCodePage
                        : codePage);

        /// <summary>
        /// Convert a portion of a byte array to a string using the encoding indicated by
        /// the general purpose bit flags
        /// </summary>
        /// <param name="flags">The applicable general purpose bits flags</param>
        /// <param name="data">
        /// Byte array to convert
        /// </param>
        /// <param name="count">The number of bytes to convert.</param>
        /// <returns>
        /// <paramref name="data"/> converted to a string, or an empty string if
        /// <paramref name="data"/> is null
        /// </returns>
        public static string ConvertToStringExt(int flags, byte[] data, int count)
            => (data == null)
                ? string.Empty
                : EncodingFromFlag(flags).GetString(data, 0, count);

        /// <summary>
        /// Convert a byte array to a string using the encoding indicated by
        /// the general purpose bit flags
        /// </summary>
        /// <param name="data">
        /// Byte array to convert
        /// </param>
        /// <param name="flags">The applicable general purpose bits flags</param>
        /// <returns>
        /// <paramref name="data"/> converted to a string, or an empty string if
        /// <paramref name="data"/> is null
        /// </returns>
        public static string ConvertToStringExt(int flags, byte[] data)
            // Guard before reading Length so null is treated like in the (flags, data, count) overload.
            => (data == null)
                ? string.Empty
                : ConvertToStringExt(flags, data, data.Length);

        /// <summary>
        /// Convert a string to a byte array using <see cref="CodePage"/>
        /// </summary>
        /// <param name="str">
        /// String to convert to an array
        /// </param>
        /// <returns>Converted array; an empty array if <paramref name="str"/> is null</returns>
        public static byte[] ConvertToArray(string str)
            => str == null
                ? new byte[0]
                : Encoding.GetEncoding(CodePage).GetBytes(str);

        /// <summary>
        /// Convert a string to a byte array using the encoding indicated by
        /// the general purpose bit flags
        /// </summary>
        /// <param name="flags">The applicable <see cref="GeneralBitFlags">general purpose bits flags</see></param>
        /// <param name="str">
        /// String to convert to an array
        /// </param>
        /// <returns>Converted array; an empty array if <paramref name="str"/> is null or empty</returns>
        public static byte[] ConvertToArray(int flags, string str)
            => (string.IsNullOrEmpty(str))
                ? new byte[0]
                : EncodingFromFlag(flags).GetBytes(str);
    }
}
|