ZipStrings.cs 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. using System;
  2. using System.Text;
  3. namespace ICSharpCode.SharpZipLib.Zip
  4. {
  /// <summary>
  /// This static class contains functions for encoding and decoding zip file strings
  /// </summary>
  public static class ZipStrings
  {
      /// <summary>
      /// Sentinel stored in the code page backing field while no code page has been set
      /// explicitly; the code page is then selected automatically while opening an archive.
      /// </summary>
      /// <remarks>
      /// See https://github.com/icsharpcode/SharpZipLib/pull/280#issuecomment-433608324
      /// </remarks>
      private const int AutomaticCodePage = -1;

      /// <summary>
      /// Code page (IBM 437) used when the operating system default code page is unusable
      /// or cannot be determined.
      /// </summary>
      private const int FallbackCodePage = 437;

      /// <summary>Code page backing field</summary>
      /// <remarks>
      /// The original Zip specification (https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) states
      /// that file names should only be encoded with IBM Code Page 437 or UTF-8.
      /// In practice, most zip apps use OEM or system encoding (typically cp437 on Windows).
      /// Let's be good citizens and default to UTF-8 http://utf8everywhere.org/
      /// The field starts out as <see cref="AutomaticCodePage"/>, which <see cref="CodePage"/>
      /// reports as UTF-8.
      /// </remarks>
      private static int codePage = AutomaticCodePage;

      /// <summary>
      /// Determines <see cref="SystemDefaultCodePage"/> once, when the class is first used.
      /// </summary>
      static ZipStrings()
      {
          try
          {
              var platformCodepage = Encoding.GetEncoding(0).CodePage;
              SystemDefaultCodePage = IsSpecialCodePage(platformCodepage)
                  ? FallbackCodePage
                  : platformCodepage;
          }
          catch
          {
              // Deliberate best effort: whatever goes wrong while querying the platform
              // encoding, the class must still be usable, so fall back to IBM 437.
              SystemDefaultCodePage = FallbackCodePage;
          }
      }

      /// <summary>
      /// Tells whether <paramref name="value"/> is one of the special code page numbers
      /// (1, 2, 3 or 42) that are neither accepted by the <see cref="CodePage"/> setter
      /// nor used as <see cref="SystemDefaultCodePage"/>.
      /// </summary>
      /// <param name="value">The code page number to check</param>
      /// <returns>true if the value is one of the special numbers, otherwise false</returns>
      private static bool IsSpecialCodePage(int value)
          => value == 1 || value == 2 || value == 3 || value == 42;

      /// <summary>
      /// Encoding used for string conversion. Setting this to 65001 (UTF-8) will
      /// also set the Language encoding flag to indicate UTF-8 encoded file names.
      /// </summary>
      /// <remarks>
      /// While no code page has been set explicitly, the getter returns the UTF-8 code page.
      /// </remarks>
      /// <exception cref="ArgumentOutOfRangeException">
      /// The value being set is negative, greater than 65535, or one of 1, 2, 3 and 42.
      /// </exception>
      public static int CodePage
      {
          get
          {
              return codePage == AutomaticCodePage ? Encoding.UTF8.CodePage : codePage;
          }
          set
          {
              if (value < 0 || value > 65535 || IsSpecialCodePage(value))
              {
                  throw new ArgumentOutOfRangeException(nameof(value));
              }

              codePage = value;
          }
      }

      /// <summary>
      /// Attempt to get the operating system default codepage, or failing that, to
      /// the fallback code page IBM 437.
      /// </summary>
      /// <remarks>
      /// Get OEM codepage from NetFX, which parses the NLP file with culture info table etc etc.
      /// But sometimes it yields the special value of 1 which is nicknamed <c>CodePageNoOEM</c> in <see cref="Encoding"/> sources (might also mean <c>CP_OEMCP</c>, but Encoding puts it so).
      /// This was observed on Ukrainian and Hindi systems.
      /// Given this value, <see cref="Encoding.GetEncoding(int)"/> throws an <see cref="ArgumentException"/>.
      /// So replace it with <see cref="FallbackCodePage"/> (IBM 437, which is the default code page in a default Windows installation console).
      /// </remarks>
      public static int SystemDefaultCodePage { get; }

      /// <summary>
      /// Get whether the default codepage is set to UTF-8. Setting this property to false will
      /// set the <see cref="CodePage"/> to <see cref="SystemDefaultCodePage"/>
      /// </summary>
      /// <remarks>
      /// The getter compares the backing field directly, so it returns false while no code
      /// page has been set explicitly, even though <see cref="CodePage"/> reports UTF-8 then.
      /// </remarks>
      public static bool UseUnicode
      {
          get
          {
              return codePage == Encoding.UTF8.CodePage;
          }
          set
          {
              codePage = value ? Encoding.UTF8.CodePage : SystemDefaultCodePage;
          }
      }

      /// <summary>
      /// Convert a portion of a byte array to a string using <see cref="CodePage"/>
      /// </summary>
      /// <param name="data">
      /// Data to convert to string
      /// </param>
      /// <param name="count">
      /// Number of bytes to convert starting from index 0
      /// </param>
      /// <returns>
      /// data[0]..data[count - 1] converted to a string, or an empty string if
      /// <paramref name="data"/> is null
      /// </returns>
      public static string ConvertToString(byte[] data, int count)
          => data == null
              ? string.Empty
              : Encoding.GetEncoding(CodePage).GetString(data, 0, count);

      /// <summary>
      /// Convert a byte array to a string using <see cref="CodePage"/>
      /// </summary>
      /// <param name="data">
      /// Byte array to convert
      /// </param>
      /// <returns>
      /// <paramref name="data"/> converted to a string, or an empty string if
      /// <paramref name="data"/> is null
      /// </returns>
      public static string ConvertToString(byte[] data)
          // Guard before reading data.Length so that null yields an empty string,
          // exactly like the (data, count) overload, instead of a NullReferenceException.
          => data == null
              ? string.Empty
              : ConvertToString(data, data.Length);

      /// <summary>
      /// Select the encoding that applies to an entry with the given general purpose bit flags.
      /// </summary>
      /// <param name="flags">The applicable general purpose bits flags</param>
      /// <returns>
      /// UTF-8 if the <see cref="GeneralBitFlags.UnicodeText"/> bit is set; otherwise the
      /// encoding for the explicitly set code page, or for <see cref="SystemDefaultCodePage"/>
      /// if no code page was set.
      /// </returns>
      private static Encoding EncodingFromFlag(int flags)
      {
          if ((flags & (int)GeneralBitFlags.UnicodeText) != 0)
          {
              return Encoding.UTF8;
          }

          // if CodePage wasn't set manually and no utf flag present
          // then we must use SystemDefault (old behavior)
          // otherwise, CodePage should be preferred over SystemDefault
          // see https://github.com/icsharpcode/SharpZipLib/issues/274
          return Encoding.GetEncoding(
              codePage == AutomaticCodePage ? SystemDefaultCodePage : codePage);
      }

      /// <summary>
      /// Convert a portion of a byte array to a string, using UTF-8 if the unicode flag is
      /// set in <paramref name="flags"/> and the configured or system default code page otherwise
      /// </summary>
      /// <param name="flags">The applicable general purpose bits flags</param>
      /// <param name="data">
      /// Byte array to convert
      /// </param>
      /// <param name="count">The number of bytes to convert.</param>
      /// <returns>
      /// <paramref name="data"/> converted to a string, or an empty string if
      /// <paramref name="data"/> is null
      /// </returns>
      public static string ConvertToStringExt(int flags, byte[] data, int count)
          => data == null
              ? string.Empty
              : EncodingFromFlag(flags).GetString(data, 0, count);

      /// <summary>
      /// Convert a byte array to a string, using UTF-8 if the unicode flag is set in
      /// <paramref name="flags"/> and the configured or system default code page otherwise
      /// </summary>
      /// <param name="flags">The applicable general purpose bits flags</param>
      /// <param name="data">
      /// Byte array to convert
      /// </param>
      /// <returns>
      /// <paramref name="data"/> converted to a string, or an empty string if
      /// <paramref name="data"/> is null
      /// </returns>
      public static string ConvertToStringExt(int flags, byte[] data)
          // Guard before reading data.Length so that null yields an empty string,
          // exactly like the (flags, data, count) overload.
          => data == null
              ? string.Empty
              : ConvertToStringExt(flags, data, data.Length);

      /// <summary>
      /// Convert a string to a byte array using <see cref="CodePage"/>
      /// </summary>
      /// <param name="str">
      /// String to convert to an array
      /// </param>
      /// <returns>Converted array, or an empty array if <paramref name="str"/> is null</returns>
      public static byte[] ConvertToArray(string str)
          => str == null
              ? new byte[0]
              : Encoding.GetEncoding(CodePage).GetBytes(str);

      /// <summary>
      /// Convert a string to a byte array, using UTF-8 if the unicode flag is set in
      /// <paramref name="flags"/> and the configured or system default code page otherwise
      /// </summary>
      /// <param name="flags">The applicable <see cref="GeneralBitFlags">general purpose bits flags</see></param>
      /// <param name="str">
      /// String to convert to an array
      /// </param>
      /// <returns>Converted array, or an empty array if <paramref name="str"/> is null or empty</returns>
      public static byte[] ConvertToArray(int flags, string str)
          => string.IsNullOrEmpty(str)
              ? new byte[0]
              : EncodingFromFlag(flags).GetBytes(str);
  }
  178. }