GzipInputStream.cs 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. using ICSharpCode.SharpZipLib.Checksum;
  2. using ICSharpCode.SharpZipLib.Zip.Compression;
  3. using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
  4. using System;
  5. using System.IO;
  6. namespace ICSharpCode.SharpZipLib.GZip
  7. {
  /// <summary>
  /// This filter stream is used to decompress a "GZIP" format stream.
  /// The "GZIP" format is described in RFC 1952.
  ///
  /// author of the original java version : John Leuner
  /// </summary>
  /// <example> This sample shows how to unzip a gzipped file
  /// <code>
  /// using System;
  /// using System.IO;
  ///
  /// using ICSharpCode.SharpZipLib.Core;
  /// using ICSharpCode.SharpZipLib.GZip;
  ///
  /// class MainClass
  /// {
  ///     public static void Main(string[] args)
  ///     {
  ///         using (Stream inStream = new GZipInputStream(File.OpenRead(args[0])))
  ///         using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
  ///             byte[] buffer = new byte[4096];
  ///             StreamUtils.Copy(inStream, outStream, buffer);
  ///         }
  ///     }
  /// }
  /// </code>
  /// </example>
  public class GZipInputStream : InflaterInputStream
  {
      #region Instance Fields

      /// <summary>
      /// CRC-32 value for uncompressed data
      /// </summary>
      protected Crc32 crc;

      /// <summary>
      /// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
      /// This is tracked per-block as the file is parsed.
      /// </summary>
      private bool readGZIPHeader;

      /// <summary>
      /// Flag to indicate if at least one block in a stream with concatenated blocks was read successfully.
      /// This allows us to exit gracefully if downstream data is not in gzip format.
      /// </summary>
      private bool completedLastBlock;

      #endregion Instance Fields

      #region Constructors

      /// <summary>
      /// Creates a GZipInputStream with the default buffer size (4096 bytes)
      /// </summary>
      /// <param name="baseInputStream">
      /// The stream to read compressed data from (in GZIP format)
      /// </param>
      public GZipInputStream(Stream baseInputStream)
          : this(baseInputStream, 4096)
      {
      }

      /// <summary>
      /// Creates a GZipInputStream with the specified buffer size
      /// </summary>
      /// <param name="baseInputStream">
      /// The stream to read compressed data from (in GZIP format)
      /// </param>
      /// <param name="size">
      /// Size of the buffer to use
      /// </param>
      public GZipInputStream(Stream baseInputStream, int size)
          : base(baseInputStream, new Inflater(true), size)
      {
      }

      #endregion Constructors

      #region Stream overrides

      /// <summary>
      /// Reads uncompressed data into an array of bytes
      /// </summary>
      /// <param name="buffer">
      /// The buffer to read uncompressed data into
      /// </param>
      /// <param name="offset">
      /// The offset indicating where the data should be placed
      /// </param>
      /// <param name="count">
      /// The number of uncompressed bytes to be read
      /// </param>
      /// <returns>
      /// Returns the number of bytes actually read. Zero is returned when there is no further
      /// member header to read, when <paramref name="count"/> is zero, or when the data following
      /// an already completed member does not parse as a GZIP header.
      /// </returns>
      /// <exception cref="GZipException">
      /// A header is malformed (before any member has been completed), or the footer CRC or
      /// length does not match the decompressed data.
      /// </exception>
      /// <exception cref="EndOfStreamException">
      /// The input ends inside a footer, or inside a header before any member has been completed.
      /// </exception>
      public override int Read(byte[] buffer, int offset, int count)
      {
          // A GZIP file can contain multiple blocks of compressed data, although this is quite rare.
          // A compressed block could potentially be empty, so we need to loop until we reach EOF or
          // we find data.
          while (true)
          {
              // If we haven't read the header for this block, read it
              if (!readGZIPHeader)
              {
                  // Try to read header. If there is no header (0 bytes available), this is EOF. If there is
                  // an incomplete header, this will throw an exception.
                  try
                  {
                      if (!ReadHeader())
                      {
                          return 0;
                      }
                  }
                  catch (Exception ex) when (completedLastBlock && (ex is GZipException || ex is EndOfStreamException))
                  {
                      // If we completed the last block (i.e. we're in a stream that has multiple blocks
                      // concatenated) we want to return gracefully from any header parsing exceptions,
                      // since sometimes there may be trailing garbage on a stream.
                      return 0;
                  }
              }

              // Try to read compressed data
              int bytesRead = base.Read(buffer, offset, count);
              if (bytesRead > 0)
              {
                  // The running CRC covers the uncompressed bytes; it is compared with the footer later.
                  crc.Update(new ArraySegment<byte>(buffer, offset, bytesRead));
              }

              // If this is the end of the current member, read and verify the footer
              if (inf.IsFinished)
              {
                  ReadFooter();
              }

              // Attempting to read 0 bytes will never yield any bytesRead, so we return instead of looping forever
              if (bytesRead > 0 || count == 0)
              {
                  return bytesRead;
              }
          }
      }

      #endregion Stream overrides

      #region Support routines

      /// <summary>
      /// Reads one byte of the GZIP header from the input buffer.
      /// </summary>
      /// <returns>The byte that was read.</returns>
      /// <exception cref="EndOfStreamException">The input buffer reported a negative value.</exception>
      private int ReadHeaderByte()
      {
          int value = inputBuffer.ReadLeByte();
          if (value < 0)
          {
              throw new EndOfStreamException("EOS reading GZIP header");
          }

          return value;
      }

      /// <summary>
      /// Reads one byte of the GZIP header and folds it into the running header CRC.
      /// </summary>
      /// <param name="headCRC">The CRC accumulated over the header bytes read so far.</param>
      /// <returns>The byte that was read.</returns>
      /// <exception cref="EndOfStreamException">The input buffer reported a negative value.</exception>
      private int ReadHeaderByte(Crc32 headCRC)
      {
          int value = ReadHeaderByte();
          headCRC.Update(value);
          return value;
      }

      /// <summary>
      /// Skips a zero-terminated header field (file name or comment), adding every byte
      /// including the terminator to the running header CRC.
      /// </summary>
      /// <param name="headCRC">The CRC accumulated over the header bytes read so far.</param>
      private void SkipZeroTerminatedField(Crc32 headCRC)
      {
          while (ReadHeaderByte(headCRC) != 0)
          {
              // Keep consuming until the terminating zero byte has been read.
          }
      }

      /// <summary>
      /// Parses the header of the next GZIP member and resets the data CRC for it.
      /// </summary>
      /// <returns>
      /// <c>true</c> if a complete header was read; <c>false</c> if no input was available at all.
      /// </returns>
      /// <exception cref="GZipException">
      /// The magic bytes, compression type, reserved flag bits or header CRC are invalid.
      /// </exception>
      /// <exception cref="EndOfStreamException">The input ended part-way through the header.</exception>
      private bool ReadHeader()
      {
          // Initialize CRC for this block
          crc = new Crc32();

          // Make sure there is data in file. We can't rely on ReadLeByte() to fill the buffer, as this could be EOF,
          // which is fine, but ReadLeByte() throws an exception if it doesn't find data, so we do this part ourselves.
          if (inputBuffer.Available <= 0)
          {
              inputBuffer.Fill();
              if (inputBuffer.Available <= 0)
              {
                  // No header, EOF.
                  return false;
              }
          }

          // CRC over every header byte that precedes the optional header CRC field
          var headCRC = new Crc32();

          // 1. Check the two magic bytes
          if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC >> 8))
          {
              throw new GZipException("Error GZIP header, first magic byte doesn't match");
          }

          if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC & 0xFF))
          {
              throw new GZipException("Error GZIP header, second magic byte doesn't match");
          }

          // 2. Check the compression type (must be 8)
          if (ReadHeaderByte(headCRC) != 8)
          {
              throw new GZipException("Error GZIP header, data not in deflate format");
          }

          // 3. Check the flags
          int flags = ReadHeaderByte(headCRC);

          /* This flag byte is divided into individual bits as follows:
             bit 0 FTEXT
             bit 1 FHCRC
             bit 2 FEXTRA
             bit 3 FNAME
             bit 4 FCOMMENT
             bit 5 reserved
             bit 6 reserved
             bit 7 reserved
           */

          // 3.1 Check the reserved bits are zero
          if ((flags & 0xE0) != 0)
          {
              throw new GZipException("Reserved flag bits in GZIP header != 0");
          }

          // 4.-6. Skip the modification time, extra flags, and OS type
          for (int i = 0; i < 6; i++)
          {
              ReadHeaderByte(headCRC);
          }

          // 7. Read extra field
          if ((flags & GZipConstants.FEXTRA) != 0)
          {
              // XLEN is total length of extra subfields, we will skip them all
              int len1 = ReadHeaderByte(headCRC);
              int len2 = ReadHeaderByte(headCRC);
              int extraLen = (len2 << 8) | len1; // gzip is LSB first
              for (int i = 0; i < extraLen; i++)
              {
                  ReadHeaderByte(headCRC);
              }
          }

          // 8. Read file name
          if ((flags & GZipConstants.FNAME) != 0)
          {
              SkipZeroTerminatedField(headCRC);
          }

          // 9. Read comment
          if ((flags & GZipConstants.FCOMMENT) != 0)
          {
              SkipZeroTerminatedField(headCRC);
          }

          // 10. Read header CRC
          if ((flags & GZipConstants.FHCRC) != 0)
          {
              // The CRC16 field is not part of the checksummed data, so it bypasses headCRC.
              // Like every multi-byte GZIP number it is stored least-significant byte first.
              int crcLow = ReadHeaderByte();
              int crcHigh = ReadHeaderByte();
              int crcval = (crcHigh << 8) | crcLow;

              // Only the low 16 bits of the header CRC-32 are stored in the file.
              if (crcval != ((int)headCRC.Value & 0xffff))
              {
                  throw new GZipException("Header CRC value mismatch");
              }
          }

          readGZIPHeader = true;
          return true;
      }

      /// <summary>
      /// Reads the 8-byte member footer, verifies the CRC and byte count against the data
      /// decompressed for this member, and prepares the stream for a possible following member.
      /// </summary>
      /// <exception cref="EndOfStreamException">Fewer than 8 footer bytes were available.</exception>
      /// <exception cref="GZipException">The stored CRC or byte count does not match.</exception>
      private void ReadFooter()
      {
          byte[] footer = new byte[8];

          // End of stream; reclaim all bytes from inf, read the final byte count, and reset the inflator
          long bytesRead = inf.TotalOut & 0xffffffff;
          inputBuffer.Available += inf.RemainingInput;
          inf.Reset();

          // Read footer from inputBuffer; a single call may return fewer bytes than requested
          int needed = 8;
          while (needed > 0)
          {
              int count = inputBuffer.ReadClearTextBuffer(footer, 8 - needed, needed);
              if (count <= 0)
              {
                  throw new EndOfStreamException("EOS reading GZIP footer");
              }

              needed -= count;
          }

          // Bytes 0-3: CRC of the uncompressed data, least-significant byte first
          int crcval = footer[0] | (footer[1] << 8) | (footer[2] << 16) | (footer[3] << 24);
          if (crcval != (int)crc.Value)
          {
              throw new GZipException($"GZIP crc sum mismatch, theirs \"{crcval}\" and ours \"{(int)crc.Value}\"");
          }

          // Bytes 4-7: uncompressed length, least-significant byte first.
          // NOTE The total here is the original total modulo 2 ^ 32.
          uint total =
              (uint)footer[4] |
              ((uint)footer[5] << 8) |
              ((uint)footer[6] << 16) |
              ((uint)footer[7] << 24);
          if (bytesRead != total)
          {
              throw new GZipException("Number of bytes mismatch in footer");
          }

          // Mark header read as false so if another header exists, we'll continue reading through the file
          readGZIPHeader = false;

          // Indicate that we succeeded on at least one block so we can exit gracefully if there is trailing garbage downstream
          completedLastBlock = true;
      }

      #endregion Support routines
  }
  336. }