// LzwInputStream.cs (14 KB)
  1. using System;
  2. using System.IO;
  3. namespace ICSharpCode.SharpZipLib.Lzw
  4. {
  5. /// <summary>
  6. /// This filter stream is used to decompress a LZW format stream.
  7. /// Specifically, a stream that uses the LZC compression method.
  8. /// This file format is usually associated with the .Z file extension.
  9. ///
  10. /// See http://en.wikipedia.org/wiki/Compress
  11. /// See http://wiki.wxwidgets.org/Development:_Z_File_Format
  12. ///
  13. /// The file header consists of 3 (or optionally 4) bytes. The first two bytes
  14. /// contain the magic marker "0x1f 0x9d", followed by a byte of flags.
  15. ///
  16. /// Based on Java code by Ronald Tschalar, which in turn was based on the unlzw.c
  17. /// code in the gzip package.
  18. /// </summary>
  19. /// <example> This sample shows how to unzip a compressed file
  20. /// <code>
  21. /// using System;
  22. /// using System.IO;
  23. ///
  24. /// using ICSharpCode.SharpZipLib.Core;
  25. /// using ICSharpCode.SharpZipLib.Lzw;
  26. ///
  27. /// class MainClass
  28. /// {
  29. /// public static void Main(string[] args)
  30. /// {
  31. /// using (Stream inStream = new LzwInputStream(File.OpenRead(args[0])))
  32. /// using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
  33. /// byte[] buffer = new byte[4096];
  34. /// StreamUtils.Copy(inStream, outStream, buffer);
  35. /// // OR
  36. /// inStream.Read(buffer, 0, buffer.Length);
  37. /// // now do something with the buffer
  38. /// }
  39. /// }
  40. /// }
  41. /// </code>
  42. /// </example>
  public class LzwInputStream : Stream
  {
      /// <summary>
      /// Gets or sets a flag indicating ownership of underlying stream.
      /// When the flag is true <see cref="Stream.Dispose()" /> will close the underlying stream also.
      /// </summary>
      /// <remarks>The default value is true.</remarks>
      public bool IsStreamOwner { get; set; } = true;

      /// <summary>
      /// Creates a LzwInputStream
      /// </summary>
      /// <param name="baseInputStream">
      /// The stream to read compressed data from (baseInputStream LZW format)
      /// </param>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="baseInputStream"/> is null.
      /// </exception>
      public LzwInputStream(Stream baseInputStream)
      {
          if (baseInputStream == null)
          {
              throw new ArgumentNullException(nameof(baseInputStream));
          }

          this.baseInputStream = baseInputStream;
      }

      /// <summary>
      /// See <see cref="System.IO.Stream.ReadByte"/>
      /// </summary>
      /// <returns>The decompressed byte (0-255), or -1 when no byte could be read.</returns>
      public override int ReadByte()
      {
          int b = Read(one, 0, 1);
          if (b == 1)
          {
              return (one[0] & 0xff);
          }

          return -1;
      }

      /// <summary>
      /// Reads decompressed data into the provided buffer byte array
      /// </summary>
      /// <param name ="buffer">
      /// The array to read and decompress data into
      /// </param>
      /// <param name ="offset">
      /// The offset indicating where the data should be placed
      /// </param>
      /// <param name ="count">
      /// The number of bytes to decompress
      /// </param>
      /// <returns>The number of bytes read. Zero signals the end of stream</returns>
      /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
      /// <exception cref="ArgumentOutOfRangeException">
      /// <paramref name="offset"/> or <paramref name="count"/> is negative.
      /// </exception>
      /// <exception cref="ArgumentException">
      /// <paramref name="offset"/> plus <paramref name="count"/> exceeds the buffer length.
      /// </exception>
      /// <exception cref="LzwException">The header or the compressed data is invalid.</exception>
      public override int Read(byte[] buffer, int offset, int count)
      {
          if (buffer == null)
          {
              throw new ArgumentNullException(nameof(buffer));
          }
          if (offset < 0)
          {
              throw new ArgumentOutOfRangeException(nameof(offset), "Offset cannot be negative");
          }
          if (count < 0)
          {
              throw new ArgumentOutOfRangeException(nameof(count), "Count cannot be negative");
          }
          if (buffer.Length - offset < count)
          {
              throw new ArgumentException("Offset and count exceed the length of the buffer");
          }

          if (!headerParsed)
          {
              ParseHeader();
          }

          if (eof)
          {
              return 0;
          }

          int start = offset;

          /* Using local copies of various variables speeds things up by as
           * much as 30% in Java! Performance not tested in C#.
           */
          int[] lTabPrefix = tabPrefix;
          byte[] lTabSuffix = tabSuffix;
          byte[] lStack = stack;
          int lNBits = nBits;
          int lMaxCode = maxCode;
          int lMaxMaxCode = maxMaxCode;
          int lBitMask = bitMask;
          int lOldCode = oldCode;
          byte lFinChar = finChar;
          int lStackP = stackP;
          int lFreeEnt = freeEnt;
          byte[] lData = data;
          int lBitPos = bitPos;

          // Deliver bytes left on the stack by a previous call first.
          int sSize = lStack.Length - lStackP;
          if (sSize > 0)
          {
              int num = (sSize >= count) ? count : sSize;
              Array.Copy(lStack, lStackP, buffer, offset, num);
              offset += num;
              count -= num;
              lStackP += num;
          }

          if (count == 0)
          {
              // Only the stack pointer can have changed at this point.
              stackP = lStackP;
              return offset - start;
          }

      // loop, filling local buffer until enough data has been decompressed
      MainLoop:
          do
          {
              if (end < EXTRA)
              {
                  Fill();
              }

              // While input keeps arriving only whole groups of lNBits bytes are
              // consumed; once the source is exhausted everything up to the last
              // complete code is consumed.
              int bitIn = (got > 0) ? (end - end % lNBits) << 3 :
                                      (end << 3) - (lNBits - 1);

              while (lBitPos < bitIn)
              {
                  // handle 1-byte reads correctly
                  if (count == 0)
                  {
                      SaveState(lNBits, lMaxCode, lBitMask, lOldCode, lFinChar,
                                lStackP, lFreeEnt, lBitPos);
                      return offset - start;
                  }

                  // check for code-width expansion
                  if (lFreeEnt > lMaxCode)
                  {
                      lBitPos = AlignBitPosition(lBitPos, lNBits);
                      lNBits++;
                      lMaxCode = (lNBits == maxBits) ? lMaxMaxCode :
                                                       (1 << lNBits) - 1;
                      lBitMask = (1 << lNBits) - 1;
                      lBitPos = ResetBuf(lBitPos);
                      goto MainLoop;
                  }

                  // read next code: assemble 24 bits little-endian, then shift
                  // and mask out the lNBits-wide code starting at lBitPos
                  int pos = lBitPos >> 3;
                  int code = (((lData[pos] & 0xFF) |
                               ((lData[pos + 1] & 0xFF) << 8) |
                               ((lData[pos + 2] & 0xFF) << 16)) >>
                              (lBitPos & 0x7)) & lBitMask;
                  lBitPos += lNBits;

                  // handle first iteration: the first code must be a literal byte
                  if (lOldCode == -1)
                  {
                      if (code >= 256)
                      {
                          throw new LzwException("corrupt input: " + code + " > 255");
                      }

                      lFinChar = (byte)(lOldCode = code);
                      buffer[offset++] = lFinChar;
                      count--;
                      continue;
                  }

                  // handle CLEAR code
                  if (code == TBL_CLEAR && blockMode)
                  {
                      Array.Copy(zeros, 0, lTabPrefix, 0, zeros.Length);
                      lFreeEnt = TBL_FIRST - 1;
                      // Align using the code width in effect before the reset.
                      lBitPos = AlignBitPosition(lBitPos, lNBits);
                      lNBits = LzwConstants.INIT_BITS;
                      lMaxCode = (1 << lNBits) - 1;
                      lBitMask = lMaxCode;

                      // Code tables reset
                      lBitPos = ResetBuf(lBitPos);
                      goto MainLoop;
                  }

                  // setup
                  int inCode = code;
                  lStackP = lStack.Length;

                  // Handle KwK case: the code refers to the entry about to be created
                  if (code >= lFreeEnt)
                  {
                      if (code > lFreeEnt)
                      {
                          throw new LzwException("corrupt input: code=" + code +
                                                 ", freeEnt=" + lFreeEnt);
                      }

                      lStack[--lStackP] = lFinChar;
                      code = lOldCode;
                  }

                  // Generate output characters in reverse order
                  while (code >= 256)
                  {
                      lStack[--lStackP] = lTabSuffix[code];
                      code = lTabPrefix[code];
                  }

                  lFinChar = lTabSuffix[code];
                  buffer[offset++] = lFinChar;
                  count--;

                  // And put them out in forward order
                  sSize = lStack.Length - lStackP;
                  int num = (sSize >= count) ? count : sSize;
                  Array.Copy(lStack, lStackP, buffer, offset, num);
                  offset += num;
                  count -= num;
                  lStackP += num;

                  // generate new entry in table
                  if (lFreeEnt < lMaxMaxCode)
                  {
                      lTabPrefix[lFreeEnt] = lOldCode;
                      lTabSuffix[lFreeEnt] = lFinChar;
                      lFreeEnt++;
                  }

                  // Remember previous code
                  lOldCode = inCode;

                  // if output buffer full, then return
                  if (count == 0)
                  {
                      SaveState(lNBits, lMaxCode, lBitMask, lOldCode, lFinChar,
                                lStackP, lFreeEnt, lBitPos);
                      return offset - start;
                  }
              } // while

              lBitPos = ResetBuf(lBitPos);
          } while (got > 0); // do..while

          SaveState(lNBits, lMaxCode, lBitMask, lOldCode, lFinChar,
                    lStackP, lFreeEnt, lBitPos);
          eof = true;
          return offset - start;
      }

      /// <summary>
      /// Rounds a bit position up to the next multiple of (codeWidth * 8) bits.
      /// A position that is already such a multiple is returned unchanged.
      /// </summary>
      /// <param name="bitPosition">Current bit position in the input buffer.</param>
      /// <param name="codeWidth">Code width in bits that was in effect so far.</param>
      /// <returns>The aligned bit position.</returns>
      private static int AlignBitPosition(int bitPosition, int codeWidth)
      {
          int groupBits = codeWidth << 3;
          return (bitPosition - 1) +
                 groupBits - (bitPosition - 1 + groupBits) % groupBits;
      }

      /// <summary>
      /// Writes the local working copies used by <see cref="Read"/> back to the
      /// instance fields so that the next call can resume where this one stopped.
      /// </summary>
      private void SaveState(int nBits, int maxCode, int bitMask, int oldCode,
                             byte finChar, int stackP, int freeEnt, int bitPos)
      {
          this.nBits = nBits;
          this.maxCode = maxCode;
          this.bitMask = bitMask;
          this.oldCode = oldCode;
          this.finChar = finChar;
          this.stackP = stackP;
          this.freeEnt = freeEnt;
          this.bitPos = bitPos;
      }

      /// <summary>
      /// Moves the unread data in the buffer to the beginning and resets
      /// the pointers.
      /// </summary>
      /// <param name="bitPosition">Bit position of the first unread bit; only its byte part is used.</param>
      /// <returns>The new bit position, which is always 0.</returns>
      private int ResetBuf(int bitPosition)
      {
          int pos = bitPosition >> 3;
          Array.Copy(data, pos, data, 0, end - pos);
          end -= pos;
          return 0;
      }

      /// <summary>
      /// Reads more compressed bytes from the underlying stream, appending them
      /// after the data already buffered. The byte count of the read is kept in
      /// <c>got</c>; a non-positive value tells <see cref="Read"/> the source is exhausted.
      /// </summary>
      private void Fill()
      {
          got = baseInputStream.Read(data, end, data.Length - 1 - end);
          if (got > 0)
          {
              end += got;
          }
      }

      /// <summary>
      /// Reads and validates the stream header, then allocates and initialises
      /// the decoder tables.
      /// </summary>
      /// <exception cref="LzwException">
      /// The header is truncated, the magic bytes do not match, the code width is
      /// out of range, or reserved flag bits are set.
      /// </exception>
      private void ParseHeader()
      {
          // Magic marker (2 bytes) plus the flags byte are the bytes inspected below.
          const int requiredHeaderBytes = 3;

          byte[] hdr = new byte[LzwConstants.HDR_SIZE];

          // Stream.Read may legitimately return fewer bytes than requested, so
          // keep reading until the header is complete or the source ends.
          int read = 0;
          while (read < hdr.Length)
          {
              int n = baseInputStream.Read(hdr, read, hdr.Length - read);
              if (n <= 0)
              {
                  break;
              }

              read += n;
          }

          if (read < requiredHeaderBytes)
          {
              throw new LzwException("Failed to read LZW header");
          }

          // Check the magic marker
          if (hdr[0] != (LzwConstants.MAGIC >> 8) || hdr[1] != (LzwConstants.MAGIC & 0xff))
          {
              throw new LzwException(String.Format(
                  "Wrong LZW header. Magic bytes don't match. 0x{0:x2} 0x{1:x2}",
                  hdr[0], hdr[1]));
          }

          // Check the 3rd header byte
          blockMode = (hdr[2] & LzwConstants.BLOCK_MODE_MASK) > 0;
          maxBits = hdr[2] & LzwConstants.BIT_MASK;

          if (maxBits > LzwConstants.MAX_BITS)
          {
              throw new LzwException("Stream compressed with " + maxBits +
                                     " bits, but decompression can only handle " +
                                     LzwConstants.MAX_BITS + " bits.");
          }

          // Decoding always starts at INIT_BITS wide codes; a smaller maximum is
          // inconsistent and would make the tables too small to hold the literals.
          if (maxBits < LzwConstants.INIT_BITS)
          {
              throw new LzwException("Stream compressed with " + maxBits +
                                     " bits, but decompression requires at least " +
                                     LzwConstants.INIT_BITS + " bits.");
          }

          if ((hdr[2] & LzwConstants.RESERVED_MASK) > 0)
          {
              throw new LzwException("Unsupported bits set in the header.");
          }

          // Initialize variables
          maxMaxCode = 1 << maxBits;
          nBits = LzwConstants.INIT_BITS;
          maxCode = (1 << nBits) - 1;
          bitMask = maxCode;
          oldCode = -1; // marks "no code read yet" for Read
          finChar = 0;
          freeEnt = blockMode ? TBL_FIRST : 256;

          tabPrefix = new int[1 << maxBits];
          tabSuffix = new byte[1 << maxBits];
          stack = new byte[1 << maxBits];
          stackP = stack.Length; // empty stack: it grows downwards from the end

          // The first 256 entries are the literal bytes themselves.
          for (int idx = 255; idx >= 0; idx--)
          {
              tabSuffix[idx] = (byte)idx;
          }

          // Only mark the header as parsed once everything above succeeded, so a
          // failed parse is not followed by reads against uninitialised tables.
          headerParsed = true;
      }

      #region Stream Overrides

      /// <summary>
      /// Gets a value indicating whether the current stream supports reading
      /// </summary>
      public override bool CanRead => baseInputStream.CanRead;

      /// <summary>
      /// Gets a value of false indicating seeking is not supported for this stream.
      /// </summary>
      public override bool CanSeek => false;

      /// <summary>
      /// Gets a value of false indicating that this stream is not writeable.
      /// </summary>
      public override bool CanWrite => false;

      /// <summary>
      /// A value representing the length of the stream in bytes.
      /// </summary>
      /// <remarks>
      /// This returns the byte count of the most recent read from the underlying
      /// stream, not the length of the decompressed data. The behaviour is kept
      /// unchanged for compatibility.
      /// </remarks>
      public override long Length => got;

      /// <summary>
      /// The current position within the stream.
      /// Throws a NotSupportedException when attempting to set the position
      /// </summary>
      /// <remarks>The getter reports the position of the underlying stream.</remarks>
      /// <exception cref="NotSupportedException">Attempting to set the position</exception>
      public override long Position
      {
          get
          {
              return baseInputStream.Position;
          }
          set
          {
              throw new NotSupportedException("LzwInputStream Position not supported");
          }
      }

      /// <summary>
      /// Flushes the baseInputStream
      /// </summary>
      public override void Flush()
      {
          baseInputStream.Flush();
      }

      /// <summary>
      /// Sets the position within the current stream
      /// Always throws a NotSupportedException
      /// </summary>
      /// <param name="offset">The relative offset to seek to.</param>
      /// <param name="origin">The <see cref="SeekOrigin"/> defining where to seek from.</param>
      /// <returns>The new position in the stream.</returns>
      /// <exception cref="NotSupportedException">Any access</exception>
      public override long Seek(long offset, SeekOrigin origin)
      {
          throw new NotSupportedException("Seek not supported");
      }

      /// <summary>
      /// Set the length of the current stream
      /// Always throws a NotSupportedException
      /// </summary>
      /// <param name="value">The new length value for the stream.</param>
      /// <exception cref="NotSupportedException">Any access</exception>
      public override void SetLength(long value)
      {
          throw new NotSupportedException("LzwInputStream SetLength not supported");
      }

      /// <summary>
      /// Writes a sequence of bytes to stream and advances the current position
      /// This method always throws a NotSupportedException
      /// </summary>
      /// <param name="buffer">The buffer containing data to write.</param>
      /// <param name="offset">The offset of the first byte to write.</param>
      /// <param name="count">The number of bytes to write.</param>
      /// <exception cref="NotSupportedException">Any access</exception>
      public override void Write(byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException("LzwInputStream Write not supported");
      }

      /// <summary>
      /// Writes one byte to the current stream and advances the current position
      /// Always throws a NotSupportedException
      /// </summary>
      /// <param name="value">The byte to write.</param>
      /// <exception cref="NotSupportedException">Any access</exception>
      public override void WriteByte(byte value)
      {
          throw new NotSupportedException("LzwInputStream WriteByte not supported");
      }

      /// <summary>
      /// Closes the input stream. When <see cref="IsStreamOwner"></see>
      /// is true the underlying stream is also closed.
      /// </summary>
      /// <param name="disposing">
      /// true when called from Dispose/Close; false when called from a finalizer,
      /// in which case other managed objects must not be touched.
      /// </param>
      protected override void Dispose(bool disposing)
      {
          if (!isClosed)
          {
              isClosed = true;
              if (disposing && IsStreamOwner)
              {
                  baseInputStream.Dispose();
              }
          }

          base.Dispose(disposing);
      }

      #endregion Stream Overrides

      #region Instance Fields

      private readonly Stream baseInputStream;

      /// <summary>
      /// Flag indicating whether this instance has been closed or not.
      /// </summary>
      private bool isClosed;

      // scratch buffer for ReadByte
      private readonly byte[] one = new byte[1];

      private bool headerParsed;

      // string table stuff
      private const int TBL_CLEAR = 0x100;

      private const int TBL_FIRST = TBL_CLEAR + 1;

      private int[] tabPrefix;
      private byte[] tabSuffix;
      private readonly int[] zeros = new int[256];
      private byte[] stack;

      // various state
      private bool blockMode;

      private int nBits;
      private int maxBits;
      private int maxMaxCode;
      private int maxCode;
      private int bitMask;
      private int oldCode;
      private byte finChar;
      private int stackP;
      private int freeEnt;

      // input buffer
      private readonly byte[] data = new byte[1024 * 8];

      private int bitPos;
      private int end;
      private int got;
      private bool eof;

      // Fill() is called whenever fewer than this many bytes are buffered
      private const int EXTRA = 64;

      #endregion Instance Fields
  }
  489. }