ultimatepp/uppdev/NibblePtr/lzv.cpp

#include "Entropy.h"


/***********************************************************************
**
**	lzv.c -- an extremely fast compression/decompression method
**
**	Written by Hermann Vogt
**
**		v 0.5 -- 00/04/10	fix unaligned access (Marc)
**		v 0.4 -- 00/03/25	adapted for PApp by Marc Lehmann <pcg@goof.com>
**		v 0.3 -- 94/03/08	Assembler version of rLZV built in.
**		v 0.2 -- 94/03/04	Changes for usage with DouBle 0.2 built in.
**		v 0.1 -- 94/03/01	Intensivly tested, removed all known bugs.
**		v 0.0 -- 94/02/21	First Version.
**
**	Copyright (c) 1994 Hermann Vogt. Redistribution of this file is
**	permitted under the GNU Public Licence.
**
**	The method presented here is faster and compresses better
**	than lzrw1 and lzrw1-a. I named it lzv for "Lev-Zimpel-Vogt".
**	It uses ideas introduced by Ross Williams in his algorithm lzrw1
**	[R. N. Williams (1991): "An Extremely Fast Ziv-Lempel Data
**	Compression Algorithm", Proceedings IEEE Data Compression
**	Conference, Snowbird, Utah, 362-371] and by Fiala and Green in their
**	algorithm a1 [E. R. Fiala, D. H. Greene (1989): "Data Compression
**	with Finite Windows", Communications of the ACM, 4, 490-505].
**	Because lzv differs strongly from both, I hope there will be no
**	patent problems. The hashing-method has been stolen from Jean-loup
**	Gailly's (patent free) gzip.
**
**	KNOWN PROBLEMS:
**	-	My english is very bad.
**	-	Badly commented. (I hope this will be better in the next
**		version.)
**	-	I'm not sure if lzv is free from patent problems.
**
***********************************************************************/

/* Hash parameters.  NOTE(review): HSIZE/HMASK/HSHIFT are not referenced by
   LZVCompress or LZVDecompress in this file (they use HSIZ and a literal
   shift of 5); kept in case other code relies on them. */
#define HSIZE	0x4000
#define HMASK	0x3fff
#define HSHIFT	5

#define	MLL	32		/*        Maximum length of a chain of literals   */
#define	MML	(8+256)		/*        Maximum length of a match               */
#define	MOFF	8191		/*        Maximum offset                          */
#define	HSIZ	16384		/*        Size of hash table (entries)            */

/* ugly type names */

typedef	byte	uch;
typedef	word	ush;
typedef	dword	uit;

#undef ONLY_64K /* 64k-max encoder is faster */
                /* but only very slightly    */

/* Unconditionally aligning does not cost much, so do it if unsure.
   The value is computed with an explicit #if: letting `defined` appear
   through macro expansion inside a later #if is undefined behaviour. */
#if defined(__i386)
# define align_ushort 0
#else
# define align_ushort 1
#endif

int LZVCompress(byte * in, byte * out, byte * heap, int len, int out_len)
{
  uit hval, op, ip, l_len, m_pos, m_off, m_len, maxlen;
  ush *lzv1_htab = (word *)heap;

  maxlen = out_len;
  hval = ((in[0] << 5) ^ in[1]) & (HSIZ - 1);
  ip = op = l_len = 0;

  do
    {
      hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
      m_pos = lzv1_htab[hval];
      lzv1_htab[hval] = ip;

#ifndef ONLY_64K
      /*
       *    If you want to compress more than 64K, uncomment
       *      the following lines.
       */

      m_pos = (ip & ~0xffff) + m_pos;
      if (m_pos >= ip && m_pos >= 0x10000)
	m_pos -= 0x10000;
#endif

      if (m_pos < ip
	  && in[m_pos    ] == in[ip    ]
	  && (m_off = ip - m_pos - 1) <= MOFF
	  && ip + 4 < len
#if align_ushort
          && in[m_pos + 1] == in[ip + 1]
          && in[m_pos + 2] == in[ip + 2]
#else
	  && *(ush *) (in + m_pos + 1) == *(ush *) (in + ip + 1)
#endif
         )
	{
	  /*      We have found a match   */
	  uit look = len - ip - 2;
	  if (look > MML)
	    look = MML;
	  m_len = 2;

	  do
	    {
	      m_len++;
	    }
	  while (m_len != look && in[ip + m_len] == in[m_pos + m_len]);

	  if (op + 2 + l_len + 3 >= maxlen)
	    return 0;

	  if (l_len != 0)
	    {
	      out[op++] = (l_len - 1) << 3;
	      do
		{
		  out[op++] = in[ip - l_len--];
		}
	      while (l_len != 0);
	    }

	  m_len -= 2;

	  if (m_len <= 6)
	    {
	      out[op++] = m_len | ((m_off >> 5) & 0xf8);
	    }
	  else
	    {
	      out[op++] = 0x07 | ((m_off >> 5) & 0xf8);
	      out[op++] = m_len - 7;
	    }

	  out[op++] = m_off & 0xff;
	  ip++;
	  hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
	  lzv1_htab[hval] = ip;
	  ip++;

	  do
	    {
	      hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
	      lzv1_htab[hval] = ip;
	      ip++;
	      m_len--;
	    }
	  while (0 != m_len);
	}
      else
	{
	  /*      No match found  */
	  ip++;
	  l_len++;

	  if (MLL == l_len)
	    {
	      if (op + 2 + MLL >= maxlen)
		return 0;

	      out[op++] = 0xf8;

	      do
		{
		  out[op++] = in[ip - l_len--];
		}
	      while (l_len != 0);
	    }
	}
    }
  while (ip < len);

  if (l_len != 0)
    {
      if (op + l_len + 3 >= maxlen)
	return 0;

      out[op++] = (l_len - 1) << 3;

      do
	{
	  out[op++] = in[ip - l_len--];
	}
      while (l_len != 0);
    }
  return op;
}

/*
 * Decompress `ilen` bytes of LZV data from `in` into `out`.
 *
 * in   -- compressed data
 * out  -- output buffer
 * ilen -- number of compressed bytes
 * len  -- capacity of `out` in bytes
 *
 * Returns the number of bytes written to `out`.  Returns 0 if `ilen` or
 * `len` is not positive, or if the data is corrupt or does not fit: a token
 * that would read past the input, write past the output, or refer to data
 * before the start of `out`.
 *
 * Decoding stops as soon as the output is full or the input is exhausted.
 */
int LZVDecompress (uch const *const in, uch * const out, int ilen, int len)
{
  uit tbuf, c_len;
  uch *op = out;
  uch const *ip = in;

  if (ilen <= 0 || len <= 0)
    return 0;

  uch *const out_end = out + len;
  uch const *const in_end = in + ilen;

  do
    {
      tbuf = *ip++;
      c_len = tbuf & 0x07;

      if (0 == c_len)
	{
	  /* Literal run of (tbuf >> 3) + 1 bytes. */
	  c_len = (tbuf >> 3) + 1;

	  if (c_len > (uit) (in_end - ip) || c_len > (uit) (out_end - op))
	    return 0;		/* Truncated input or output too small. */

	  do
	    *op++ = *ip++;
	  while (--c_len);
	}
      else
	{
	  uit m_off;
	  uch const *m_pos;

	  /* A match needs the offset byte and, for length code 7, an
	     extra length byte before it. */
	  if ((uit) (in_end - ip) < (0x07 == c_len ? 2u : 1u))
	    return 0;		/* Truncated input. */

	  if (0x07 == c_len)
	    c_len = *ip++ + 7;
	  m_off = ((uit) (tbuf & 0xf8) << 5) | *ip++;

	  /* The match starts m_off + 1 bytes behind op.  Reject it if that
	     lies before the start of the output; comparing offsets avoids
	     forming an out-of-range pointer on corrupt data. */
	  if (m_off >= (uit) (op - out))
	    return 0;		/* Compression error. */

	  /* c_len + 2 bytes are copied in total. */
	  if (c_len + 2 > (uit) (out_end - op))
	    return 0;		/* Output too small. */

	  m_pos = op - 1 - m_off;

	  /* Byte-wise copy on purpose: source and destination may overlap,
	     which repeats the most recent output. */
	  *op++ = *m_pos++;
	  *op++ = *m_pos++;

	  do
	    *op++ = *m_pos++;
	  while (--c_len);
	}
    }
  while (op < out_end && ip < in_end);

  return op - out;
}