ultimatepp/uppdev/NibblePtr/lzv.cpp
cxl 3cd394812c Merge continued
git-svn-id: svn://ultimatepp.org/upp/trunk@10263 f0d560ea-af0d-0410-9eb7-867de7ffcac7
2016-10-04 08:34:39 +00:00

242 lines
5.8 KiB
C++

#include "Entropy.h"
/***********************************************************************
**
** lzv.c -- an extremely fast compression/decompression method
**
** Written by Hermann Vogt
**
** v 0.5 -- 00/04/10 fix unaligned access (Marc)
** v 0.4 -- 00/03/25 adapted for PApp by Marc Lehmann <pcg@goof.com>
** v 0.3 -- 94/03/08 assembler version of rLZV built in.
** v 0.2 -- 94/03/04 Changes for usage with DouBle 0.2 built in.
** v 0.1 -- 94/03/01 Intensively tested, removed all known bugs.
** v 0.0 -- 94/02/21 First Version.
**
** Copyright (c) 1994 Hermann Vogt. Redistribution of this file is
** permitted under the GNU Public Licence.
**
** The method presented here is faster and compresses better
** than lzrw1 and lzrw1-a. I named it lzv for "Lev-Zimpel-Vogt".
** It uses ideas introduced by Ross Williams in his algorithm lzrw1
** [R. N. Williams (1991): "An Extremely Fast Ziv-Lempel Data
** Compression Algorithm", Proceedings IEEE Data Compression
** Conference, Snowbird, Utah, 362-371] and by Fiala and Green in their
** algorithm a1 [E. R. Fiala, D. H. Greene (1989): "Data Compression
** with Finite Windows", Communications of the ACM, 4, 490-505].
** Because lzv differs strongly from both, I hope there will be no
** patent problems. The hashing-method has been stolen from Jean-loup
** Gailly's (patent free) gzip.
**
** KNOWN PROBLEMS:
** - My english is very bad.
** - Badly commented. (I hope this will be better in the next
** version.)
** - I'm not sure if lzv is free from patent problems.
**
***********************************************************************/
/* Hash table geometry.  HSIZE, HMASK and HSHIFT are not referenced by the
 * functions visible in this file, which use HSIZ and a literal shift of 5
 * instead; they are kept in case other code relies on them. */
#define HSIZE 0x4000
#define HMASK 0x3fff
#define HSHIFT 5
#define MLL 32 /* Maximum length of a chain of literals */
#define MML (8+256) /* Maximum length of a match */
#define MOFF 8191 /* Maximum offset */
#define HSIZ 16384 /* Size of the hash table (number of entries) */
/* Short aliases for the project's integer types (byte/word/dword are
 * declared outside this file -- presumably 8/16/32-bit unsigned; confirm
 * against the project headers). */
typedef byte uch;
typedef word ush;
typedef dword uit;
/* Leaving ONLY_64K undefined compiles in the code in LZVCompress that
 * rebuilds full positions from the truncated hash table entries, which is
 * what allows inputs larger than 64K. */
#undef ONLY_64K /* 64k-max encoder is faster */
/* but only veeeery slightly */
/* unconditionally aligning does not cost much, so do it if unsure */
/* align_ushort is tested with `#if align_ushort`.  It must expand to a plain
 * constant: a `defined` operator produced by macro expansion inside #if is
 * undefined behaviour, so the platform test is done here instead. */
#if defined(__i386)
#define align_ushort 0
#else
#define align_ushort 1
#endif
/*
 * LZVCompress -- compress `len` bytes from `in` into `out`.
 *
 * in       input buffer, `len` bytes; only in[0 .. len-1] is read.
 * out      output buffer, `out_len` bytes.
 * heap     scratch memory used as the hash table; it must hold at least
 *          HSIZ entries of type ush.  It is not cleared here: candidates
 *          read from it are validated (position below the current one and
 *          a byte comparison) before they are used.
 * len      number of input bytes.
 * out_len  capacity of `out` (converted to unsigned, as before).
 *
 * Returns the number of bytes written to `out`, or 0 if the input is empty
 * or the (conservative) space checks decide the output might not fit.
 *
 * Stream format produced:
 *   literal run: one control byte (count - 1) << 3 (low three bits zero),
 *                followed by `count` raw bytes, 1 <= count <= MLL.
 *   match:       one control byte whose low three bits hold the length code
 *                (1..6, or 7 meaning "an extra byte with length code - 7
 *                follows") and whose high five bits hold the top bits of the
 *                offset, then the low eight bits of the offset.  The match
 *                copies length code + 2 bytes from `offset + 1` bytes back.
 */
int LZVCompress(byte * in, byte * out, byte * heap, int len, int out_len)
{
    uit hval = 0, op, ip, l_len, m_pos = 0, m_off = 0, m_len, maxlen;
    ush *lzv1_htab = (word *)heap;

    /* Nothing to compress; also keeps the loop below from touching in[0]. */
    if (len <= 0)
        return 0;

    maxlen = out_len;
    /* The hash is seeded from the first two bytes only when both exist. */
    if (len >= 2)
        hval = ((in[0] << 5) ^ in[1]) & (HSIZ - 1);
    ip = op = l_len = 0;
    do
    {
        int have_match = 0;

        /*
         * The rolling hash covers in[ip .. ip+2].  Near the end of the
         * input there are not three bytes left, and a match could not be
         * taken there anyway (it needs ip + 4 < len), so the lookup is
         * skipped instead of reading beyond the input buffer.
         */
        if (ip + 2 < (uit) len)
        {
            hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
            m_pos = lzv1_htab[hval];
            lzv1_htab[hval] = ip;
#ifndef ONLY_64K
            /*
             * Table entries only keep the low bits of a position: rebuild
             * the full position relative to the current 64K window, and
             * step back one window if that lands at or after ip.
             */
            m_pos = (ip & ~0xffff) + m_pos;
            if (m_pos >= ip && m_pos >= 0x10000)
                m_pos -= 0x10000;
#endif
            /* Always compare byte-wise: no unaligned 16-bit loads. */
            have_match = m_pos < ip
                && in[m_pos] == in[ip]
                && (m_off = ip - m_pos - 1) <= MOFF
                && ip + 4 < (uit) len
                && in[m_pos + 1] == in[ip + 1]
                && in[m_pos + 2] == in[ip + 2];
        }

        if (have_match)
        {
            /* We have found a match */
            /*
             * Limit the match so that the hash updates after it, which
             * read two bytes ahead, stay inside the input.
             */
            uit look = len - ip - 2;
            if (look > MML)
                look = MML;
            m_len = 2;
            do
            {
                m_len++;
            }
            while (m_len != look && in[ip + m_len] == in[m_pos + m_len]);

            /* Room for pending literals plus the longest match token. */
            if (op + 2 + l_len + 3 >= maxlen)
                return 0;

            /* Flush the literals collected before this match. */
            if (l_len != 0)
            {
                out[op++] = (l_len - 1) << 3;
                do
                {
                    out[op++] = in[ip - l_len--];
                }
                while (l_len != 0);
            }

            /* Length code is the match length minus two (1..262). */
            m_len -= 2;
            if (m_len <= 6)
            {
                out[op++] = m_len | ((m_off >> 5) & 0xf8);
            }
            else
            {
                out[op++] = 0x07 | ((m_off >> 5) & 0xf8);
                out[op++] = m_len - 7;
            }
            out[op++] = m_off & 0xff;

            /* Enter the positions covered by the match into the table. */
            ip++;
            hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
            lzv1_htab[hval] = ip;
            ip++;
            do
            {
                hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
                lzv1_htab[hval] = ip;
                ip++;
                m_len--;
            }
            while (0 != m_len);
        }
        else
        {
            /* No match found */
            ip++;
            l_len++;
            /* A literal run is full: write it out now. */
            if (MLL == l_len)
            {
                if (op + 2 + MLL >= maxlen)
                    return 0;
                out[op++] = 0xf8;
                do
                {
                    out[op++] = in[ip - l_len--];
                }
                while (l_len != 0);
            }
        }
    }
    while (ip < (uit) len);

    /* Flush the literals left over at the end of the input. */
    if (l_len != 0)
    {
        if (op + l_len + 3 >= maxlen)
            return 0;
        out[op++] = (l_len - 1) << 3;
        do
        {
            out[op++] = in[ip - l_len--];
        }
        while (l_len != 0);
    }
    return op;
}
/*
 * LZVDecompress -- expand a stream produced by LZVCompress.
 *
 * in    compressed data, `ilen` bytes.
 * out   destination buffer, `len` bytes.
 * ilen  number of compressed bytes available.
 * len   capacity of `out`.
 *
 * Decoding stops once `out` is full or the input is used up.  Returns the
 * number of bytes written to `out`, or 0 when a buffer is empty or the
 * stream is invalid: a token that is cut off by the end of the input, that
 * would write past the end of `out`, or that refers to data in front of
 * `out`.  Every access is checked before it happens, so corrupt input
 * cannot read or write outside the two buffers.
 */
int LZVDecompress (uch const *const in, uch * const out, int ilen, int len)
{
    uch const *ip = in;
    uch *op = out;
    uch const *in_end;
    uch *out_end;

    /* The loop below always consumes one control byte first. */
    if (ilen <= 0 || len <= 0)
        return 0;

    in_end = in + ilen;
    out_end = out + len;

    do
    {
        uit tbuf = *ip++;
        uit c_len = tbuf & 0x07;

        if (0 == c_len)
        {
            /* Literal run: the high five bits hold count - 1. */
            c_len = (tbuf >> 3) + 1;
            if (c_len > (uit) (in_end - ip) || c_len > (uit) (out_end - op))
                return 0; /* Truncated input or output too small. */
            do
                *op++ = *ip++;
            while (--c_len);
        }
        else
        {
            uch const *m_pos;
            uit m_off;

            /* Length code 7 means the real code follows in an extra byte. */
            if (0x07 == c_len)
            {
                if (ip >= in_end)
                    return 0; /* Truncated input. */
                c_len = *ip++ + 7;
            }
            if (ip >= in_end)
                return 0; /* Truncated input. */
            m_off = ((tbuf & 0xf8) << 5) | *ip++;

            /*
             * The source of the copy starts m_off + 1 bytes back.  Compare
             * distances rather than pointers so that no pointer in front
             * of `out` is ever formed.
             */
            if (m_off >= (uit) (op - out))
                return 0; /* Compression error. */
            /* A match writes c_len + 2 bytes. */
            if (c_len + 2 > (uit) (out_end - op))
                return 0; /* Output too small. */

            m_pos = op - 1 - m_off;
            /*
             * Copy byte by byte: source and destination may overlap, and
             * bytes written earlier in this copy may be read again.
             */
            *op++ = *m_pos++;
            *op++ = *m_pos++;
            do
                *op++ = *m_pos++;
            while (--c_len);
        }
    }
    while (op < out_end && ip < in_end);

    return (int) (op - out);
}