399 lines
12 KiB
C
399 lines
12 KiB
C
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdint.h>
|
|
#include "global.h"
|
|
#include "huff.h"
|
|
|
|
static int cmp_tree(const void * a0, const void * b0) {
|
|
return ((struct HuffData *)a0)->value - ((struct HuffData *)b0)->value;
|
|
}
|
|
|
|
typedef int (*cmpfun)(const void *, const void *);
|
|
|
|
int msort_r(void * data, size_t count, size_t size, cmpfun cmp, void * buffer) {
|
|
/*
|
|
* Out-of-place mergesort (stable sort)
|
|
* Returns 1 on success, 0 on failure
|
|
*/
|
|
void * leftPtr;
|
|
void * rightPtr;
|
|
void * leftEnd;
|
|
void * rightEnd;
|
|
int i;
|
|
|
|
switch (count) {
|
|
case 0:
|
|
// Should never be here
|
|
return 0;
|
|
|
|
case 1:
|
|
// Nothing to do here
|
|
break;
|
|
|
|
case 2:
|
|
// Swap the two entries if the right one compares higher.
|
|
if (cmp(data, data + size) > 0) {
|
|
memcpy(buffer, data, size);
|
|
memcpy(data, data + size, size);
|
|
memcpy(data + size, buffer, size);
|
|
}
|
|
break;
|
|
default:
|
|
// Merge sort out-of-place.
|
|
leftPtr = data;
|
|
leftEnd = rightPtr = data + count / 2 * size;
|
|
rightEnd = data + count * size;
|
|
|
|
// Sort the left half
|
|
if (!msort_r(leftPtr, count / 2, size, cmp, buffer))
|
|
return 0;
|
|
|
|
// Sort the right half
|
|
if (!msort_r(rightPtr, count / 2 + (count & 1), size, cmp, buffer))
|
|
return 0;
|
|
|
|
// Merge the sorted halves out of place
|
|
i = 0;
|
|
do {
|
|
if (cmp(leftPtr, rightPtr) <= 0) {
|
|
memcpy(buffer + i * size, leftPtr, size);
|
|
leftPtr += size;
|
|
} else {
|
|
memcpy(buffer + i * size, rightPtr, size);
|
|
rightPtr += size;
|
|
}
|
|
|
|
} while (++i < count && leftPtr < leftEnd && rightPtr < rightEnd);
|
|
|
|
// Copy the remainder
|
|
if (i < count) {
|
|
if (leftPtr < leftEnd) {
|
|
memcpy(buffer + i * size, leftPtr, leftEnd - leftPtr);
|
|
}
|
|
else {
|
|
memcpy(buffer + i * size, rightPtr, rightEnd - rightPtr);
|
|
}
|
|
}
|
|
|
|
// Copy the merged data back
|
|
memcpy(data, buffer, count * size);
|
|
break;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
int msort(void * data, size_t count, size_t size, cmpfun cmp) {
|
|
void * buffer = malloc(count * size);
|
|
if (buffer == NULL) return 0;
|
|
int result = msort_r(data, count, size, cmp, buffer);
|
|
free(buffer);
|
|
return result;
|
|
}
|
|
|
|
static void write_tree(unsigned char * dest, HuffNode_t * tree, int nitems, struct BitEncoding * encoding) {
|
|
/*
|
|
* The example used to guide this function encodes the tree in a
|
|
* breadth-first manner. We attempt to emulate that here.
|
|
*/
|
|
|
|
int i, j, k;
|
|
|
|
// There are (2 * nitems - 1) nodes in the binary tree. Allocate that.
|
|
HuffNode_t * traversal = calloc(2 * nitems - 1, sizeof(HuffNode_t));
|
|
if (traversal == NULL)
|
|
FATAL_ERROR("Fatal error while compressing Huff file.\n");
|
|
|
|
// The first node is the root of the tree.
|
|
traversal[0] = *tree;
|
|
i = 1;
|
|
|
|
// Copy the tree into a breadth-first ordering using brute force.
|
|
for (int depth = 1; i < 2 * nitems - 1; depth++) {
|
|
// Consider every possible path up to the current depth.
|
|
for (j = 0; i < 2 * nitems - 1 && j < 1 << depth; j++) {
|
|
// The index of the path is used to encode the path itself.
|
|
// Start from the most significant relevant bit and work our way down.
|
|
// Keep track of the current and previous nodes.
|
|
HuffNode_t * currNode = traversal;
|
|
HuffNode_t * parent = NULL;
|
|
for (k = 0; k < depth; k++) {
|
|
if (currNode->header.isLeaf)
|
|
break;
|
|
parent = currNode;
|
|
if ((j >> (depth - k - 1)) & 1)
|
|
currNode = currNode->branch.right;
|
|
else
|
|
currNode = currNode->branch.left;
|
|
}
|
|
// Check that the length of the current path equals the current depth.
|
|
if (k == depth) {
|
|
// Make sure we can encode the current branch.
|
|
// Bail here if we cannot.
|
|
// This is only applicable for 8-bit encodings.
|
|
if (traversal + i - parent > 128)
|
|
FATAL_ERROR("Fatal error while compressing Huff file: unable to encode binary tree.\n");
|
|
// Copy the current node, and update its parent.
|
|
traversal[i] = *currNode;
|
|
if (parent != NULL) {
|
|
if ((j & 1) == 1)
|
|
parent->branch.right = traversal + i;
|
|
else
|
|
parent->branch.left = traversal + i;
|
|
}
|
|
// Encode the path through the tree in the lookup table
|
|
if (traversal[i].header.isLeaf) {
|
|
encoding[traversal[i].leaf.key].nbits = depth;
|
|
encoding[traversal[i].leaf.key].bitstring = j;
|
|
}
|
|
i++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Encode the size of the tree.
|
|
// This is used by the decompressor to skip the tree.
|
|
dest[4] = nitems - 1;
|
|
|
|
// Encode each node in the tree.
|
|
for (i = 0; i < 2 * nitems - 1; i++) {
|
|
HuffNode_t * currNode = traversal + i;
|
|
if (currNode->header.isLeaf) {
|
|
dest[5 + i] = traversal[i].leaf.key;
|
|
} else {
|
|
dest[5 + i] = (((currNode->branch.right - traversal - i) / 2) - 1);
|
|
if (currNode->branch.left->header.isLeaf)
|
|
dest[5 + i] |= 0x80;
|
|
if (currNode->branch.right->header.isLeaf)
|
|
dest[5 + i] |= 0x40;
|
|
}
|
|
}
|
|
|
|
free(traversal);
|
|
}
|
|
|
|
static inline void write_32_le(unsigned char * dest, int * destPos, uint32_t * buff, int * buffPos) {
|
|
dest[*destPos] = *buff;
|
|
dest[*destPos + 1] = *buff >> 8;
|
|
dest[*destPos + 2] = *buff >> 16;
|
|
dest[*destPos + 3] = *buff >> 24;
|
|
*destPos += 4;
|
|
*buff = 0;
|
|
*buffPos = 0;
|
|
}
|
|
|
|
static inline void read_32_le(unsigned char * src, int * srcPos, uint32_t * buff) {
|
|
uint32_t tmp = src[*srcPos];
|
|
tmp |= src[*srcPos + 1] << 8;
|
|
tmp |= src[*srcPos + 2] << 16;
|
|
tmp |= src[*srcPos + 3] << 24;
|
|
*srcPos += 4;
|
|
*buff = tmp;
|
|
}
|
|
|
|
static void write_bits(unsigned char * dest, int * destPos, struct BitEncoding * encoding, int value, uint32_t * buff, int * buffBits) {
|
|
int nbits = encoding[value].nbits;
|
|
uint32_t bitstring = encoding[value].bitstring;
|
|
|
|
if (*buffBits + nbits >= 32) {
|
|
int diff = *buffBits + nbits - 32;
|
|
*buff <<= nbits - diff;
|
|
*buff |= bitstring >> diff;
|
|
bitstring &= ~(1 << diff);
|
|
nbits = diff;
|
|
write_32_le(dest, destPos, buff, buffBits);
|
|
}
|
|
if (nbits != 0) {
|
|
*buff <<= nbits;
|
|
*buff |= bitstring;
|
|
*buffBits += nbits;
|
|
}
|
|
}
|
|
|
|
/*
|
|
=======================================
|
|
MAIN COMPRESSION/DECOMPRESSION ROUTINES
|
|
=======================================
|
|
*/
|
|
|
|
unsigned char * HuffCompress(unsigned char * src, int srcSize, int * compressedSize_p, int bitDepth) {
|
|
if (srcSize <= 0)
|
|
goto fail;
|
|
|
|
int worstCaseDestSize = 4 + (2 << bitDepth) + srcSize * 3;
|
|
|
|
unsigned char *dest = malloc(worstCaseDestSize);
|
|
if (dest == NULL)
|
|
goto fail;
|
|
|
|
int nitems = 1 << bitDepth;
|
|
|
|
HuffNode_t * freqs = calloc(nitems, sizeof(HuffNode_t));
|
|
if (freqs == NULL)
|
|
goto fail;
|
|
|
|
struct BitEncoding * encoding = calloc(nitems, sizeof(struct BitEncoding));
|
|
if (encoding == NULL)
|
|
goto fail;
|
|
|
|
// Set up the frequencies table. This will inform the tree.
|
|
for (int i = 0; i < nitems; i++) {
|
|
freqs[i].header.isLeaf = 1;
|
|
freqs[i].header.value = 0;
|
|
freqs[i].leaf.key = i;
|
|
}
|
|
|
|
// Count each nybble or byte.
|
|
for (int i = 0; i < srcSize; i++) {
|
|
if (bitDepth == 8) {
|
|
freqs[src[i]].header.value++;
|
|
} else {
|
|
freqs[src[i] >> 4].header.value++;
|
|
freqs[src[i] & 0xF].header.value++;
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
for (int i = 0; i < nitems; i++) {
|
|
fprintf(stderr, "%d: %d\n", i, freqs[i].header.value);
|
|
}
|
|
#endif // DEBUG
|
|
|
|
// Sort the frequency table.
|
|
if (!msort(freqs, nitems, sizeof(HuffNode_t), cmp_tree))
|
|
goto fail;
|
|
|
|
// Prune zero-frequency values.
|
|
for (int i = 0; i < nitems; i++) {
|
|
if (freqs[i].header.value != 0) {
|
|
if (i > 0) {
|
|
for (int j = i; j < nitems; j++) {
|
|
freqs[j - i] = freqs[j];
|
|
}
|
|
nitems -= i;
|
|
}
|
|
break;
|
|
}
|
|
// This should never happen:
|
|
if (i == nitems - 1)
|
|
goto fail;
|
|
}
|
|
|
|
HuffNode_t * tree = calloc(nitems * 2 - 1, sizeof(HuffNode_t));
|
|
if (tree == NULL)
|
|
goto fail;
|
|
|
|
// Iteratively collapse the two least frequent nodes.
|
|
HuffNode_t * endptr = freqs + nitems - 2;
|
|
|
|
for (int i = 0; i < nitems - 1; i++) {
|
|
HuffNode_t * left = freqs;
|
|
HuffNode_t * right = freqs + 1;
|
|
tree[i * 2] = *right;
|
|
tree[i * 2 + 1] = *left;
|
|
for (int j = 0; j < nitems - i - 2; j++)
|
|
freqs[j] = freqs[j + 2];
|
|
endptr->header.isLeaf = 0;
|
|
endptr->header.value = tree[i * 2].header.value + tree[i * 2 + 1].header.value;
|
|
endptr->branch.left = tree + i * 2;
|
|
endptr->branch.right = tree + i * 2 + 1;
|
|
endptr--;
|
|
if (i < nitems - 2 && !msort(freqs, nitems - i - 1, sizeof(HuffNode_t), cmp_tree))
|
|
goto fail;
|
|
}
|
|
|
|
// Write the tree breadth-first, and create the path lookup table.
|
|
write_tree(dest, freqs, nitems, encoding);
|
|
|
|
free(tree);
|
|
free(freqs);
|
|
|
|
// Encode the data itself.
|
|
int destPos = 4 + nitems * 2;
|
|
uint32_t destBuf = 0;
|
|
uint32_t srcBuf = 0;
|
|
int destBitPos = 0;
|
|
|
|
for (int srcPos = 0; srcPos < srcSize;) {
|
|
read_32_le(src, &srcPos, &srcBuf);
|
|
for (int i = 0; i < 32 / bitDepth; i++) {
|
|
write_bits(dest, &destPos, encoding, srcBuf & (0xFF >> (8 - bitDepth)), &destBuf, &destBitPos);
|
|
srcBuf >>= bitDepth;
|
|
}
|
|
}
|
|
|
|
if (destBitPos != 0) {
|
|
write_32_le(dest, &destPos, &destBuf, &destBitPos);
|
|
}
|
|
|
|
free(encoding);
|
|
|
|
// Write the header.
|
|
dest[0] = bitDepth | 0x20;
|
|
dest[1] = srcSize;
|
|
dest[2] = srcSize >> 8;
|
|
dest[3] = srcSize >> 16;
|
|
*compressedSize_p = (destPos + 3) & ~3;
|
|
return dest;
|
|
|
|
fail:
|
|
FATAL_ERROR("Fatal error while compressing Huff file.\n");
|
|
}
|
|
|
|
unsigned char * HuffDecompress(unsigned char * src, int srcSize, int * uncompressedSize_p) {
|
|
if (srcSize < 4)
|
|
goto fail;
|
|
|
|
int bitDepth = *src & 15;
|
|
if (bitDepth != 4 && bitDepth != 8)
|
|
goto fail;
|
|
|
|
int destSize = (src[3] << 16) | (src[2] << 8) | src[1];
|
|
|
|
unsigned char *dest = malloc(destSize);
|
|
|
|
if (dest == NULL)
|
|
goto fail;
|
|
|
|
int treePos = 5;
|
|
int treeSize = (src[4] + 1) * 2;
|
|
int srcPos = 4 + treeSize;
|
|
int destPos = 0;
|
|
int curValPos = 0;
|
|
uint32_t destTmp = 0;
|
|
uint32_t window;
|
|
|
|
for (;;)
|
|
{
|
|
if (srcPos >= srcSize)
|
|
goto fail;
|
|
read_32_le(src, &srcPos, &window);
|
|
for (int i = 0; i < 32; i++) {
|
|
int curBit = (window >> 31) & 1;
|
|
unsigned char treeView = src[treePos];
|
|
bool isLeaf = ((treeView << curBit) & 0x80) != 0;
|
|
treePos &= ~1; // align
|
|
treePos += ((treeView & 0x3F) + 1) * 2 + curBit;
|
|
if (isLeaf) {
|
|
destTmp >>= bitDepth;
|
|
destTmp |= (src[treePos] << (32 - bitDepth));
|
|
curValPos++;
|
|
if (curValPos == 32 / bitDepth) {
|
|
write_32_le(dest, &destPos, &destTmp, &curValPos);
|
|
if (destPos == destSize) {
|
|
*uncompressedSize_p = destSize;
|
|
return dest;
|
|
}
|
|
}
|
|
treePos = 5;
|
|
}
|
|
window <<= 1;
|
|
}
|
|
}
|
|
|
|
fail:
|
|
FATAL_ERROR("Fatal error while decompressing Huff file.\n");
|
|
}
|