/* BitDecoder.c */
/* 2008/11/05   */

#include "StdAfx.h"

#include "BitDecoder.h"

/*
 * BIT_MASK[n] has the n least-significant bits set, for n = 0..32.
 * It is used to extract the low n bits of the decoder's bit cache.
 */
static const UINT32 BIT_MASK[] = {
	/*  0 ..  3 */ 0x00000000u, 0x00000001u, 0x00000003u, 0x00000007u,
	/*  4 ..  7 */ 0x0000000Fu, 0x0000001Fu, 0x0000003Fu, 0x0000007Fu,
	/*  8 .. 11 */ 0x000000FFu, 0x000001FFu, 0x000003FFu, 0x000007FFu,
	/* 12 .. 15 */ 0x00000FFFu, 0x00001FFFu, 0x00003FFFu, 0x00007FFFu,
	/* 16 .. 19 */ 0x0000FFFFu, 0x0001FFFFu, 0x0003FFFFu, 0x0007FFFFu,
	/* 20 .. 23 */ 0x000FFFFFu, 0x001FFFFFu, 0x003FFFFFu, 0x007FFFFFu,
	/* 24 .. 27 */ 0x00FFFFFFu, 0x01FFFFFFu, 0x03FFFFFFu, 0x07FFFFFFu,
	/* 28 .. 31 */ 0x0FFFFFFFu, 0x1FFFFFFFu, 0x3FFFFFFFu, 0x7FFFFFFFu,
	/* 32       */ 0xFFFFFFFFu
}; /* BIT_MASK */

/*
 * Attaches the decoder `t` to the byte range [p, p + length) and resets
 * its bit cache so that no bits are pending.
 */
void QV_BitDecoder_Init(
	BitDecoder_t* t,
	const VOID*   p,
	SIZE_T        length)
{
	const BYTE* const first = (const BYTE*)p;
	const BYTE* const last  = first + length;

	/* Input window: next byte to load and one-past-the-last byte. */
	t->p   = first;
	t->end = last;

	/* Start with an empty cache. */
	t->Count = 0;
	t->Cache = 0;
}

/*
 * Removes and returns the next `bits` bits from the stream.
 *
 * Bits are taken from the low end of t->Cache. When fewer than `bits` bits
 * are cached, up to four more input bytes are appended above the cached
 * bits, in little-endian byte order.
 *
 * t    : decoder state; Cache, Count and p are updated.
 * bits : number of bits to read; must be in 0..32 (BIT_MASK has 33 entries).
 *
 * Returns the extracted value. Returns 0, without consuming anything, when
 * a refill is needed but the input is already exhausted.
 *
 * Only the bytes that actually remain before t->end are loaded. A short
 * tail (1..3 bytes) is zero-padded instead of being read as a full 32-bit
 * word, so nothing past t->end is dereferenced, and t->p never moves beyond
 * t->end.
 */
UINT32 QV_BitDecoder_GetBits(
	BitDecoder_t* t,
	INT           bits)
{
	UINT32 v;

	if (t->Count < bits) {
		const BYTE* src = t->p;
		UINT32      word;
		INT32       take;
		INT32       i;

		if (src >= t->end) {
			return 0;
		}

		if ((t->end - src) >= 4) {
			/* Full word available: assemble it bytewise (no alignment or
			   aliasing assumptions on the input pointer). */
			word = (UINT32)src[0]
			     | ((UINT32)src[1] <<  8)
			     | ((UINT32)src[2] << 16)
			     | ((UINT32)src[3] << 24);
			take = 4;
		} else {
			/* Short tail: gather only the bytes that exist. */
			take = (INT32)(t->end - src);
			word = 0;
			for (i = 0; i < take; i++) {
				word |= (UINT32)src[i] << (8 * i);
			}
		}

		/* Append the new bits above the ones still pending. */
		t->Cache |= ((UINT64)word << t->Count);
		t->Count += take * 8;
		t->p      = src + take;
	}

	v = (UINT32)(t->Cache & BIT_MASK[bits]);

	t->Cache >>= bits;
	t->Count  -= bits;

	return v;
}

/*
 * Returns the next `bits` bits of the stream without consuming them.
 *
 * When fewer than `bits` bits are cached and input remains, up to four more
 * input bytes are appended above the cached bits, in little-endian byte
 * order. The refill itself is kept, so t->Cache, t->Count and t->p may
 * change even though no bits are removed.
 *
 * t    : decoder state.
 * bits : number of bits to look at; must be in 0..32 (BIT_MASK has 33
 *        entries).
 *
 * Only the bytes that actually remain before t->end are loaded. A short
 * tail (1..3 bytes) is zero-padded, so nothing past t->end is dereferenced,
 * t->p never moves beyond t->end, and bits requested beyond the end of the
 * input read as zero.
 */
UINT32 QV_BitDecoder_PeekBits(
	BitDecoder_t* t,
	INT           bits)
{
	if (t->Count < bits && t->p < t->end) {
		const BYTE* src = t->p;
		UINT32      word;
		INT32       take;
		INT32       i;

		if ((t->end - src) >= 4) {
			/* Full word available: assemble it bytewise (no alignment or
			   aliasing assumptions on the input pointer). */
			word = (UINT32)src[0]
			     | ((UINT32)src[1] <<  8)
			     | ((UINT32)src[2] << 16)
			     | ((UINT32)src[3] << 24);
			take = 4;
		} else {
			/* Short tail: gather only the bytes that exist. */
			take = (INT32)(t->end - src);
			word = 0;
			for (i = 0; i < take; i++) {
				word |= (UINT32)src[i] << (8 * i);
			}
		}

		/* Append the new bits above the ones still pending. */
		t->Cache |= ((UINT64)word << t->Count);
		t->Count += take * 8;
		t->p      = src + take;
	}

	return (UINT32)(t->Cache & BIT_MASK[bits]);
}

/*
 * Discards `bits` bits from the low end of the cache and lowers the
 * pending-bit count by the same amount. No refill and no range check is
 * performed here.
 */
void QV_BitDecoder_SkipBits(
	BitDecoder_t* t,
	INT           bits)
{
	t->Count = t->Count - bits;
	t->Cache = t->Cache >> bits;
}

/* */

#include "HuffmanDecoder.h"

/*
 * SSE2_PEEKBITS(bits)
 *
 * Statement macro. Expects a `BitDecoder_t* d` and an assignable `code`
 * in the enclosing scope. If fewer than `bits` bits are cached and input
 * remains, appends up to four more input bytes above the cached bits using
 * 64-bit SSE2 operations on d->Cache, then stores the low `bits` bits of
 * the cache in `code`. Nothing is consumed from the cache.
 *
 * The expansion already ends with ';', so it is invoked without a trailing
 * semicolon. `bits` is evaluated more than once.
 *
 * When fewer than four bytes remain, only those bytes are read and the
 * word is zero-padded; the macro never dereferences memory at or past
 * d->end and never advances d->p beyond d->end.
 */
#define SSE2_PEEKBITS(bits) \
	if (d->Count < (bits) && d->p < d->end) { \
		__m128i* cache = (__m128i*)&(d->Cache); \
		__m128i  c0; \
		UINT32   word = 0; \
		INT32    fill = 0; \
		if ((d->end - d->p) >= 4) { \
			word = *((const UINT32*)(d->p)); \
			fill = 32; \
			d->p += 4; \
		} else { \
			while (d->p < d->end) { \
				word |= ((UINT32)(*(d->p))) << fill; \
				fill += 8; \
				d->p += 1; \
			} \
		} \
		c0 = _mm_slli_epi64(_mm_cvtsi32_si128((INT32)word), d->Count); \
		_mm_storel_epi64(cache, _mm_or_si128(_mm_loadl_epi64(cache), c0)); \
		d->Count += fill; \
	} \
	code = *((UINT32*)&(d->Cache)) & BIT_MASK[(bits)];

/*
 * SSE2_SKIPBITS(bits)
 *
 * Statement macro. Expects a `BitDecoder_t* d` in the enclosing scope.
 * Shifts the 64-bit d->Cache right by `bits` using SSE2 operations and
 * subtracts `bits` from d->Count.
 *
 * The expansion already ends with ';', so it is invoked without a trailing
 * semicolon. `bits` is evaluated twice.
 */
#define SSE2_SKIPBITS(bits) \
	{ \
		__m128i* const slot    = (__m128i*)&(d->Cache); \
		const __m128i  pending = _mm_loadl_epi64(slot); \
		_mm_storel_epi64(slot, _mm_srli_epi64(pending, (bits))); \
	} \
	d->Count -= (bits);


/*
 * Decodes one Huffman symbol from the bit stream `d` using the tables
 * in `h`.
 *
 * Fast path: the next h->LookupBits bits index h->CodeLookup. An entry
 * other than 0xffff carries the code length in its top 4 bits and the
 * return value in its low 12 bits.
 *
 * Slow path (entry == 0xffff): starting at table row h->NextCode, each row
 * `i` describes h->CodeCount[i] code words of h->Length[i] bits, stored
 * consecutively in h->CodeWord beginning at offset h->NextStart. The next
 * Length[i] bits are binary-searched within that row (the row is assumed
 * to be sorted in ascending order); on a match the bits are consumed and
 * the matching h->Index entry is returned. A row with Length 0 ends the
 * search.
 *
 * Returns -1 when no code matches; in that case no bits are consumed.
 */
INT32 QV_HuffmanDecoder_LookupTable_SSE2(
	const HuffmanDecoder_t* h,
	BitDecoder_t*           d)
{
	UINT32 code;
	UINT16 entry;
	INT32  level;
	INT32  base;

	SSE2_PEEKBITS(h->LookupBits)

	entry = h->CodeLookup[code];
	if (entry != 0xffff) {
		/* Direct hit in the lookup table. */
		INT32 used = entry >> 12;
		SSE2_SKIPBITS(used)
		return entry & 0x0fff;
	}

	/* Longer code: walk the remaining length classes. */
	base = h->NextStart;
	for (level = h->NextCode; level < 32; level++) {
		INT32   total = h->CodeCount[level];
		INT32   width = h->Length[level];
		UINT32* lo;
		UINT32* hi;

		if (width == 0) {
			break;
		}

		lo = h->CodeWord + base;
		hi = lo + total;

		SSE2_PEEKBITS(width)

		/* Binary search over the half-open range [lo, hi). */
		while (lo < hi) {
			UINT32* mid = lo + (hi - lo) / 2;

			if (code < *mid) {
				hi = mid;
			} else if (code > *mid) {
				lo = mid + 1;
			} else {
				SSE2_SKIPBITS(width)
				return h->Index[mid - h->CodeWord];
			}
		}

		base += total;
	}

	return -1;
}

/* */

