/* BitReader.h */
/* 2009/06/26  */

#pragma once

/* Generic C */

/*
 * State of the portable-C bit reader.  The *_BITS_C macros in this header
 * operate on a pointer to this structure that the call site names `d`.
 */
typedef struct QT_BitReader_C {
	UINT32       Cache; /* copied into the local bit_cache by INITIATE_BITS_C, written back by FINALIZE_BITS_C */
	INT32        Count; /* LOAD_BITS_C refills while this is > 0 (then subtracts 16); RETIRE_BITS_C adds the bits it drops */
	const UINT8 *p;     /* next input byte; LOAD_BITS_C reads p[0], p[1] and advances by 2 */
	const UINT8 *end;   /* upper bound used by REST_BITS_C (end - p) */
} BitReader_C_t;

/*
 * Prepares the portable-C bit reader `t`.
 * NOTE(review): the definition is not in this header; presumably `p` is the
 * start of the input buffer and `size` its length in bytes -- confirm
 * against the implementation.
 */
void QT_BitReader_C_Initialize(BitReader_C_t *t, const VOID *p, SIZE_T size);

/* */

/*
 * INITIATE_BITS_C declares a local UINT32 named bit_cache and seeds it from
 * d->Cache; FINALIZE_BITS_C stores bit_cache back into d->Cache.
 * Both expand to a complete statement (the semicolon is part of the macro)
 * and refer to an identifier `d` that must be in scope at the point of use
 * (presumably a BitReader_C_t pointer -- confirm at the call sites).
 */
#define INITIATE_BITS_C UINT32 bit_cache = d->Cache;
#define FINALIZE_BITS_C d->Cache = bit_cache;

/*
 * LOAD_BITS_C: refill step for the 32-bit cache.
 * When d->Count is positive, the two bytes at d->p are combined big-endian
 * into a 16-bit value, shifted left by d->Count and OR-ed into bit_cache;
 * d->p then advances by 2 and d->Count drops by 16.
 *
 * The bytes are widened to UINT32 before shifting: left in promoted signed
 * int, a shift by 16 of a value with its top bit set would overflow into
 * the sign bit, which is undefined behaviour.
 *
 * The expansion is a bare `if` statement with no trailing semicolon, so it
 * can be followed directly by another statement (as FETCH_BITS does).
 * d->end is not consulted here.
 * NOTE(review): presumably the caller guarantees at least two readable
 * bytes at d->p -- confirm.
 */
#define LOAD_BITS_C \
	if (d->Count > 0) { bit_cache |= (((UINT32)d->p[0] << 8) | (UINT32)d->p[1]) << d->Count; d->p += 2; d->Count -= 16; }

/*
 * RETIRE_BITS_C(X): discard the X most-significant bits of bit_cache by
 * shifting it left, and add X to d->Count.
 *
 * The two statements are enclosed in one brace block so the macro behaves
 * as a single statement: `if (c) RETIRE_BITS_C(n);` now guards both the
 * shift and the counter update.  The block is valid with or without a
 * trailing semicolon at the call site.
 *
 * X is expanded twice, so it must not have side effects.
 */
#define RETIRE_BITS_C(X) \
	{ bit_cache <<= (X); d->Count += (X); }

/*
 * GET_BITS_C(X): value of bit_cache shifted right by (32 - X); with the
 * UINT32 bit_cache declared by INITIATE_BITS_C this is its top X bits.
 * Does not modify bit_cache or the reader state.
 * X == 0 would shift a 32-bit value by 32, which C leaves undefined, so
 * callers need 1 <= X <= 32.
 */
#define GET_BITS_C(X) (bit_cache >> (32 - (X)))

/*
 * REST_BITS_C: (bytes between d->p and d->end) * 8 + (16 - d->Count).
 * NOTE(review): reads as the number of bits not yet consumed, with
 * (16 - d->Count) being the bits still held in the cache -- this matches
 * LOAD_BITS_C adding 16 bits whenever d->Count > 0, but confirm against
 * the initial Count set by QT_BitReader_C_Initialize.
 */
#define REST_BITS_C ((d->end - d->p) * 8 + (16 - d->Count))

/* Generic X86 */

/*
 * State of the x86 bit reader (64-bit cache).  The *_BITS_X86 macros in
 * this header operate on a pointer to this structure that the call site
 * names `d`.
 */
typedef struct QT_BitReader_X86 {
	UINT64       Cache; /* copied into the local bit_cache by INITIATE_BITS_X86, written back by FINALIZE_BITS_X86 */
	INT32        Count; /* LOAD_BITS_X86 refills while this is > 0 (then subtracts 32); RETIRE_BITS_X86 adds the bits it drops */
	const UINT8 *p;     /* next input byte; LOAD_BITS_X86 reads 4 bytes here and advances by 4 */
	const UINT8 *end;   /* upper bound used by REST_BITS_X86 (end - p) */
} BitReader_X86_t;

/*
 * Prepares the x86 bit reader `t`.
 * NOTE(review): the definition is not in this header; presumably `p` is the
 * start of the input buffer and `size` its length in bytes -- confirm
 * against the implementation.
 */
void QT_BitReader_X86_Initialize(BitReader_X86_t *t, const VOID *p, SIZE_T size);

/* */

/*
 * INITIATE_BITS_X86 declares a local UINT64 named bit_cache and seeds it
 * from d->Cache; FINALIZE_BITS_X86 stores bit_cache back into d->Cache.
 * Both expand to a complete statement (the semicolon is part of the macro)
 * and refer to an identifier `d` that must be in scope at the point of use
 * (presumably a BitReader_X86_t pointer -- confirm at the call sites).
 */
#define INITIATE_BITS_X86 UINT64 bit_cache = d->Cache;
#define FINALIZE_BITS_X86 d->Cache = bit_cache;

/*
 * LOAD_BITS_X86: refill step for the 64-bit cache.
 * When d->Count is positive, four bytes at d->p are read as one UINT32,
 * byte-swapped with _byteswap_ulong, shifted left by d->Count and OR-ed
 * into bit_cache; d->p then advances by 4 and d->Count drops by 32.
 *
 * d->Count == 32 is handled separately with a plain 64-bit `<< 32` instead
 * of __ll_lshift.
 * NOTE(review): presumably because __ll_lshift takes the shift count
 * modulo 32 on 32-bit x86 -- confirm against the MSVC intrinsic
 * documentation.
 *
 * The load goes through a (const UINT32*) cast of the byte pointer;
 * neither the alignment of d->p nor d->end is checked here.
 * The expansion is a bare if / else-if chain with no trailing semicolon.
 */
#define LOAD_BITS_X86 \
	if      (d->Count == 32) { bit_cache |= (UINT64)    _byteswap_ulong(*((const UINT32*)(d->p))) << 32;      d->p += 4; d->Count -= 32; } \
	else if (d->Count >   0) { bit_cache |= __ll_lshift(_byteswap_ulong(*((const UINT32*)(d->p))), d->Count); d->p += 4; d->Count -= 32; }

/*
 * RETIRE_BITS_X86(X): discard the X most-significant bits of the 64-bit
 * bit_cache by shifting it left with __ll_lshift, and add X to d->Count.
 *
 * The two statements are enclosed in one brace block so the macro behaves
 * as a single statement: `if (c) RETIRE_BITS_X86(n);` now guards both the
 * shift and the counter update.  The block is valid with or without a
 * trailing semicolon at the call site.
 *
 * X is expanded twice, so it must not have side effects.
 */
#define RETIRE_BITS_X86(X) \
	{ bit_cache = __ll_lshift(bit_cache, (X)); d->Count += (X); }

/*
 * GET_BITS_X86(X): takes the upper 32 bits of the 64-bit bit_cache as a
 * UINT32 and shifts that right by (32 - X), i.e. yields the top X bits of
 * the cache.  Does not modify bit_cache or the reader state.
 * X == 0 would shift a 32-bit value by 32, which C leaves undefined, so
 * callers need 1 <= X <= 32.
 */
#define GET_BITS_X86(X) ((UINT32)(bit_cache >> 32) >> (32 - (X)))

/*
 * REST_BITS_X86: (bytes between d->p and d->end) * 8 + (32 - d->Count).
 * NOTE(review): reads as the number of bits not yet consumed, with
 * (32 - d->Count) being the bits still held in the cache -- this matches
 * LOAD_BITS_X86 adding 32 bits whenever d->Count > 0, but confirm against
 * the initial Count set by QT_BitReader_X86_Initialize.
 */
#define REST_BITS_X86 ((d->end - d->p) * 8 + (32 - d->Count))

/* MMX */

/*
 * State of the MMX bit reader (__m64 cache).  The *_BITS_MMX macros in
 * this header operate on a pointer to this structure that the call site
 * names `d`.
 */
typedef struct QT_BitReader_MMX {
	__m64        Cache; /* copied into the local bit_cache by INITIATE_BITS_MMX, written back by FINALIZE_BITS_MMX */
	INT32        Count; /* LOAD_BITS_MMX refills while this is > 0 (then subtracts 32); RETIRE_BITS_MMX adds the bits it drops */
	const UINT8 *p;     /* next input byte; LOAD_BITS_MMX reads 4 bytes here and advances by 4 */
	const UINT8 *end;   /* upper bound used by REST_BITS_MMX (end - p) */
} BitReader_MMX_t;

/*
 * Prepares the MMX bit reader `t`.
 * NOTE(review): the definition is not in this header; presumably `p` is the
 * start of the input buffer and `size` its length in bytes -- confirm
 * against the implementation.
 */
void QT_BitReader_MMX_Initialize(BitReader_MMX_t *t, const VOID *p, SIZE_T size);

/* */

/*
 * INITIATE_BITS_MMX declares a local __m64 named bit_cache and seeds it
 * from d->Cache; FINALIZE_BITS_MMX stores bit_cache back into d->Cache.
 * Both expand to a complete statement (the semicolon is part of the macro)
 * and refer to an identifier `d` that must be in scope at the point of use
 * (presumably a BitReader_MMX_t pointer -- confirm at the call sites).
 */
#define INITIATE_BITS_MMX __m64 bit_cache = d->Cache;
#define FINALIZE_BITS_MMX d->Cache = bit_cache;

/*
 * LOAD_BITS_MMX: refill step for the __m64 cache.
 * When d->Count is positive, four bytes at d->p are read as one UINT32,
 * byte-swapped with _byteswap_ulong, moved into an __m64
 * (_mm_cvtsi32_si64), shifted left by d->Count (_mm_sll_si64) and OR-ed
 * into bit_cache (_mm_or_si64); d->p then advances by 4 and d->Count drops
 * by 32.
 *
 * Unlike LOAD_BITS_X86 there is no separate d->Count == 32 branch; the
 * count is passed to _mm_sll_si64 as is.
 * The load goes through a (const UINT32*) cast of the byte pointer;
 * neither the alignment of d->p nor d->end is checked here.
 * The expansion is a bare `if` statement with no trailing semicolon.
 */
#define LOAD_BITS_MMX \
	if (d->Count > 0) { bit_cache = _mm_or_si64(bit_cache, _mm_sll_si64(_mm_cvtsi32_si64(_byteswap_ulong(*((const UINT32*)(d->p)))), _mm_cvtsi32_si64(d->Count))); d->p += 4; d->Count -= 32; }

/*
 * RETIRE_BITS_MMX(X): discard the X most-significant bits of the __m64
 * bit_cache by shifting it left with _mm_sll_si64 (count passed through
 * _mm_cvtsi32_si64), and add X to d->Count.
 *
 * The two statements are enclosed in one brace block so the macro behaves
 * as a single statement: `if (c) RETIRE_BITS_MMX(n);` now guards both the
 * shift and the counter update.  The block is valid with or without a
 * trailing semicolon at the call site.
 *
 * X is expanded twice, so it must not have side effects.
 */
#define RETIRE_BITS_MMX(X) \
	{ bit_cache = _mm_sll_si64(bit_cache, _mm_cvtsi32_si64(X)); d->Count += (X); }

/*
 * RETIRE_BITS_I_MMX(X): same effect as RETIRE_BITS_MMX, but the shift uses
 * _mm_slli_si64, which takes the count as a plain int.
 * NOTE(review): presumably intended for a compile-time constant X --
 * confirm at the call sites.
 *
 * The two statements are enclosed in one brace block so the macro behaves
 * as a single statement: `if (c) RETIRE_BITS_I_MMX(n);` now guards both
 * the shift and the counter update.  The block is valid with or without a
 * trailing semicolon at the call site.
 *
 * X is expanded twice, so it must not have side effects.
 */
#define RETIRE_BITS_I_MMX(X) \
	{ bit_cache = _mm_slli_si64(bit_cache, (X)); d->Count += (X); }

/*
 * GET_BITS_MMX(X): shifts the __m64 bit_cache right by (64 - X) with
 * _mm_srl_si64 and returns the low 32 bits of the result via
 * _mm_cvtsi64_si32, i.e. the top X bits of the cache.
 * Does not modify bit_cache or the reader state.
 *
 * GET_BITS_I_MMX(X): same computation using _mm_srli_si64, which takes the
 * count as a plain int.
 * NOTE(review): presumably intended for a compile-time constant X --
 * confirm at the call sites.
 */
#define GET_BITS_MMX(X) _mm_cvtsi64_si32(_mm_srl_si64(bit_cache, _mm_cvtsi32_si64(64 - (X))))

#define GET_BITS_I_MMX(X) _mm_cvtsi64_si32(_mm_srli_si64(bit_cache, (64 - (X))))

/*
 * REST_BITS_MMX: (bytes between d->p and d->end) * 8 + (32 - d->Count).
 * NOTE(review): reads as the number of bits not yet consumed, with
 * (32 - d->Count) being the bits still held in the cache -- this matches
 * LOAD_BITS_MMX adding 32 bits whenever d->Count > 0, but confirm against
 * the initial Count set by QT_BitReader_MMX_Initialize.
 */
#define REST_BITS_MMX ((d->end - d->p) * 8 + (32 - d->Count))

/* */

/*
 * FETCH_BITS(L, X): refill (LOAD_BITS), assign the next X bits to L
 * (GET_BITS), then drop them from the cache (RETIRE_BITS).
 * FETCH_BITS_I(L, X) is the same sequence using the GET_BITS_I /
 * RETIRE_BITS_I forms.
 *
 * LOAD_BITS, GET_BITS[_I] and RETIRE_BITS[_I] are not defined in this part
 * of the header.
 * NOTE(review): presumably each user maps them onto one of the _C / _X86 /
 * _MMX families before using FETCH_BITS -- confirm.  Of the families
 * visible here only MMX provides _I variants.
 *
 * The expansion is several statements and X appears more than once: do not
 * use it as the unbraced body of an if/else/loop, and do not pass an X
 * with side effects.
 */
#define FETCH_BITS_I(L, X) LOAD_BITS L = GET_BITS_I(X); RETIRE_BITS_I(X)
#define FETCH_BITS(L, X)   LOAD_BITS L = GET_BITS(X);   RETIRE_BITS(X)

/*
 * RESUME_BITS: reloads an already-declared bit_cache from d->Cache.
 * Unlike the INITIATE_BITS_* macros this is a plain assignment, not a
 * declaration; the trailing semicolon is part of the macro.
 */
#define RESUME_BITS bit_cache = d->Cache;

/* */

