
/*	Move Mnemonics  */
/*
 * ROAST_SIMD_SSE_MOVAPS_XM( XMM_NUM, M128 )
 *   Expands to one `__asm movaps` statement with register xmm<XMM_NUM> as the
 *   destination and the memory operand [M128] as the source.
 *   XMM_NUM is token-pasted onto `xmm`, so it has to be written as a literal
 *   register number; a macro passed here is not expanded before pasting.
 *   movaps is the aligned move, so [M128] is expected to be 16-byte aligned.
 */
#define ROAST_SIMD_SSE_MOVAPS_XM( XMM_NUM, M128 ) \
	__asm movaps xmm ## XMM_NUM, [M128]

/*
 * ROAST_SIMD_SSE_MOVAPS_MX( M128, XMM_NUM )
 *   The opposite direction: expands to one `__asm movaps` statement with the
 *   memory operand [M128] as the destination and xmm<XMM_NUM> as the source.
 *   The same literal-number and alignment remarks apply.
 */
#define ROAST_SIMD_SSE_MOVAPS_MX( M128, XMM_NUM ) \
	__asm movaps [M128], xmm ## XMM_NUM


/*	Calculation Mnemonics  */
/*
 * ROAST_SIMD_SSE_MULPS_XM( XMM_NUM, M128 )
 *   xmm<XMM_NUM> *= [M128]   (four packed single-precision lanes)
 *   Expands to one `__asm mulps` statement.  XMM_NUM is token-pasted onto
 *   `xmm`, so it must be a literal register number.
 */
#define ROAST_SIMD_SSE_MULPS_XM( XMM_NUM, M128 )			__asm mulps xmm ## XMM_NUM, [M128]

/*
 * ROAST_SIMD_SSE_MULPS_MX( M128, XMM_NUM )
 *   [M128] *= xmm<XMM_NUM>   (four packed single-precision lanes)
 *
 *   MULPS only accepts an XMM register as its destination, so the former
 *   expansion (`mulps xmm [M128], xmmN`) could never assemble.  Because the
 *   lane-wise multiply is commutative, the product is formed in the register
 *   and then written back with an aligned store.
 *
 *   Side effect: xmm<XMM_NUM> is overwritten with the product.
 *   The two instructions are wrapped in a braced __asm block so that the macro
 *   still behaves as a single statement at the point of use.
 */
#define ROAST_SIMD_SSE_MULPS_MX( M128, XMM_NUM ) \
	__asm { \
		__asm mulps xmm ## XMM_NUM, [M128] \
		__asm movaps [M128], xmm ## XMM_NUM \
	}



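/*	=========================================================
	movaps
	Store an array of four boundary-aligned floats into XMM register x (x = xmm_num)	*/
/*
MOVAPS
  xmm1, xmm2/m128
  xmm2/m128, xmm1
  Move four packed single-precision floating-point values between XMM registers, or between an XMM register and aligned memory.
*/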

/*
 * Mnemonic shared by every instantiation in this section.  The per-include
 * parameter ___ROAST_SIMD__NEMONI_ (one trailing underscore) is defined in
 * terms of this macro, so it resolves to `movaps` wherever it is expanded
 * while this definition is in effect.
 */
#define ___ROAST_SIMD__NEMONI__			movaps

/*
 * Each group below defines a set of parameter macros and then includes a
 * template file:
 *   ___ROAST_SIMD__NEMONI_         mnemonic to emit
 *   ___ROAST_SIMD__VAL_TYPE_       type of the non-register operand
 *   ___ROAST_SIMD__VAL_NAME_       identifier used for that operand
 *   ___ROAST_SIMD__VAL_IS_POINTER  defined (empty) in the pointer/reference groups
 * NOTE(review): the template files are not visible from here; presumably each
 * one consumes these parameters and #undef's them before returning, since the
 * groups redefine them with different values -- confirm against the templates.
 */

//	xmm <= float*   (register loaded from memory addressed by a const float*)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		const float*
#define ___ROAST_SIMD__VAL_NAME_		f_array_4
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__xmm_m128"

//	xmm <= __m128   (by-value operand; VAL_IS_POINTER is intentionally not defined)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		__m128
#define ___ROAST_SIMD__VAL_NAME_		m128val
#include "simd_core__xmm_m128"

//	xmm <= _ROAST_SIMD_M128   (by-value operand of the project's own 128-bit type)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		_ROAST_SIMD_M128
#define ___ROAST_SIMD__VAL_NAME_		m128val
#include "simd_core__xmm_m128"

//	float* <= xmm   (register stored to memory addressed by a float*)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		float*
#define ___ROAST_SIMD__VAL_NAME_		f_array_4
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__m128_xmm"

//	__m128& <= xmm   (store through a reference; flagged with VAL_IS_POINTER like the float* case)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		__m128&
#define ___ROAST_SIMD__VAL_NAME_		m128val
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__m128_xmm"

//	_ROAST_SIMD_M128& <= xmm   (store through a reference to the project's own 128-bit type)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		_ROAST_SIMD_M128&
#define ___ROAST_SIMD__VAL_NAME_		m128val
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__m128_xmm"

//	(get) __m128 <= xmm
//	No ___ROAST_SIMD__VAL_NAME_ is supplied for this template.
//	NOTE(review): presumably the "get" template returns the value instead of
//	taking a named operand -- confirm against simd_core__m128_xmm_get.
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		__m128
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__m128_xmm_get"

//	xmm <= xmm   (register-to-register form; only the mnemonic is parameterised)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#include "simd_core__xmm_xmm"




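/*	=========================================================
	movups
	Store an array of four floats that is NOT boundary-aligned into XMM register x (x = xmm_num)	*/
/*
MOVUPS
  xmm1, xmm2/m128
  xmm2/m128, xmm1
  Move four packed single-precision floating-point values between XMM registers, or between an XMM register and unaligned memory. (SSE)
*/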

/*
 * Switch the shared mnemonic to `movups` (the unaligned move).  Every group
 * below sets ___ROAST_SIMD__NEMONI_ in terms of this macro and then includes a
 * template file, mirroring the movaps section group for group.
 * NOTE(review): the template files are not visible from here; presumably each
 * one consumes the parameter macros and #undef's them -- confirm.
 */
#undef ___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__NEMONI__			movups

//	xmm <= float*   (register loaded from memory addressed by a const float*)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		const float*
#define ___ROAST_SIMD__VAL_NAME_		f_array_4
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__xmm_m128"

//	xmm <= __m128   (by-value operand; VAL_IS_POINTER is intentionally not defined)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		__m128
#define ___ROAST_SIMD__VAL_NAME_		m128val
#include "simd_core__xmm_m128"

//	xmm <= _ROAST_SIMD_M128   (by-value operand of the project's own 128-bit type)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		_ROAST_SIMD_M128
#define ___ROAST_SIMD__VAL_NAME_		m128val
#include "simd_core__xmm_m128"

//	float* <= xmm   (register stored to memory addressed by a float*)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		float*
#define ___ROAST_SIMD__VAL_NAME_		f_array_4
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__m128_xmm"

//	__m128& <= xmm   (store through a reference)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		__m128&
#define ___ROAST_SIMD__VAL_NAME_		m128val
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__m128_xmm"

//	_ROAST_SIMD_M128& <= xmm
//	Added for parity with the movaps section: the matching load
//	(xmm <= _ROAST_SIMD_M128) was already instantiated for movups, but the
//	store through a _ROAST_SIMD_M128 reference was missing.
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		_ROAST_SIMD_M128&
#define ___ROAST_SIMD__VAL_NAME_		m128val
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__m128_xmm"

//	(get) __m128 <= xmm
//	No ___ROAST_SIMD__VAL_NAME_ is supplied for this template.
//	NOTE(review): presumably the "get" template returns the value instead of
//	taking a named operand -- confirm against simd_core__m128_xmm_get.
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		__m128
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__m128_xmm_get"

//	xmm <= xmm   (register-to-register form; only the mnemonic is parameterised)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#include "simd_core__xmm_xmm"



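/*
///// SSE /////


MOVAPS
  xmm1, xmm2/m128
  xmm2/m128, xmm1
  Move four packed single-precision floating-point values between XMM registers, or between an XMM register and aligned memory.

MOVUPS
  xmm1, xmm2/m128
  xmm2/m128, xmm1
  Move four packed single-precision floating-point values between XMM registers, or between an XMM register and unaligned memory. (SSE)

MOVHLPS
  xmm1, xmm2
  Move two packed single-precision floating-point values from the high quadword of one XMM register to the low quadword of another XMM register. (SSE)

MOVLHPS
  xmm1, xmm2
  Move two packed single-precision floating-point values from the low quadword of one XMM register to the high quadword of another XMM register. (SSE)

MOVHPS
  xmm, m64
  m64, xmm
  Move two packed single-precision floating-point values between the high quadword of an XMM register and memory. (SSE)

MOVLPS
  xmm, m64
  m64, xmm
  Move two packed single-precision floating-point values between the low quadword of an XMM register and memory. (SSE)

MOVSS
  xmm1, xmm2/m32
  xmm2/m32, xmm
  Move a scalar single-precision floating-point value between XMM registers, or between an XMM register and memory. (SSE)

MOVMSKPS
  r32, xmm
  Extract the sign mask from four packed single-precision floating-point values and move it to a general-purpose register. (SSE)

PMOVMSKB
  r32, mm
  r32, xmm
  Move the byte mask of a SIMD register to a general-purpose register. (SSE)
*/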


/******************************************************************/


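/*
PADDB  	mm1, mm2/m64
xmm1, xmm2/m128 	Packed byte integer add 	MMX
PADDW 	mm1, mm2/m64
xmm1, xmm2/m128 	Packed word integer add 	MMX
PADDD 	mm1, mm2/m64
xmm1, xmm2/m128 	Packed doubleword integer add 	MMX
PADDQ 	mm1, mm2/m64
xmm1, xmm2/m128 	Packed quadword integer add 	SSE2
PADDSB 	mm1, mm2/m64
xmm1, xmm2/m128 	Packed signed byte integer add with signed saturation 	MMX
PADDSW 	mm1, mm2/m64
xmm1, xmm2/m128 	Packed signed word integer add with signed saturation 	MMX
PADDUSB 	mm1, mm2/m64
xmm1, xmm2/m128 	Packed unsigned byte integer add with unsigned saturation 	MMX
PADDUSW 	mm1, mm2/m64
xmm1, xmm2/m128 	Packed unsigned word integer add with unsigned saturation 	MMX
ADDPS 	xmm1, xmm2/m128 	Packed single-precision floating-point add 	SSE
ADDSS 	xmm1, xmm2/m32 	Scalar single-precision floating-point add 	SSE
*/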

/*	Multiplication	*/
/*
 * Switch the shared mnemonic to `mulps`.  Only the "xmm <= operand" templates
 * (simd_core__xmm_m128 and simd_core__xmm_xmm) are instantiated here; no
 * memory-destination template is included for this mnemonic.
 * NOTE(review): within the visible source ___ROAST_SIMD__NEMONI__ is left
 * defined as mulps after this section -- confirm nothing later relies on it.
 */
#undef ___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__NEMONI__			mulps

//	xmm <= float*   (memory operand addressed by a const float*)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		const float*
#define ___ROAST_SIMD__VAL_NAME_		f_array_4
#define ___ROAST_SIMD__VAL_IS_POINTER	
#include "simd_core__xmm_m128"

//	xmm <= __m128   (by-value operand; VAL_IS_POINTER is intentionally not defined)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		__m128
#define ___ROAST_SIMD__VAL_NAME_		m128val
#include "simd_core__xmm_m128"

//	xmm <= _ROAST_SIMD_M128   (by-value operand of the project's own 128-bit type)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#define ___ROAST_SIMD__VAL_TYPE_		_ROAST_SIMD_M128
#define ___ROAST_SIMD__VAL_NAME_		m128val
#include "simd_core__xmm_m128"

//	xmm <= xmm   (register-to-register form; only the mnemonic is parameterised)
#define ___ROAST_SIMD__NEMONI_			___ROAST_SIMD__NEMONI__
#include "simd_core__xmm_xmm"


		/*	......
		template <int xmm_n1, int xmm_n2>
			inline void mulps_2()
		{
			ROAST_SIMD__MULPS_XX(xmm_n1, xmm_n2);
		}
		*/
