47 template<
unsigned int VectorSize>
class Mask 59 #if defined VC_MSVC && defined _WIN32 74 : k(_mm_castsi128_ps(_mm_packs_epi16(a[0].dataI(), a[1].dataI()))) {}
99 Vc_ALWAYS_INLINE Mask &operator&=(
const Mask &rhs) { k = _mm_and_ps(k, rhs.
k);
return *
this; }
107 _mm_movemask_epi8(dataI()) == 0xffff;
112 _mm_testz_si128(dataI(), dataI());
114 _mm_movemask_epi8(dataI()) == 0x0000;
121 const int tmp = _mm_movemask_epi8(dataI());
122 return tmp != 0 && (tmp ^ 0xffff) != 0;
126 #ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK 152 #ifdef VC_COMPILE_BENCHMARKS 170 const _long bit = __builtin_ctzl(mask);
171 __asm__(
"btr %1,%0" :
"+r"(mask) :
"r"(bit));
172 #elif defined(_WIN64) 174 _BitScanForward64(&bit, mask);
175 _bittestandreset64(&mask, bit);
176 #elif defined(_WIN32) 178 _BitScanForward(&bit, mask);
179 _bittestandreset(&mask, bit);
181 #error "Not implemented yet. Please contact vc-devel@compeng.uni-frankfurt.de" 187 #define Vc_foreach_bit(_it_, _mask_) \ 188 for (Vc::SSE::ForeachHelper Vc__make_unique(foreach_bit_obj)((_mask_).toInt()); Vc__make_unique(foreach_bit_obj).outer(); ) \ 189 for (_it_ = Vc__make_unique(foreach_bit_obj).next(); Vc__make_unique(foreach_bit_obj).inner(); Vc__make_unique(foreach_bit_obj).noBreak()) 193 return _mm_movemask_epi8(dataI());
197 k = _mm_unpacklo_ps(x.
data(), x.
data());
201 k = _mm_castsi128_ps(_mm_unpacklo_epi32(tmp, tmp));
205 tmp = _mm_unpacklo_epi16(tmp, tmp);
206 k = _mm_castsi128_ps(_mm_unpacklo_epi32(tmp, tmp));
209 k = _mm_castsi128_ps(_mm_packs_epi16(x.
dataI(), _mm_setzero_si128()));
212 k = _mm_castsi128_ps(_mm_unpacklo_epi16(x.
dataI(), x.
dataI()));
216 k = _mm_castsi128_ps(_mm_unpacklo_epi16(tmp, tmp));
220 k = _mm_castsi128_ps(_mm_packs_epi16(tmp, tmp));
223 k = _mm_castsi128_ps(_mm_packs_epi16(x.
dataI(), x.
dataI()));
226 k = _mm_castsi128_ps(_mm_unpacklo_epi8(x.
dataI(), x.
dataI()));
230 x[0].
k = _mm_unpacklo_ps(data(), data());
231 x[1].
k = _mm_unpackhi_ps(data(), data());
234 x[0].
k = _mm_castsi128_ps(_mm_unpacklo_epi16(dataI(), dataI()));
235 x[1].
k = _mm_castsi128_ps(_mm_unpackhi_epi16(dataI(), dataI()));
238 x[0].
k = _mm_castsi128_ps(_mm_unpacklo_epi8 (dataI(), dataI()));
239 x[1].
k = _mm_castsi128_ps(_mm_unpackhi_epi8 (dataI(), dataI()));
254 int mask = _mm_movemask_pd(dataD());
255 return (mask & 1) + (mask >> 1);
260 #ifdef VC_IMPL_POPCNT 261 return _mm_popcnt_u32(_mm_movemask_ps(data()));
265 _M128I x = _mm_srli_epi32(dataI(), 31);
266 x = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
267 x = _mm_add_epi32(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 0, 3, 2)));
268 return _mm_cvtsi128_si32(x);
274 #ifdef VC_IMPL_POPCNT 275 return _mm_popcnt_u32(_mm_movemask_epi8(dataI())) / 2;
281 _M128I x = _mm_srli_epi16(dataI(), 15);
282 x = _mm_add_epi16(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
283 x = _mm_add_epi16(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(0, 1, 2, 3)));
284 x = _mm_add_epi16(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(2, 3, 0, 1)));
285 return _mm_extract_epi16(x, 0);
291 int tmp = _mm_movemask_epi8(dataI());
292 #ifdef VC_IMPL_POPCNT 293 return _mm_popcnt_u32(tmp);
295 tmp = (tmp & 0x5555) + ((tmp >> 1) & 0x5555);
296 tmp = (tmp & 0x3333) + ((tmp >> 2) & 0x3333);
297 tmp = (tmp & 0x0f0f) + ((tmp >> 4) & 0x0f0f);
298 return (tmp & 0x00ff) + ((tmp >> 8) & 0x00ff);
314 #if defined VC_MSVC && defined _WIN32 323 k[0] = _mm_setzero_ps();
324 k[1] = _mm_setzero_ps();
336 k[0] = _mm_castsi128_ps(_mm_unpacklo_epi16(a.
dataI(), a.
dataI()));
337 k[1] = _mm_castsi128_ps(_mm_unpackhi_epi16(a.
dataI(), a.
dataI()));
351 r.
k[0] = _mm_and_ps(k[0], rhs.
k[0]);
352 r.
k[1] = _mm_and_ps(k[1], rhs.
k[1]);
357 r.
k[0] = _mm_and_ps(k[0], rhs.
k[0]);
358 r.
k[1] = _mm_and_ps(k[1], rhs.
k[1]);
363 r.
k[0] = _mm_or_ps(k[0], rhs.
k[0]);
364 r.
k[1] = _mm_or_ps(k[1], rhs.
k[1]);
369 r.
k[0] = _mm_or_ps(k[0], rhs.
k[0]);
370 r.
k[1] = _mm_or_ps(k[1], rhs.
k[1]);
375 r.
k[0] = _mm_xor_ps(k[0], rhs.
k[0]);
376 r.
k[1] = _mm_xor_ps(k[1], rhs.
k[1]);
386 k[0] = _mm_and_ps(k[0], rhs.
k[0]);
387 k[1] = _mm_and_ps(k[1], rhs.
k[1]);
391 k[0] = _mm_or_ps (k[0], rhs.
k[0]);
392 k[1] = _mm_or_ps (k[1], rhs.
k[1]);
396 k[0] = _mm_xor_ps(k[0], rhs.
k[0]);
397 k[1] = _mm_xor_ps(k[1], rhs.
k[1]);
402 const _M128 tmp = _mm_and_ps(k[0], k[1]);
406 return _mm_movemask_ps(tmp) == 0xf;
412 const _M128 tmp = _mm_or_ps(k[0], k[1]);
414 return _mm_testz_si128(_mm_castps_si128(tmp), _mm_castps_si128(tmp));
416 return _mm_movemask_ps(tmp) == 0x0;
427 __m128i tmp = _mm_castps_si128(_mm_xor_ps(k[0], k[1]));
429 return !_mm_testz_si128(tmp, tmp) ||
432 const int tmp = _mm_movemask_ps(k[0]) + _mm_movemask_ps(k[1]);
433 return tmp > 0x0 && tmp < (0xf + 0xf);
437 #ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK 442 return (_mm_movemask_ps(k[1]) << 4) + _mm_movemask_ps(k[0]);
449 return (toInt() & (1 << index)) != 0;
453 #ifdef VC_IMPL_POPCNT 454 return _mm_popcnt_u32(toInt());
461 _M128I x = _mm_add_epi32(_mm_srli_epi32(_mm_castps_si128(k[0]), 31),
462 _mm_srli_epi32(_mm_castps_si128(k[1]), 31));
463 x = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
464 x = _mm_add_epi32(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 0, 3, 2)));
465 return _mm_cvtsi128_si32(x);
472 #ifdef VC_COMPILE_BENCHMARKS 480 const int mask = toInt();
483 _BitScanForward(&bit, mask);
486 __asm__(
"bsf %1,%0" :
"=&r"(bit) :
"r"(mask));
492 const int mask = toInt();
495 _BitScanForward(&bit, mask);
498 __asm__(
"bsf %1,%0" :
"=&r"(bit) :
"r"(mask));
503 template<
unsigned int VectorSize>
505 : k(_mm_castsi128_ps(_mm_packs_epi32(_mm_castps_si128(m.data()[0]), _mm_castps_si128(m.data()[1])))) {}
Vc_ALWAYS_INLINE Vc_PURE Mask< OtherSize > cast() const
Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const
static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2)
Vc_ALWAYS_INLINE Vc_PURE bool isFull() const
Vc_ALWAYS_INLINE Float8Mask()
Namespace for new ROOT classes and functions.
static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq(_M128 k1, _M128 k2)
Vc_ALWAYS_INLINE Vc_PURE int count() const
Vc_ALWAYS_INLINE Vc_PURE bool isMix() const
Vc_ALWAYS_INLINE Vc_PURE _M128 data() const
Vc_ALWAYS_INLINE Vc_PURE int shiftMask() const
Vc_ALWAYS_INLINE Mask(const Mask< VectorSize/2 > *a)
Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator!() const
Vc_ALWAYS_INLINE Mask(const __m128i &x)
Mask< LSize > operator &(const Mask< LSize > &lhs, const Mask< RSize > &rhs)
Loop over all set bits in the mask.
Vc_ALWAYS_INLINE Mask(bool b)
#define FREE_STORE_OPERATORS_ALIGNED(alignment)
static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2)
Vc_ALWAYS_INLINE_L Vc_PURE_L int count() const Vc_ALWAYS_INLINE_R Vc_PURE_R
Vc_ALWAYS_INLINE Vc_PURE Float8Mask operator||(const Float8Mask &rhs) const
Vc_ALWAYS_INLINE Float8Mask(const Mask< VectorSize > &a)
void operator &&(const Mask< LSize > &lhs, const Mask< RSize > &rhs)
Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Float8Mask &rhs) const
Vc_ALWAYS_INLINE Float8Mask(const M256 &x)
Vc_ALWAYS_INLINE bool inner()
Vc_ALWAYS_INLINE Mask(VectorSpecialInitializerZero::ZEnum)
Vc_ALWAYS_INLINE Float8Mask(VectorSpecialInitializerZero::ZEnum)
Vc_ALWAYS_INLINE void noBreak()
Vc_ALWAYS_INLINE Mask(const __m128d &x)
static Vc_ALWAYS_INLINE Vc_CONST bool cmpneq(_M128 k1, _M128 k2)
static Vc_INTRINSIC __m128i Vc_CONST _mm_setallone_si128()
Vc_ALWAYS_INLINE Vc_PURE Mask operator!() const
Vc_ALWAYS_INLINE Vc_PURE const M256 & data() const
static Vc_INTRINSIC __m128 Vc_CONST _mm_setallone_ps()
Vc_ALWAYS_INLINE Mask(const __m128 &x)
static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq(_M128 k1, _M128 k2)
#define Vc_ALWAYS_INLINE_R
Vc_ALWAYS_INLINE _long next()
Float8GatherMask(const Float8Mask &k)
Mask< VectorSize > Argument
Vc_ALWAYS_INLINE bool outer() const
Vc_ALWAYS_INLINE Mask(const Mask &rhs)
Vc_ALWAYS_INLINE Float8Mask(bool b)
Vc_ALWAYS_INLINE Vc_PURE _M128D dataD() const
Vc_ALWAYS_INLINE Vc_PURE _M128I dataI() const
Vc_ALWAYS_INLINE Vc_PURE int toInt() const
#define Vc_ALWAYS_INLINE_L
Vc_ALWAYS_INLINE_L Vc_PURE_L bool operator[](int index) const Vc_ALWAYS_INLINE_R Vc_PURE_R
Vc_ALWAYS_INLINE Vc_PURE bool isFull() const
Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R
Vc_ALWAYS_INLINE Vc_PURE bool operator[](int index) const
Vc_ALWAYS_INLINE Float8Mask(VectorSpecialInitializerOne::OEnum)
Vc_ALWAYS_INLINE Mask & operator^=(const Mask &rhs)
Mask< LSize > operator|(const Mask< LSize > &lhs, const Mask< RSize > &rhs)
static Vc_ALWAYS_INLINE Vc_CONST bool cmpeq(_M128 k1, _M128 k2)
Float8GatherMask(const Mask< 8u > &k)
Vc_ALWAYS_INLINE Float8Mask & operator|=(const Float8Mask &rhs)
Vc_ALWAYS_INLINE Vc_PURE bool isMix() const
Vc_ALWAYS_INLINE Mask(VectorSpecialInitializerOne::OEnum)
Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const
Mask< LSize > operator^(const Mask< LSize > &lhs, const Mask< RSize > &rhs)
Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Float8Mask &rhs) const
Vc_ALWAYS_INLINE_L Vc_PURE_L int shiftMask() const Vc_ALWAYS_INLINE_R Vc_PURE_R
Vc_ALWAYS_INLINE Float8Mask & operator^=(const Float8Mask &rhs)
Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Mask &rhs) const
Vc_ALWAYS_INLINE_L Vc_PURE_L int toInt() const Vc_ALWAYS_INLINE_R Vc_PURE_R
Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R
Returns the index of the first one in the mask.
Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Mask &rhs) const
void expand(Mask< VectorSize/2 > *x) const
Vc_ALWAYS_INLINE ForeachHelper(_long _mask)
void operator||(const Mask< LSize > &lhs, const Mask< RSize > &rhs)
Vc_ALWAYS_INLINE Mask & operator|=(const Mask &rhs)