Intel® C++ Compiler Classic Developer Guide and Reference

ID 767249
Date 12/16/2022
Public

A newer version of this document is available. Customers should click here to go to the newest version.

Document Table of Contents

Intrinsics for Integer Reduction Operations

The prototypes for Intel® Advanced Vector Extensions 512 (Intel® AVX-512) intrinsics are located in the zmmintrin.h header file.

To use these intrinsics, include the immintrin.h file as follows:

#include <immintrin.h>


Intrinsic Name

Operation

Corresponding
Intel® AVX-512 Instruction

_mm512_reduce_add_epi32, _mm512_mask_reduce_add_epi32

Reduces int32 elements by addition.

None.

_mm512_reduce_add_epi64, _mm512_mask_reduce_add_epi64

Reduces int64 elements by addition.

None.

_mm512_reduce_mul_epi32, _mm512_mask_reduce_mul_epi32

Reduces int32 elements by multiplication.

None.

_mm512_reduce_mul_epi64, _mm512_mask_reduce_mul_epi64

Reduces int64 elements by multiplication.

None.

_mm512_reduce_min_epi32, _mm512_mask_reduce_min_epi32

Reduces signed int32 elements to their minimum value.

None.

_mm512_reduce_min_epi64, _mm512_mask_reduce_min_epi64

Reduces signed int64 elements to their minimum value.

None.

_mm512_reduce_min_epu32, _mm512_mask_reduce_min_epu32

Reduces unsigned int32 elements to their minimum value.

None.

_mm512_reduce_min_epu64, _mm512_mask_reduce_min_epu64

Reduces unsigned int64 elements to their minimum value.

None.

_mm512_reduce_max_epi32, _mm512_mask_reduce_max_epi32

Reduces signed int32 elements to their maximum value.

None.

_mm512_reduce_max_epi64, _mm512_mask_reduce_max_epi64

Reduces signed int64 elements to their maximum value.

None.

_mm512_reduce_max_epu32, _mm512_mask_reduce_max_epu32

Reduces unsigned int32 elements to their maximum value.

None.

_mm512_reduce_max_epu64, _mm512_mask_reduce_max_epu64

Reduces unsigned int64 elements to their maximum value.

None.

_mm512_reduce_or_epi32, _mm512_mask_reduce_or_epi32

Reduces int32 elements by bitwise OR.

None.

_mm512_reduce_or_epi64, _mm512_mask_reduce_or_epi64

Reduces int64 elements by bitwise OR.

None.

_mm512_reduce_and_epi32, _mm512_mask_reduce_and_epi32

Reduces int32 elements by bitwise AND.

None.

_mm512_reduce_and_epi64, _mm512_mask_reduce_and_epi64

Reduces int64 elements by bitwise AND.

None.

variable definition
k

writemask used as a selector

a

source vector whose elements are reduced

src

source element to use based on writemask result


_mm512_reduce_and_epi32

extern int __cdecl _mm512_reduce_and_epi32(__m512i a);

Reduces the packed int32 elements in a by bitwise AND.

Returns the bitwise AND of all elements in a.


_mm512_mask_reduce_and_epi32

extern int __cdecl _mm512_mask_reduce_and_epi32(__mmask16 k, __m512i a);

Reduces the packed int32 elements in a by bitwise AND using mask k.

Returns the bitwise AND of all active elements in a.



_mm512_reduce_and_epi64

extern __int64 __cdecl _mm512_reduce_and_epi64(__m512i a);

Reduces the packed int64 elements in a by bitwise AND.

Returns the bitwise AND of all elements in a.


_mm512_mask_reduce_and_epi64

extern __int64 __cdecl _mm512_mask_reduce_and_epi64(__mmask8 k, __m512i a);

Reduces the packed int64 elements in a by bitwise AND using mask k.

Only those elements in the source register with the corresponding bit set in vector mask k are used for computing. Elements in a with the corresponding bit clear in k are ignored and do not contribute to the returned scalar result.

Returns the bitwise AND of all active elements in a.



_mm512_reduce_add_epi32

extern int __cdecl _mm512_reduce_add_epi32(__m512i a);

Reduces the packed int32 elements in a by addition.

Returns the sum of all elements in a.


_mm512_mask_reduce_add_epi32

extern int __cdecl _mm512_mask_reduce_add_epi32(__mmask16 k, __m512i a);

Reduces the packed int32 elements in a by addition using mask k.

Returns the sum of all active elements in a.



_mm512_reduce_add_epi64

extern __int64 __cdecl _mm512_reduce_add_epi64(__m512i a);

Reduces the packed int64 elements in a by addition.

Returns the sum of all elements in a.


_mm512_mask_reduce_add_epi64

extern __int64 __cdecl _mm512_mask_reduce_add_epi64(__mmask8 k, __m512i a);

Reduce the packed int64 elements in a by addition, using mask k.

Only those elements in the source register with the corresponding bit set in vector mask k are used for computing. Elements in a with the corresponding bit clear in k are ignored and do not contribute to the returned scalar result.

Returns the sum of all active elements in a.



_mm512_reduce_max_epi32

extern int __cdecl _mm512_reduce_max_epi32(__m512i a);

Reduce the packed int32 elements in a by maximum.

Returns the maximum of all elements in a.


_mm512_mask_reduce_max_epi32

extern int __cdecl _mm512_mask_reduce_max_epi32(__mmask16 k, __m512i a);

Reduce the packed int32 elements in a by maximum using mask k.

Returns the maximum of all active elements in a.



_mm512_reduce_max_epi64

extern __int64 __cdecl _mm512_reduce_max_epi64(__m512i a);

Reduce the packed int64 elements in a by maximum.

Returns the maximum of all elements in a.


_mm512_mask_reduce_max_epi64

extern __int64 __cdecl _mm512_mask_reduce_max_epi64(__mmask8 k, __m512i a);

Reduce the packed int64 elements in a by maximum using mask k.

Only those elements in the source register with the corresponding bit set in vector mask k are used for computing. Elements in a with the corresponding bit clear in k are ignored and do not contribute to the returned scalar result.

Returns the maximum of all active elements in a.



_mm512_reduce_max_epu32

extern unsigned int __cdecl _mm512_reduce_max_epu32(__m512i a);

Reduce the packed unsigned int32 elements in a by maximum.

Returns the maximum of all elements in a.


_mm512_mask_reduce_max_epu32

extern unsigned int __cdecl _mm512_mask_reduce_max_epu32(__mmask16 k, __m512i a);

Reduce the packed unsigned int32 elements in a by maximum using mask k.

Returns the maximum of all active elements in a.



_mm512_reduce_max_epu64

extern unsigned __int64 __cdecl _mm512_reduce_max_epu64(__m512i a);

Reduce the packed unsigned int64 elements in a by maximum.

Returns the maximum of all elements in a.


_mm512_mask_reduce_max_epu64

extern unsigned __int64 __cdecl _mm512_mask_reduce_max_epu64(__mmask8 k, __m512i a);

Reduce the packed unsigned int64 elements in a by maximum using mask k.

Only those elements in the source register with the corresponding bit set in vector mask k are used for computing. Elements in a with the corresponding bit clear in k are ignored and do not contribute to the returned scalar result.

Returns the maximum of all active elements in a.



_mm512_reduce_min_epi32

extern int __cdecl _mm512_reduce_min_epi32(__m512i a);

Reduce the packed int32 elements in a by minimum.

Returns the minimum of all elements in a.


_mm512_mask_reduce_min_epi32

extern int __cdecl _mm512_mask_reduce_min_epi32(__mmask16 k, __m512i a);

Reduce the packed int32 elements in a by minimum using mask k.

Returns the minimum of all active elements in a.



_mm512_reduce_min_epi64

extern __int64 __cdecl _mm512_reduce_min_epi64(__m512i a);

Reduce the packed int64 elements in a by minimum.

Returns the minimum of all elements in a.


_mm512_mask_reduce_min_epi64

extern __int64 __cdecl _mm512_mask_reduce_min_epi64(__mmask8 k, __m512i a);

Reduce the packed int64 elements in a by minimum, using mask k.

Only those elements in the source register with the corresponding bit set in vector mask k are used for computing. Elements in a with the corresponding bit clear in k are ignored and do not contribute to the returned scalar result.

Returns the minimum of all active elements in a.



_mm512_reduce_min_epu32

extern unsigned int __cdecl _mm512_reduce_min_epu32(__m512i a);

Reduce the packed unsigned int32 elements in a by minimum.

Returns the minimum of all elements in a.



_mm512_mask_reduce_min_epu32

extern unsigned int __cdecl _mm512_mask_reduce_min_epu32(__mmask16 k, __m512i a);

Reduce the packed unsigned int32 elements in a by minimum using mask k.

Returns the minimum of all active elements in a.



_mm512_reduce_min_epu64

extern unsigned __int64 __cdecl _mm512_reduce_min_epu64(__m512i a);

Reduce the packed unsigned int64 elements in a by minimum.

Returns the minimum of all elements in a.


_mm512_mask_reduce_min_epu64

extern unsigned __int64 __cdecl _mm512_mask_reduce_min_epu64(__mmask8 k, __m512i a);

Reduce the packed unsigned int64 elements in a by minimum using mask k.

Only those elements in the source register with the corresponding bit set in vector mask k are used for computing. Elements in a with the corresponding bit clear in k are ignored and do not contribute to the returned scalar result.

Returns the minimum of all active elements in a.



_mm512_reduce_mul_epi32

extern int __cdecl _mm512_reduce_mul_epi32(__m512i a);

Reduce the packed int32 elements in a by multiplication.

Returns the product of all elements in a.


_mm512_mask_reduce_mul_epi32

extern int __cdecl _mm512_mask_reduce_mul_epi32(__mmask16 k, __m512i a);

Reduce the packed int32 elements in a by multiplication using mask k.

Returns the product of all active elements in a.



_mm512_reduce_mul_epi64

extern __int64 __cdecl _mm512_reduce_mul_epi64(__m512i a);

Reduce the packed int64 elements in a by multiplication.

Returns the product of all elements in a.


_mm512_mask_reduce_mul_epi64

extern __int64 __cdecl _mm512_mask_reduce_mul_epi64(__mmask8 k, __m512i a);

Reduce the packed int64 elements in a by multiplication using mask k.

Only those elements in the source register with the corresponding bit set in vector mask k are used for computing. Elements in a with the corresponding bit clear in k are ignored and do not contribute to the returned scalar result.

Returns the product of all active elements in a.



_mm512_reduce_or_epi32

extern int __cdecl _mm512_reduce_or_epi32(__m512i a);

Reduce the packed int32 elements in a by bitwise OR.

Returns the bitwise OR of all elements in a.


_mm512_mask_reduce_or_epi32

extern int __cdecl _mm512_mask_reduce_or_epi32(__mmask16 k, __m512i a);

Reduce the packed int32 elements in a by bitwise OR using mask k.

Returns the bitwise OR of all active elements in a.



_mm512_reduce_or_epi64

extern __int64 __cdecl _mm512_reduce_or_epi64(__m512i a);

Reduce the packed int64 elements in a by bitwise OR.

Returns the bitwise OR of all elements in a.


_mm512_mask_reduce_or_epi64

extern __int64 __cdecl _mm512_mask_reduce_or_epi64(__mmask8 k, __m512i a);

Reduce the packed int64 elements in a by bitwise OR using mask k.

Only those elements in the source register with the corresponding bit set in vector mask k are used for computing. Elements in a with the corresponding bit clear in k are ignored and do not contribute to the returned scalar result.

Returns the bitwise OR of all active elements in a.