Intel® C++ Compiler Classic Developer Guide and Reference

ID 767249
Date 3/31/2023
Public

A newer version of this document is available. Customers should click here to go to the newest version.

Document Table of Contents

Intrinsics for Move Operations

The prototypes for Intel® Advanced Vector Extensions 512 (Intel® AVX-512) intrinsics are located in the zmmintrin.h header file.

To use these intrinsics, include the top-level immintrin.h header file (rather than including zmmintrin.h directly), as follows:

#include <immintrin.h>


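Variable | Definition
-------- | ----------
src      | source vector; its elements are copied to the result when the corresponding writemask bit is not set
k        | writemask (or zeromask) used as a per-element selector
a        | source vector whose elements are moved or duplicated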


_mm_mask_mov_pd

__m128d _mm_mask_mov_pd(__m128d src, __mmask8 k, __m128d a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovapd

Move packed double-precision (64-bit) floating-point elements from a to the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_mov_pd

__m128d _mm_maskz_mov_pd(__mmask8 k, __m128d a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovapd

Move packed double-precision (64-bit) floating-point elements from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_mov_pd

__m256d _mm256_mask_mov_pd(__m256d src, __mmask8 k, __m256d a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovapd

Move packed double-precision (64-bit) floating-point elements from a to the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_mov_pd

__m256d _mm256_maskz_mov_pd(__mmask8 k, __m256d a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovapd

Move packed double-precision (64-bit) floating-point elements from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm_mask_mov_ps

__m128 _mm_mask_mov_ps(__m128 src, __mmask8 k, __m128 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovaps

Move packed single-precision (32-bit) floating-point elements from a to the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_mov_ps

__m128 _mm_maskz_mov_ps(__mmask8 k, __m128 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovaps

Move packed single-precision (32-bit) floating-point elements from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_mov_ps

__m256 _mm256_mask_mov_ps(__m256 src, __mmask8 k, __m256 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovaps

Move packed single-precision (32-bit) floating-point elements from a to the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_mov_ps

__m256 _mm256_maskz_mov_ps(__mmask8 k, __m256 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovaps

Move packed single-precision (32-bit) floating-point elements from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm_mask_movedup_pd

__m128d _mm_mask_movedup_pd(__m128d src, __mmask8 k, __m128d a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovddup

Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and return the results using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_movedup_pd

__m128d _mm_maskz_movedup_pd(__mmask8 k, __m128d a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovddup

Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and return the results using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_movedup_pd

__m256d _mm256_mask_movedup_pd(__m256d src, __mmask8 k, __m256d a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovddup

Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and return the results using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_movedup_pd

__m256d _mm256_maskz_movedup_pd(__mmask8 k, __m256d a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovddup

Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and return the results using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm_mask_movehdup_ps

__m128 _mm_mask_movehdup_ps(__m128 src, __mmask8 k, __m128 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovshdup

Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and return the results using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_movehdup_ps

__m128 _mm_maskz_movehdup_ps(__mmask8 k, __m128 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovshdup

Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and return the results using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_movehdup_ps

__m256 _mm256_mask_movehdup_ps(__m256 src, __mmask8 k, __m256 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovshdup

Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and return the results using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_movehdup_ps

__m256 _mm256_maskz_movehdup_ps(__mmask8 k, __m256 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovshdup

Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and return the results using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm_mask_moveldup_ps

__m128 _mm_mask_moveldup_ps(__m128 src, __mmask8 k, __m128 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovsldup

Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and return the results using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_moveldup_ps

__m128 _mm_maskz_moveldup_ps(__mmask8 k, __m128 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovsldup

Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and return the results using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_moveldup_ps

__m256 _mm256_mask_moveldup_ps(__m256 src, __mmask8 k, __m256 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovsldup

Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and return the results using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_moveldup_ps

__m256 _mm256_maskz_moveldup_ps(__mmask8 k, __m256 a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovsldup

Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and return the results using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm_mask_mov_epi32

__m128i _mm_mask_mov_epi32(__m128i src, __mmask8 k, __m128i a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovdqa32

Move packed 32-bit integers from a to the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_mov_epi32

__m128i _mm_maskz_mov_epi32(__mmask8 k, __m128i a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovdqa32

Move packed 32-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_mov_epi32

__m256i _mm256_mask_mov_epi32(__m256i src, __mmask8 k, __m256i a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovdqa32

Move packed 32-bit integers from a to the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_mov_epi32

__m256i _mm256_maskz_mov_epi32(__mmask8 k, __m256i a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovdqa32

Move packed 32-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm_mask_mov_epi64

__m128i _mm_mask_mov_epi64(__m128i src, __mmask8 k, __m128i a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovdqa64

Move packed 64-bit integers from a to the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_mov_epi64

__m128i _mm_maskz_mov_epi64(__mmask8 k, __m128i a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovdqa64

Move packed 64-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_mov_epi64

__m256i _mm256_mask_mov_epi64(__m256i src, __mmask8 k, __m256i a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovdqa64

Move packed 64-bit integers from a to the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_mov_epi64

__m256i _mm256_maskz_mov_epi64(__mmask8 k, __m256i a)

CPUID Flags: AVX512F, AVX512VL

Instruction(s): vmovdqa64

Move packed 64-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm_mask_mov_epi16

__m128i _mm_mask_mov_epi16(__m128i src, __mmask8 k, __m128i a)

CPUID Flags: AVX512BW, AVX512VL

Instruction(s): vmovdqu16

Move packed 16-bit integers from a into the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_mov_epi16

__m128i _mm_maskz_mov_epi16(__mmask8 k, __m128i a)

CPUID Flags: AVX512BW, AVX512VL

Instruction(s): vmovdqu16

Move packed 16-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_mov_epi16

__m256i _mm256_mask_mov_epi16(__m256i src, __mmask16 k, __m256i a)

CPUID Flags: AVX512BW, AVX512VL

Instruction(s): vmovdqu16

Move packed 16-bit integers from a into the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_mov_epi16

__m256i _mm256_maskz_mov_epi16(__mmask16 k, __m256i a)

CPUID Flags: AVX512BW, AVX512VL

Instruction(s): vmovdqu16

Move packed 16-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm512_mask_mov_epi16

__m512i _mm512_mask_mov_epi16(__m512i src, __mmask32 k, __m512i a)

CPUID Flags: AVX512BW

Instruction(s): vmovdqu16

Move packed 16-bit integers from a into the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm512_maskz_mov_epi16

__m512i _mm512_maskz_mov_epi16(__mmask32 k, __m512i a)

CPUID Flags: AVX512BW

Instruction(s): vmovdqu16

Move packed 16-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm_mask_mov_epi8

__m128i _mm_mask_mov_epi8(__m128i src, __mmask16 k, __m128i a)

CPUID Flags: AVX512BW, AVX512VL

Instruction(s): vmovdqu8

Move packed 8-bit integers from a into the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm_maskz_mov_epi8

__m128i _mm_maskz_mov_epi8(__mmask16 k, __m128i a)

CPUID Flags: AVX512BW, AVX512VL

Instruction(s): vmovdqu8

Move packed 8-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm256_mask_mov_epi8

__m256i _mm256_mask_mov_epi8(__m256i src, __mmask32 k, __m256i a)

CPUID Flags: AVX512BW, AVX512VL

Instruction(s): vmovdqu8

Move packed 8-bit integers from a into the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm256_maskz_mov_epi8

__m256i _mm256_maskz_mov_epi8(__mmask32 k, __m256i a)

CPUID Flags: AVX512BW, AVX512VL

Instruction(s): vmovdqu8

Move packed 8-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).



_mm512_mask_mov_epi8

__m512i _mm512_mask_mov_epi8(__m512i src, __mmask64 k, __m512i a)

CPUID Flags: AVX512BW

Instruction(s): vmovdqu8

Move packed 8-bit integers from a into the return value using writemask k (elements are copied from src when the corresponding mask bit is not set).



_mm512_maskz_mov_epi8

__m512i _mm512_maskz_mov_epi8(__mmask64 k, __m512i a)

CPUID Flags: AVX512BW

Instruction(s): vmovdqu8

Move packed 8-bit integers from a into the return value using zeromask k (elements are zeroed out when the corresponding mask bit is not set).