Developer Guide
Developer Guide for Intel® oneAPI Math Kernel Library Linux*
                    
                        ID
                        766690
                    
                
                
                    Date
                    6/30/2025
                
                
                    Public
                
            
                        
                        
                            
                            
                                Getting Help and Support
                            
                        
                            
                            
                                What's New
                            
                        
                            
                            
                                Notational Conventions
                            
                        
                            
                            
                                Related Information
                            
                        
                            
                                Getting Started
                            
                            
                        
                            
                                Structure of the Intel® oneAPI Math Kernel Library
                            
                            
                        
                            
                                Linking Your Application with the Intel® oneAPI Math Kernel Library
                            
                            
                        
                            
                                Managing Performance and Memory
                            
                            
                        
                            
                                Language-Specific Usage Options
                            
                            
                        
                            
                                Obtaining Numerically Reproducible Results
                            
                            
                        
                            
                                Coding Tips
                            
                            
                        
                            
                                Managing Output
                            
                            
                        
                            
                                Working with the Intel® Math Kernel Library Cluster Edition Software
                            
                            
                        
                            
                                Managing Behavior of the Intel® oneAPI Math Kernel Library with Environment Variables
                            
                            
                        
                            
                                Configuring Your Integrated Development Environment to Link with Intel® oneAPI Math Kernel Library
                            
                            
                        
                            
                                Intel® Math Kernel Library Benchmarks
                            
                            
                        
                            
                                Appendix A: Intel® oneAPI Math Kernel Library Language Interfaces Support
                            
                            
                        
                            
                                Appendix B: Support for Third-Party Interfaces
                            
                            
                        
                            
                                Appendix C: Directory Structure in Detail
                            
                            
                        
                            
                            
                                Notices and Disclaimers
                            
                        
                    
                
                                                
                                                
                                                    
                                                    
                                                        OpenMP* Threaded Functions and Problems
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Functions Threaded with Intel® Threading Building Blocks
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Avoiding Conflicts in the Execution Environment
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Techniques to Set the Number of Threads
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Setting the Number of Threads Using an OpenMP* Environment Variable
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Changing the Number of OpenMP* Threads at Run Time
                                                    
                                                    
                                                
                                                    
                                                        Using Additional Threading Control
                                                    
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Calling oneMKL Functions from Multi-threaded Applications
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Using Intel® Hyper-Threading Technology
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Managing Multi-core Performance
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Managing Performance with Heterogeneous Cores
                                                    
                                                    
                                                
                                            
                                        
                                    
                                    
                                        
                                        
                                            Getting Started with Conditional Numerical Reproducibility
                                        
                                        
                                    
                                        
                                        
                                            Specifying Code Branches
                                        
                                        
                                    
                                        
                                        
                                            Reproducibility Conditions
                                        
                                        
                                    
                                        
                                        
                                            Setting the Environment Variable for Conditional Numerical Reproducibility
                                        
                                        
                                    
                                        
                                        
                                            Code Examples
                                        
                                        
                                            
                                                C Example of CNR
                                            
                                                Fortran Example of CNR
                                            
                                                Use of CNR with Unaligned Data in C
                                            
                                                Use of CNR with Unaligned Data in Fortran
                                            
                                        
                                    
                                
                            
                                                
                                                
                                                    
                                                    
                                                        Overview of the Intel® Distribution for LINPACK* Benchmark
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Overview of the Intel® Optimized HPL-AI* Benchmark
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Contents of the Intel® Distribution for LINPACK* Benchmark and the Intel® Optimized HPL-AI* Benchmark
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Building the Intel® Distribution for LINPACK* Benchmark and the Intel® Optimized HPL-AI* Benchmark for a Customized MPI Implementation
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Building the Netlib HPL from Source Code
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Configuring Parameters
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Ease-of-use Command-Line Parameters
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Running the Intel® Distribution for LINPACK* Benchmark and the Intel® Optimized HPL-AI* Benchmark
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Heterogeneous Support in the Intel® Distribution for LINPACK* Benchmark
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Environment Variables
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Improving Performance of Your Cluster
                                                    
                                                    
                                                
                                            
                                        
                                                
                                                
                                                    
                                                    
                                                        Overview of the Intel® Optimized HPCG
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Versions of the Intel® CPU Optimized HPCG
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Versions of the Intel® GPU Optimized HPCG
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Getting Started with Intel® CPU Optimized HPCG
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Getting Started with Intel® GPU Optimized HPCG
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Choosing the Best Configuration and Problem Sizes for CPUs
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Choosing the Best HPCG Configuration for GPUs
                                                    
                                                    
                                                
                                            
                                        Code Examples
The following simple programs show how to obtain results that are reproducible from run to run when calling Intel® oneAPI Math Kernel Library (oneMKL) functions. See the Intel® oneAPI Math Kernel Library (oneMKL) Developer Reference for more examples.
C Example of CNR
#include <stdio.h>
#include <mkl.h>

/*
 * C example of Conditional Numerical Reproducibility (CNR) with aligned data.
 *
 * Allocates a 64-byte-aligned array with mkl_malloc, asks oneMKL for the
 * CNR branch via mkl_cbwr_get_auto_branch, and enables that branch with
 * mkl_cbwr_set before the section of code that needs reproducible results.
 *
 * Returns 0 on success and 1 if the allocation or the CNR setup fails.
 */
int main(void) {
    int my_cbwr_branch;
    /* Align all input/output data on 64-byte boundaries */
    /* for best performance of Intel® oneAPI Math Kernel Library (oneMKL) */
    double *darray;
    int darray_size=1000;
    /* Set alignment value in bytes */
    int alignment=64;
    /* Allocate aligned array */
    darray = (double *)mkl_malloc (sizeof(double)*darray_size, alignment);
    /* Stop early if the allocation failed instead of using a null pointer */
    if (darray == NULL) {
        printf("Error in allocating the aligned array! Aborting...\n");
        return 1;
    }
    /* Find the available MKL_CBWR_BRANCH automatically */
    my_cbwr_branch = mkl_cbwr_get_auto_branch();
    /* User code without oneMKL calls */
    /* Piece of the code where CNR of oneMKL is needed */
    /* The performance of oneMKL functions might be reduced for CNR mode */
    /* If the "IF" statement below is commented out, Intel® oneAPI Math Kernel Library (oneMKL) will run in a regular mode, */
    /* and data alignment will allow you to get best performance */
    if (mkl_cbwr_set(my_cbwr_branch) != MKL_CBWR_SUCCESS) {
        printf("Error in setting MKL_CBWR_BRANCH! Aborting…\n");
        /* Release the buffer on the error path and report failure to the caller */
        mkl_free(darray);
        return 1;
    }
    /* CNR calls to oneMKL + any other code */
    /* Free the allocated aligned array */
    mkl_free(darray);
    return 0;
}
 
  Fortran Example of CNR
! Fortran example of Conditional Numerical Reproducibility (CNR) with
! aligned data: allocates a 64-byte-aligned array through MKL_MALLOC,
! queries the CNR branch with MKL_CBWR_GET_AUTO_BRANCH, and enables it
! with MKL_CBWR_SET. Stops with a nonzero code if any step fails.
PROGRAM MAIN
    IMPLICIT NONE
    INCLUDE 'mkl.fi'
    INTEGER*4 MY_CBWR_BRANCH
! Align all input/output data on 64-byte boundaries
! for best performance of Intel® oneAPI Math Kernel Library (oneMKL)
! NOTE(review): MKL_MALLOC and MKL_FREE are not declared here, so their
! interfaces are assumed to come from mkl.fi - confirm
! DARRAY is accessed through the Cray-style pointer P_DARRAY
    DOUBLE PRECISION DARRAY
    POINTER (P_DARRAY,DARRAY(1))
    INTEGER DARRAY_SIZE
    PARAMETER (DARRAY_SIZE=1000)
! Set alignment value in bytes
    INTEGER ALIGNMENT
    PARAMETER (ALIGNMENT=64)
! Allocate aligned array (8 bytes per DOUBLE PRECISION element)
    INTEGER*8 ALLOC_SIZE
    ALLOC_SIZE = 8*DARRAY_SIZE
    P_DARRAY = MKL_MALLOC (ALLOC_SIZE, ALIGNMENT)
! A zero pointer means that the allocation failed
    IF (P_DARRAY .EQ. 0) THEN
        PRINT *, 'Error in allocating the aligned array! Aborting...'
        STOP 1
    ENDIF
! Find the available MKL_CBWR_BRANCH automatically
    MY_CBWR_BRANCH = MKL_CBWR_GET_AUTO_BRANCH()
! User code without oneMKL calls
! Piece of the code where CNR of oneMKL is needed
! The performance of oneMKL functions may be reduced for CNR mode
! If the "IF" statement below is commented out,
! Intel® oneAPI Math Kernel Library (oneMKL) will run in a
! regular mode, and data alignment will enable you to get the best performance
    IF (MKL_CBWR_SET (MY_CBWR_BRANCH) .NE. MKL_CBWR_SUCCESS) THEN
        PRINT *, 'Error in setting MKL_CBWR_BRANCH! Aborting…'
! Release the buffer and report failure with a nonzero stop code
        CALL MKL_FREE(P_DARRAY)
        STOP 1
    ENDIF
! CNR calls to oneMKL + any other code
! Free the allocated aligned array
    CALL MKL_FREE(P_DARRAY)
END PROGRAM MAIN
 
  Use of CNR with Unaligned Data in C
#include <stdio.h>
#include <stdlib.h>
#include <mkl.h>

/*
 * C example of Conditional Numerical Reproducibility (CNR) with unaligned data.
 *
 * Allocates the working array with plain malloc (no 64-byte alignment),
 * asks oneMKL for the CNR branch via mkl_cbwr_get_auto_branch, and enables
 * that branch with mkl_cbwr_set before the section of code that needs
 * reproducible results.
 *
 * Returns 0 on success and 1 if the allocation or the CNR setup fails.
 */
int main(void) {
     int my_cbwr_branch;
     /* If it is not possible to align all input/output data on 64-byte boundaries */
     /* to achieve performance, use unaligned IO data with possible performance */
     /* penalty */
     /* Using unaligned IO data */
     double *darray;
     int darray_size=1000;
     /* Allocate array, malloc aligns data on 8/16-byte boundary only */
     darray = (double *)malloc (sizeof(double)*darray_size);
     /* Stop early if the allocation failed instead of using a null pointer */
     if (darray == NULL) {
          printf("Error in allocating the array! Aborting...\n");
          return 1;
     }
     /* Find the available MKL_CBWR_BRANCH automatically */
     my_cbwr_branch = mkl_cbwr_get_auto_branch();
     /* User code without oneMKL calls */
     /* Piece of the code where CNR of oneMKL is needed */
     /* The performance of oneMKL functions might be reduced for CNR mode */
     /* If the "IF" statement below is commented out, oneMKL will run in a regular mode, */
     /* and you will NOT get best performance without data alignment */
     if (mkl_cbwr_set(my_cbwr_branch) != MKL_CBWR_SUCCESS) {
          printf("Error in setting MKL_CBWR_BRANCH! Aborting…\n");
          /* Release the buffer on the error path and report failure to the caller */
          free(darray);
          return 1;
     }
     /* CNR calls to oneMKL + any other code */
     /* Free the allocated array */
     free(darray);
     return 0;
}
 
  Use of CNR with Unaligned Data in Fortran
     PROGRAM MAIN
! Fortran example of Conditional Numerical Reproducibility (CNR) with
! unaligned data: allocates the working array with ALLOCATE, queries the
! CNR branch with MKL_CBWR_GET_AUTO_BRANCH, and enables it with
! MKL_CBWR_SET. Stops with a nonzero code if any step fails.
     IMPLICIT NONE
     INCLUDE 'mkl.fi'
     INTEGER*4 MY_CBWR_BRANCH
! If it is not possible to align all input/output data on 64-byte boundaries
! to achieve performance, use unaligned IO data with possible performance
! penalty
     DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: DARRAY
     INTEGER DARRAY_SIZE, STATUS
     PARAMETER (DARRAY_SIZE=1000)
! Allocate array with undefined alignment; STATUS is nonzero on failure
     ALLOCATE(DARRAY(DARRAY_SIZE), STAT=STATUS)
     IF (STATUS .NE. 0) THEN
          PRINT *, 'Error in allocating the array! Aborting...'
          STOP 1
     ENDIF
! Find the available MKL_CBWR_BRANCH automatically
     MY_CBWR_BRANCH = MKL_CBWR_GET_AUTO_BRANCH()
! User code without oneMKL calls
! Piece of the code where CNR of oneMKL is needed
! The performance of oneMKL functions might be reduced for CNR mode
! If the "IF" statement below is commented out, oneMKL will run in a regular mode,
! and you will NOT get best performance without data alignment
     IF (MKL_CBWR_SET(MY_CBWR_BRANCH) .NE. MKL_CBWR_SUCCESS) THEN
          PRINT *, 'Error in setting MKL_CBWR_BRANCH! Aborting…'
! RETURN is not valid in a main program: release the array and stop
! with a nonzero code so that the failure is visible to the caller
          DEALLOCATE(DARRAY)
          STOP 1
     ENDIF
! CNR calls to oneMKL + any other code
! Free the allocated array
     DEALLOCATE(DARRAY)
     END PROGRAM MAIN
 
   Parent topic: Obtaining Numerically Reproducible Results