Developer Guide
Developer Guide for Intel® oneAPI Math Kernel Library macOS*
ID
766688
Date
11/07/2023
Public
Getting Help and Support
What's New
Notational Conventions
Related Information
Getting Started
Structure of the Intel® oneAPI Math Kernel Library
Linking Your Application with the Intel® oneAPI Math Kernel Library
Managing Performance and Memory
Language-specific Usage Options
Obtaining Numerically Reproducible Results
Coding Tips
Managing Output
Working with the Intel® oneAPI Math Kernel Library Cluster Software
Managing Behavior of the Intel® oneAPI Math Kernel Library with Environment Variables
Configuring Your Integrated Development Environment to Link with Intel® oneAPI Math Kernel Library
Intel® oneAPI Math Kernel Library Benchmarks
Appendix A: Intel® oneAPI Math Kernel Library Language Interfaces Support
Appendix B: Support for Third-Party Interfaces
Appendix C: Directory Structure in Detail
Notices and Disclaimers
OpenMP* Threaded Functions and Problems
Functions Threaded with Intel® Threading Building Blocks
Avoiding Conflicts in the Execution Environment
Techniques to Set the Number of Threads
Setting the Number of Threads Using an OpenMP* Environment Variable
Changing the Number of OpenMP* Threads at Run Time
Using Additional Threading Control
Calling oneMKL Functions from Multi-threaded Applications
Using Intel® Hyper-Threading Technology
Changing the Number of OpenMP* Threads at Run Time
You cannot change the number of OpenMP threads at run time using environment variables. However, you can call OpenMP routines to do this. Specifically, the following sample code shows how to change the number of threads during run time using the omp_set_num_threads() routine. For more options, see also Techniques to Set the Number of Threads.
The example is provided for both C and Fortran languages. To run the example in C, use the omp.h header file from the Intel(R) compiler package. If you do not have the Intel compiler but wish to explore the functionality in the example, use Fortran API for omp_set_num_threads() rather than the C version. For example, omp_set_num_threads_( &i_one );
// ******* C language *******
#include "omp.h"
#include "mkl.h"
#include <stdio.h>
#define SIZE 1000
int main(int args, char *argv[]){
double *a, *b, *c;
a = (double*)malloc(sizeof(double)*SIZE*SIZE);
b = (double*)malloc(sizeof(double)*SIZE*SIZE);
c = (double*)malloc(sizeof(double)*SIZE*SIZE);
double alpha=1, beta=1;
int m=SIZE, n=SIZE, k=SIZE, lda=SIZE, ldb=SIZE, ldc=SIZE, i=0, j=0;
char transa='n', transb='n';
for( i=0; i<SIZE; i++)
{
for( j=0; j<SIZE; j++)
{
a[i*SIZE+j]= (double)(i+j);
b[i*SIZE+j]= (double)(i*j);
c[i*SIZE+j]= (double)0;
}
}
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
printf("row\ta\tc\n");
for ( i=0;i<10;i++)
{
printf("%d:\t%f\t%f\n", i, a[i*SIZE], c[i*SIZE]);
}
omp_set_num_threads(1);
for( i=0; i<SIZE; i++)
{
for( j=0; j<SIZE; j++)
{
a[i*SIZE+j]= (double)(i+j);
b[i*SIZE+j]= (double)(i*j);
c[i*SIZE+j]= (double)0;
}
}
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
printf("row\ta\tc\n");
for ( i=0;i<10;i++)
{
printf("%d:\t%f\t%f\n", i, a[i*SIZE], c[i*SIZE]);
}
omp_set_num_threads(2);
for( i=0; i<SIZE; i++)
{
for( j=0; j<SIZE; j++)
{
a[i*SIZE+j]= (double)(i+j);
b[i*SIZE+j]= (double)(i*j);
c[i*SIZE+j]= (double)0;
}
}
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
printf("row\ta\tc\n");
for ( i=0;i<10;i++)
{
printf("%d:\t%f\t%f\n", i, a[i*SIZE],
c[i*SIZE]);
}
free (a);
free (b);
free (c);
return 0;
}
// ******* Fortran language *******
PROGRAM DGEMM_DIFF_THREADS
INTEGER N, I, J
PARAMETER (N=100)
REAL*8 A(N,N),B(N,N),C(N,N)
REAL*8 ALPHA, BETA
ALPHA = 1.1
BETA = -1.2
DO I=1,N
DO J=1,N
A(I,J) = I+J
B(I,J) = I*j
C(I,J) = 0.0
END DO
END DO
CALL DGEMM('N','N',N,N,N,ALPHA,A,N,B,N,BETA,C,N)
print *,'Row A C'
DO i=1,10
write(*,'(I4,F20.8,F20.8)') I, A(1,I),C(1,I)
END DO
CALL OMP_SET_NUM_THREADS(1);
DO I=1,N
DO J=1,N
A(I,J) = I+J
B(I,J) = I*j
C(I,J) = 0.0
END DO
END DO
CALL DGEMM('N','N',N,N,N,ALPHA,A,N,B,N,BETA,C,N)
print *,'Row A C'
DO i=1,10
write(*,'(I4,F20.8,F20.8)') I, A(1,I),C(1,I)
END DO
CALL OMP_SET_NUM_THREADS(2);
DO I=1,N
DO J=1,N
A(I,J) = I+J
B(I,J) = I*j
C(I,J) = 0.0
END DO
END DO
CALL DGEMM('N','N',N,N,N,ALPHA,A,N,B,N,BETA,C,N)
print *,'Row A C'
DO i=1,10
write(*,'(I4,F20.8,F20.8)') I, A(1,I),C(1,I)
END DO
STOP
END
Parent topic: Improving Performance with Threading