Intel® oneAPI Deep Neural Network Developer Guide and Reference
                    
                        ID
                        768875
                    
                
                
                    Date
                    2/28/2024
                
                
                    Public
                
            A newer version of this document is available. Customers should click here to go to the newest version.
                                                
                                                
                                                    
                                                    
                                                        Abs
                                                    
                                                    
                                                
                                                    
                                                    
                                                        AbsBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Add
                                                    
                                                    
                                                
                                                    
                                                    
                                                        AvgPool
                                                    
                                                    
                                                
                                                    
                                                    
                                                        AvgPoolBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        BatchNormForwardTraining
                                                    
                                                    
                                                
                                                    
                                                    
                                                        BatchNormInference
                                                    
                                                    
                                                
                                                    
                                                    
                                                        BatchNormTrainingBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        BiasAdd
                                                    
                                                    
                                                
                                                    
                                                    
                                                        BiasAddBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Clamp
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ClampBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Concat
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Convolution
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ConvolutionBackwardData
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ConvolutionBackwardWeights
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ConvTranspose
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ConvTransposeBackwardData
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ConvTransposeBackwardWeights
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Dequantize
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Divide
                                                    
                                                    
                                                
                                                    
                                                    
                                                        DynamicDequantize
                                                    
                                                    
                                                
                                                    
                                                    
                                                        DynamicQuantize
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Elu
                                                    
                                                    
                                                
                                                    
                                                    
                                                        EluBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        End
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Exp
                                                    
                                                    
                                                
                                                    
                                                    
                                                        GELU
                                                    
                                                    
                                                
                                                    
                                                    
                                                        GELUBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        HardSigmoid
                                                    
                                                    
                                                
                                                    
                                                    
                                                        HardSigmoidBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        HardSwish
                                                    
                                                    
                                                
                                                    
                                                    
                                                        HardSwishBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Interpolate
                                                    
                                                    
                                                
                                                    
                                                    
                                                        InterpolateBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        LayerNorm
                                                    
                                                    
                                                
                                                    
                                                    
                                                        LayerNormBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        LeakyReLU
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Log
                                                    
                                                    
                                                
                                                    
                                                    
                                                        LogSoftmax
                                                    
                                                    
                                                
                                                    
                                                    
                                                        LogSoftmaxBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        MatMul
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Maximum
                                                    
                                                    
                                                
                                                    
                                                    
                                                        MaxPool
                                                    
                                                    
                                                
                                                    
                                                    
                                                        MaxPoolBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Minimum
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Mish
                                                    
                                                    
                                                
                                                    
                                                    
                                                        MishBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Multiply
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Pow
                                                    
                                                    
                                                
                                                    
                                                    
                                                        PReLU
                                                    
                                                    
                                                
                                                    
                                                    
                                                        PReLUBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Quantize
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Reciprocal
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReduceL1
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReduceL2
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReduceMax
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReduceMean
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReduceMin
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReduceProd
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReduceSum
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReLU
                                                    
                                                    
                                                
                                                    
                                                    
                                                        ReLUBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Reorder
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Round
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Select
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Sigmoid
                                                    
                                                    
                                                
                                                    
                                                    
                                                        SigmoidBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        SoftMax
                                                    
                                                    
                                                
                                                    
                                                    
                                                        SoftMaxBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        SoftPlus
                                                    
                                                    
                                                
                                                    
                                                    
                                                        SoftPlusBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Sqrt
                                                    
                                                    
                                                
                                                    
                                                    
                                                        SqrtBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Square
                                                    
                                                    
                                                
                                                    
                                                    
                                                        SquaredDifference
                                                    
                                                    
                                                
                                                    
                                                    
                                                        StaticReshape
                                                    
                                                    
                                                
                                                    
                                                    
                                                        StaticTranspose
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Subtract
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Tanh
                                                    
                                                    
                                                
                                                    
                                                    
                                                        TanhBackward
                                                    
                                                    
                                                
                                                    
                                                    
                                                        TypeCast
                                                    
                                                    
                                                
                                                    
                                                    
                                                        Wildcard
                                                    
                                                    
                                                
                                            
                                        
                                                            
                                                            
                                                                
                                                                
                                                                    enum dnnl_alg_kind_t
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl_normalization_flags_t
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl_primitive_kind_t
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl_prop_kind_t
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl_query_t
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl::normalization_flags
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl::query
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    struct dnnl_exec_arg_t
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    struct dnnl_primitive
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    struct dnnl_primitive_desc
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::primitive
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    struct dnnl::primitive_desc
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    struct dnnl::primitive_desc_base
                                                                
                                                                
                                                            
                                                        
                                                    
                                                            
                                                            
                                                                
                                                                
                                                                    enum dnnl_rnn_direction_t
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl_rnn_flags_t
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl::rnn_direction
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    enum dnnl::rnn_flags
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::augru_backward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::augru_forward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::gru_backward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::gru_forward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::lbr_augru_backward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::lbr_augru_forward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::lbr_gru_backward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::lbr_gru_forward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::lstm_backward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::lstm_forward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                
                                                                    struct dnnl::rnn_primitive_desc_base
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::vanilla_rnn_backward
                                                                
                                                                
                                                                
                                                            
                                                                
                                                                    struct dnnl::vanilla_rnn_forward
                                                                
                                                                
                                                                
                                                            
                                                        
                                                    cpu_sgemm_and_matmul cpp
Annotated version: MatMul Tutorial: Comparison with SGEMM
Annotated version: MatMul Tutorial: Comparison with SGEMM
/*******************************************************************************
* Copyright 2019-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cassert>
#include <cctype>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <random>
#include <stdexcept>
#include <vector>
#include "oneapi/dnnl/dnnl.hpp"
#include "example_utils.hpp"
using namespace dnnl;
namespace {
void init_vector(std::vector<float> &v) {
    std::mt19937 gen;
    std::uniform_real_distribution<float> u(-1, 1);
    for (auto &e : v)
        e = u(gen);
}
int compare_vectors(const std::vector<float> &v1, const std::vector<float> &v2,
        int64_t K, const char *message) {
    double v1_l2 = 0, diff_l2 = 0;
    for (size_t n = 0; n < v1.size(); ++n) {
        float diff = v1[n] - v2[n];
        v1_l2 += v1[n] * v1[n];
        diff_l2 += diff * diff;
    }
    v1_l2 = std::sqrt(v1_l2);
    diff_l2 = std::sqrt(diff_l2);
    // Finding the reasonable (tight and accurate) threshold is quite difficult
    // problem.
    // The implementation testing might also use special data filling to
    // alleviate issues related to the finite precision arithmetic.
    // However, in simple cases the machine epsilon multiplied by log(K) should
    // work reasonably well.
    const double threshold = std::numeric_limits<float>::epsilon()
            * std::log(std::max(2., (double)K));
    bool ok = diff_l2 <= threshold * v1_l2;
    printf("%s\n\tL2 Norms"
           "\n\t\tReference matrix:%g\n\t\tError:%g\n\t\tRelative_error:%g\n"
           "\tAccuracy check: %s\n",
            message, v1_l2, diff_l2, diff_l2 / v1_l2, ok ? "OK" : "FAILED");
    return ok ? 0 : 1;
}
} // namespace
int number_of_runs = 1;
float fixed_beta = 0.f;
engine eng(engine::kind::cpu, 0); // We create a global engine for simplicity
// Create a _dynamic_ MatMul primitive that can work with arbitrary shapes
// and alpha parameters.
// Warning: current limitation is that beta parameter should be known in
// advance (use fixed_beta).
matmul dynamic_matmul_create() {
    // We assume that beta is known at the primitive creation time
    float beta = fixed_beta;
    memory::dims a_shape = {DNNL_RUNTIME_DIM_VAL, DNNL_RUNTIME_DIM_VAL};
    memory::dims b_shape = {DNNL_RUNTIME_DIM_VAL, DNNL_RUNTIME_DIM_VAL};
    memory::dims c_shape = {DNNL_RUNTIME_DIM_VAL, DNNL_RUNTIME_DIM_VAL};
    memory::dims a_strides = {DNNL_RUNTIME_DIM_VAL, DNNL_RUNTIME_DIM_VAL};
    memory::dims b_strides = {DNNL_RUNTIME_DIM_VAL, DNNL_RUNTIME_DIM_VAL};
    memory::dims c_strides = {DNNL_RUNTIME_DIM_VAL, 1};
    memory::desc a_md(a_shape, memory::data_type::f32, a_strides);
    memory::desc b_md(b_shape, memory::data_type::f32, b_strides);
    memory::desc c_md(c_shape, memory::data_type::f32, c_strides);
    // Create attributes (to handle alpha dynamically and beta if necessary)
    primitive_attr attr;
    attr.set_scales_mask(DNNL_ARG_WEIGHTS, /* mask */ 0);
    if (beta != 0.f) {
        post_ops po;
        po.append_sum(beta);
        attr.set_post_ops(po);
    }
    // Create a MatMul primitive
    matmul::primitive_desc matmul_pd(eng, a_md, b_md, c_md, attr);
    return matmul(matmul_pd);
}
// Execute a _dynamic_ MatMul primitive created earlier. All the parameters are
// passed at a run-time (except for beta which has to be specified at the
// primitive creation time due to the current limitation).
void dynamic_matmul_execute(matmul &matmul_p, char transA, char transB,
        int64_t M, int64_t N, int64_t K, float alpha, const float *A,
        int64_t lda, const float *B, int64_t ldb, float beta, float *C,
        int64_t ldc) {
    using dims = memory::dims;
    if (beta != fixed_beta)
        throw std::logic_error("Run-time beta is not yet supported.");
    // Translate transA and transB
    dims a_strides = tolower(transA) == 'n' ? dims {lda, 1} : dims {1, lda};
    dims b_strides = tolower(transB) == 'n' ? dims {ldb, 1} : dims {1, ldb};
    // Wrap raw pointers into oneDNN memories (with proper shapes)
    memory A_m({{M, K}, memory::data_type::f32, a_strides}, eng, (void *)A);
    memory B_m({{K, N}, memory::data_type::f32, b_strides}, eng, (void *)B);
    memory C_m({{M, N}, memory::data_type::f32, {ldc, 1}}, eng, (void *)C);
    // Prepare oneDNN memory for alpha
    memory alpha_m({{1}, memory::data_type::f32, {1}}, eng, &alpha);
    // Execute the MatMul primitive
    stream s(eng);
    matmul_p.execute(s,
            {{DNNL_ARG_SRC, A_m}, {DNNL_ARG_WEIGHTS, B_m}, {DNNL_ARG_DST, C_m},
                    {DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, alpha_m}});
    s.wait();
}
void sgemm_and_matmul_with_params(char transA, char transB, int64_t M,
        int64_t N, int64_t K, float alpha, float beta) {
    if (beta != fixed_beta)
        throw std::logic_error("Run-time beta is not yet supported.");
    // Allocate and initialize matrices
    std::vector<float> A(M * K);
    init_vector(A);
    std::vector<float> B(K * N);
    init_vector(B);
    std::vector<float> C_sgemm(M * N);
    init_vector(C_sgemm);
    std::vector<float> C_dynamic_matmul = C_sgemm;
    // Prepare leading dimensions
    int64_t lda = tolower(transA) == 'n' ? K : M;
    int64_t ldb = tolower(transB) == 'n' ? N : K;
    int64_t ldc = N;
    // 1. Execute sgemm
    for (int run = 0; run < number_of_runs; ++run)
        dnnl_sgemm(transA, transB, M, N, K, alpha, A.data(), lda, B.data(), ldb,
                beta, C_sgemm.data(), ldc);
    // 2.a Create dynamic MatMul
    auto dynamic_matmul = dynamic_matmul_create();
    // 2.b Execute
    for (int run = 0; run < number_of_runs; ++run)
        dynamic_matmul_execute(dynamic_matmul, transA, transB, M, N, K, alpha,
                A.data(), lda, B.data(), ldb, beta, C_dynamic_matmul.data(),
                ldc);
    int rc = 0;
    rc |= compare_vectors(
            C_sgemm, C_dynamic_matmul, K, "Compare SGEMM vs dynamic MatMul");
    if (rc) throw std::logic_error("The resulting matrices diverged too much.");
}
void sgemm_and_matmul() {
    sgemm_and_matmul_with_params('N', 'T', 10, 20, 30, 1.1f, fixed_beta);
}
int main(int argc, char **argv) {
    return handle_example_errors({engine::kind::cpu}, sgemm_and_matmul);
}