| disable_loop_pipelining |  
       Directs the Intel® oneAPI DPC++/C++ Compiler to disable pipelining of a loop.    |  
       [[intel::disable_loop_pipelining]]
for (int i = 1; i < N; i++) {
  int j = a[i-1];
  // Memory dependency induces a high-latency loop feedback path
  a[i] = foo(j)
}
  |  
      
 
       
       | initiation_interval |  
       Forces a loop to have a loop initialization interval (II) of a specified value.    |  
       // ii set to 5
[[intel::initiation_interval(5)]]
  for (int i = 0; i < N; ++i){
}
  |  
      
 
       
       | ivdep |  
       Ignores memory dependencies between iterations of this loop    |  
       // ivdep loop
[[intel::ivdep]] for (…) {}
 //ivdep safelen 
[[intel::ivdep(safelen)]] for (;;) {} 
 // ivdep accessor
[[intel::ivdep(accessorA)]] for (;;) {}
 //ivdep array safelen
[[intel::ivdep(accessorA, safelen)]]
for (;;){}
  |  
      
 
       
       | loop_coalesce |  
       Coalesces nested loops into a single loop without affecting the loop functionality.    |  
       [[intel::loop_coalesce(2)]] 
for (int i = 0; i < N; i++)
  for (int j = 0; j < M; j++)
    sum[i][j] += i+j;
  |  
      
 
       
       | max_concurrency |  
       Limits the number of iterations of a loop that can simultaneously execute at any time.    |  
       //max concurrency set to 1
[[intel::max_concurrency(1)]] 
  for (int i = 0; i < c; ++i){
}
  |  
      
 
       
       | max_interleaving |  
       Maximizes the throughput and hardware resource occupancy of pipelined inner loops in a loop nest.    |  
       // Loop j is pipelined with ii=1
for (int j = 0; j < M; j++) {
  int a[N];
  // Loop i is pipelined with ii=2 
  [[intel::max_interleaving(1)]]
  for (int i = 1; i < N; i++) {
    a[i] = foo(i)
  }
  …
}
  |  
      
 
       
       | speculated_iterations |  
       Improves the performance of pipelined loops.    |  
       [[intel::speculated_iterations(1)]]
  while (m*m*m < N) {
    m += 1;
  }
  dst[0] = m;
  |  
      
 
       
       | unroll |  
       Unrolls a loop in the kernel code.    |  
       // unroll factor N set to 2
#pragma unroll 2
for(size_t k = 0; k < 4; k++){
  mac += data_in[(gid * 4) + k] * coeff[k];
}
  |  
      
 
       
       | nofusion |  
       Prevents the compiler from fusing the annotated loop with any of the adjacent loops.  |  
       for (int x = 0; x < N; x++) { 
  a1_acc[x] = x; 
}
[[intel::nofusion]] 
for (int x = 0; x < N; x++) { 
  a2_acc[x] = x; 
}
  |  
      
 
       
       | sycl::ext::intel::fpga_loop_fuse<v>(f) |  
       Fuses loops within the function f up to a depth of v >= 1, where v = 1 by default.  |  
       [=]() [[intel::kernel_args_restrict]] { 
  sycl::ext::intel::fpga_loop_fuse<v>{
    for (int x = 0; x < N; x++) {
      for (int y = 0; y < N; y++) {
        for (int z = 0; z < N; z++) {
          a1_acc[x][y][z] = 0;
        }
      }
    }
    for (int x = 0; x < N + 1; x++) {
      for (int y = 0; y < N + 1; y++) {
        for (int z = 0; z < N + 1; z++) {
          a2_acc[x][y][z] = 0;
        }
      }
    }
  }
}
  |  
      
 
       
       | sycl::ext::intel::fpga_loop_fuse<v><v>(f) |  
       Fuses loops within the function f up to a depth v >= 1 while overriding fusion-safety checks. Here, v = 1 by default.  |  
       [=]() { //Kernel
  sycl::ext::intel::fpga_loop_fuse_independent([&] {
    for(int x = 0; x < N; x++){
      a3_acc[x] = x;
    }
    for(int x = 0; x < N + 1; x++){
      a4_acc[x] = x;
    }
  });
}
  |