Intel® FPGA SDK for OpenCL™ Pro Edition: Programming Guide

ID 683846
Date 12/19/2022
Public
Document Table of Contents

5.5.5.1. Ensuring Compatibility with Other OpenCL SDKs

Currently, Intel®'s implementation of OpenCL pipes only partially conforms to the OpenCL Specification version 2.0. If you port a kernel that implements pipes from another OpenCL SDK to the Intel® FPGA SDK for OpenCL™, you must modify both the host code and the kernel code. The modifications do not affect subsequent portability of your application to other OpenCL SDKs.

Original Program Code

Below is an example of an OpenCL application:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "CL/opencl.h"
#define SIZE 1000

/*
 * OpenCL C source for the two kernels used by this example, kept as a single
 * string that main() hands to clCreateProgramWithSource():
 *   - pipe_writer: each work-item writes in[gid] into the pipe p_in.
 *   - pipe_reader: each work-item reads one packet from the pipe p_out into
 *     out[gid].
 */
const char *kernel_source = "__kernel void pipe_writer(__global int *in,"
                            "                          write_only pipe int p_in)\n"
                            "{\n"
                            "    int gid = get_global_id(0);\n"
                            "    write_pipe(p_in, &in[gid]);\n"
                            "}\n"
                            "__kernel void pipe_reader(__global int *out,"
                            "                          read_only pipe int p_out)\n"
                            "{\n"
                            "    int gid = get_global_id(0);\n"
                            "    read_pipe(p_out, &out[gid]);\n"
                            "}\n";

int main()
{
    int *input = (int *)malloc(sizeof(int) * SIZE);
    int *output = (int *)malloc(sizeof(int) * SIZE);
    memset(output, 0, sizeof(int) * SIZE);
    for (int i = 0; i != SIZE; ++i)
    {
        input[i] = rand();
    }

    cl_int status;
    cl_platform_id platform;
    cl_uint num_platforms;
    status = clGetPlatformIDs(1, &platform, &num_platforms);

    cl_device_id device;
    cl_uint num_devices;
    status = clGetDeviceIDs(platform,
	                        CL_DEVICE_TYPE_ALL,
                            1,
                            &device,
                            &num_devices);

    cl_context context = clCreateContext(0, 1, &device, NULL, NULL, &status);

    cl_command_queue queue = clCreateCommandQueue(context, device, 0, &status);

    size_t len = strlen(kernel_source);  
    cl_program program = clCreateProgramWithSource(context,
	                                               1,
                                                   (const char **)&kernel_source,
                                                   &len,
                                                   &status);

    status = clBuildProgram(program, num_devices, &device, "", NULL, NULL);

    cl_kernel pipe_writer = clCreateKernel(program, "pipe_writer", &status);
    cl_kernel pipe_reader = clCreateKernel(program, "pipe_reader", &status);

    cl_mem in_buffer = clCreateBuffer(context,
                                      CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                      sizeof(int) * SIZE,
                                      input,
                                      &status);
    cl_mem out_buffer = clCreateBuffer(context,
                                       CL_MEM_WRITE_ONLY,
                                       sizeof(int) * SIZE,
                                       NULL,
                                       &status);

    cl_mem pipe = clCreatePipe(context, 0, sizeof(cl_int), SIZE, NULL, &status);

    status = clSetKernelArg(pipe_writer, 0, sizeof(cl_mem), &in_buffer);
    status = clSetKernelArg(pipe_writer, 1, sizeof(cl_mem), &pipe);
    status = clSetKernelArg(pipe_reader, 0, sizeof(cl_mem), &out_buffer);
    status = clSetKernelArg(pipe_reader, 1, sizeof(cl_mem), &pipe);

    size_t size = SIZE; 
    cl_event sync;
    status = clEnqueueNDRangeKernel(queue,
                                    pipe_writer,
                                    1,
                                    NULL,
                                    &size,
                                    &size,
                                    0,
                                    NULL,
                                    &sync);
    status = clEnqueueNDRangeKernel(queue,
                                    pipe_reader,
                                    1,
                                    NULL,
                                    &size,
                                    &size,
                                    1,
                                    &sync,
                                    NULL);
    status = clFinish(queue);

    status = clEnqueueReadBuffer(queue,
                                 out_buffer,
                                 CL_TRUE,
                                 0,
                                 sizeof(int) * SIZE,
                                 output,
                                 0,
                                 NULL,
                                 NULL);

    int golden = 0, result = 0;
    for (int i = 0; i != SIZE; ++i)
    {
      golden += input[i];
      result += output[i];
    }

    int ret = 0;
    if (golden != result)
    {
        printf("FAILED!");
        ret = 1;
    } else
    {
        printf("PASSED!");
    }
    printf("\n");

    return ret;
}

Host Code Modification

If the original host code runs on OpenCL SDKs that conform to the OpenCL Specification version 2.0, you must modify it before running it on the Intel® FPGA SDK for OpenCL™. To modify the host code, make the following changes:

  1. Use the clCreateProgramWithBinary function instead of the clCreateProgramWithSource function to create the program.
  2. Move the contents of the kernel_source string into a separate source file. Refer to Kernel Code Modification for more information.

Kernel Code Modification

If your kernel code runs on OpenCL SDKs that conform to the OpenCL Specification version 2.0, you must modify it before running it on the Intel® FPGA SDK for OpenCL™. To modify the kernel code, make the following changes:

  1. Create a separate source (.cl) file for the kernel code.
  2. Rename the pipe arguments so that they are the same in both kernels. For example, rename p_in and p_out to p.
  3. Specify the depth attribute for the pipe arguments. Assign a depth attribute value equal to the maximum number of packets that the pipe is created to hold in the host code (the packet count passed to clCreatePipe; SIZE in this example).
  4. Build the kernel program in offline compilation mode, because the Intel® FPGA SDK for OpenCL™ compiles kernels with its offline compiler rather than from source at run time.

The modified kernel code appears as follows:

/* Pipe depth used by both kernels below. It is meant to equal the maximum
   number of packets the host-side pipe is created to hold (SIZE in the host
   code), so keep the two definitions in sync. */
#define SIZE 1000

/*
 * Producer kernel: each work-item takes the element of `in` selected by its
 * global ID in dimension 0 and writes it to pipe `p` as one packet.
 * The pipe is declared with a depth of SIZE packets.
 */
__kernel void pipe_writer(__global int *in,
                          write_only pipe int __attribute__((depth(SIZE))) p)
{
    const int work_item = get_global_id(0);
    __global int *packet = in + work_item;

    write_pipe(p, packet);
}

/*
 * Consumer kernel: each work-item reads one packet from pipe `p` and stores
 * it in the element of `out` selected by its global ID in dimension 0.
 * The pipe is declared with a depth of SIZE packets.
 */
__kernel void pipe_reader(__global int *out,
                          read_only pipe int __attribute__((depth(SIZE))) p)
{
    const int work_item = get_global_id(0);
    __global int *slot = out + work_item;

    read_pipe(p, slot);
}