Note: Use the class "language-opencl" for OpenCL kernel code. Host code is automatically highlighted with the "language-c" or "language-cpp" class.

OpenCL kernel code

// CLK_ADDRESS_CLAMP_TO_EDGE = aaa|abcd|ddd
constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
typedef float type_single;

type_single filter_sum_single_3x3(read_only image2d_t imgIn,
                                  constant float* filterKernel,
                                  const int2 coordBase,
                                  const int border)
{
    type_single sum = (type_single)(0.0f);
    const int rows = get_image_height(imgIn);
    const int cols = get_image_width(imgIn);
    int2 coordCurrent;
    int2 coordBorder;
    float color;

    // Image patch is row-wise accessed
    // Filter kernel is centred in the middle
    #pragma unroll
    for (int y = -ROWS_HALF_3x3; y <= ROWS_HALF_3x3; ++y)       // Start at the top left corner of the filter
    {
        coordCurrent.y = coordBase.y + y;
        #pragma unroll
        for (int x = -COLS_HALF_3x3; x <= COLS_HALF_3x3; ++x)   // And end at the bottom right corner
        {
            coordCurrent.x = coordBase.x + x;
            coordBorder = borderCoordinate(coordCurrent, rows, cols, border);
            color = read_imagef(imgIn, sampler, coordBorder).x;

            const int idx = (y + ROWS_HALF_3x3) * COLS_3x3 + x + COLS_HALF_3x3;
            sum += color * filterKernel[idx];
        }
    }

    return sum;
}

kernel void filter_single_3x3(read_only image2d_t imgIn,
                              write_only image2d_t imgOut,
                              constant float* filterKernel,
                              const int border)
{
    int2 coordBase = (int2)(get_global_id(0), get_global_id(1));

    type_single sum = filter_sum_single_3x3(imgIn, filterKernel, coordBase, border);

    write_imagef(imgOut, coordBase, sum);
}