/* Listing metadata: 41 lines, 1015 B. The content below is an OpenCL C kernel (the viewer's "Common Lisp" label is a misdetection). */
/*
 * Sliding-window weighted sum over a 2D matrix with clamp-to-edge borders.
 *
 * Each work-item handles one element (x, y), where x = get_global_id(0) and
 * y = get_global_id(1), and writes exactly one element of `result`.
 *
 *   buf1    input matrix, mat1_w * mat1_h doubles; element (x, y) is read
 *           from buf1[x * mat1_h + y]
 *   buf2    filter weights, (2*k+1) * (2*k+1) doubles; weight (kx, ky) is
 *           read from buf2[kx * (2*k+1) + ky]
 *   result  output matrix, indexed the same way as buf1
 *   mat1_w  matrix extent along x
 *   mat1_h  matrix extent along y
 *   k       filter radius; the window is (2*k+1) elements on each side
 *
 * Window positions that fall outside the matrix reuse the nearest edge
 * element. Work-items outside the matrix do nothing.
 *
 * NOTE(review): `double` needs fp64 support on the device (cl_khr_fp64 on
 * older OpenCL versions) - confirm the host program checks/enables it.
 */
__kernel void template(__global double *buf1, __global double *buf2,
                       __global double *result, int mat1_w, int mat1_h, int k)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    /*
     * Test each coordinate separately. Testing only the flattened index lets
     * a work-item with y >= mat1_h (padded global size) alias the slot of
     * another element and overwrite it with a wrong value.
     */
    if (x >= mat1_w || y >= mat1_h) {
        return;
    }

    int ksize = 2 * k + 1;
    double sum = 0.0;

    for (int kx = 0; kx < ksize; kx++) {
        /* The x clamp does not depend on ky, so compute it once per row. */
        int overlap_x = x + kx - k;
        if (overlap_x < 0) {
            overlap_x = 0;
        } else if (overlap_x >= mat1_w) {
            overlap_x = mat1_w - 1;
        }

        for (int ky = 0; ky < ksize; ky++) {
            int overlap_y = y + ky - k;
            if (overlap_y < 0) {
                overlap_y = 0;
            } else if (overlap_y >= mat1_h) {
                overlap_y = mat1_h - 1;
            }

            /* Both coordinates are clamped, so idx1 is always in range. */
            int idx1 = overlap_x * mat1_h + overlap_y;

            /* Parenthesised: `kx * 2*k+1` would parse as (kx*2*k) + 1. */
            int idxk = kx * ksize + ky;

            sum += buf1[idx1] * buf2[idxk];
        }
    }

    result[x * mat1_h + y] = sum;
}