initial commit
This commit is contained in:
commit
ad99d3050e
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
obj/*
|
||||
image_blurred.bmp
|
||||
main
|
25
Makefile
Normal file
25
Makefile
Normal file
@ -0,0 +1,25 @@
|
||||
# Build configuration for the OpenCL image-filter demo.
CC=gcc
EXT=.c
FLAGS=-O0 -g
# -MD/-MP make the compiler write a .d dependency file next to each object.
DEPFLAGS=-MD -MP
LIBS=-lOpenCL -lSDL2 -lm
EXEC_NAME=main

OBJDIR=obj

SRC=$(wildcard *$(EXT))
OBJS=$(patsubst %,$(OBJDIR)/%.o, $(basename $(SRC)))
# The dependency files live beside the objects (obj/foo.o -> obj/foo.d),
# not beside the sources, so derive their names from OBJS.
DEPS=$(OBJS:.o=.d)

# Create the object directory before any rule runs.
$(shell mkdir -p $(dir $(OBJS)) > /dev/null)

# Libraries come after the objects: linkers that resolve symbols left to
# right (e.g. with --as-needed) otherwise drop them before they are needed.
$(EXEC_NAME): $(OBJS)
	$(CC) $(FLAGS) $^ -o $@ $(LIBS)

$(OBJDIR)/%.o: %$(EXT)
	$(CC) $(FLAGS) $(DEPFLAGS) -c -o $@ $<

.PHONY: clean
clean:
	rm -rf $(OBJDIR) $(EXEC_NAME)

-include $(DEPS)
|
BIN
assets/image.png
Normal file
BIN
assets/image.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 395 KiB |
40
gaussian_blur_kernel.cl
Normal file
40
gaussian_blur_kernel.cl
Normal file
@ -0,0 +1,40 @@
|
||||
__kernel void gaussian_blur(sampler_t sampler, __read_only image2d_t image,
|
||||
__global double *_kernel, __write_only image2d_t result,
|
||||
int w, int h, int k)
|
||||
{
|
||||
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
// printf("w: %d, h: %d, k: %d\n", w, h, k);
|
||||
|
||||
if (x >= w || y >= h)
|
||||
return;
|
||||
|
||||
int4 sum = {0, 0, 0, 255};
|
||||
|
||||
for (int kx = 0; kx < 2*k+1; kx++) {
|
||||
for (int ky = 0; ky < 2*k+1; ky++) {
|
||||
int overlap_x = x + kx - k;
|
||||
int overlap_y = y + ky - k;
|
||||
|
||||
if (overlap_x < 0)
|
||||
overlap_x = 0;
|
||||
else if (overlap_x >= w)
|
||||
overlap_x = w - 1;
|
||||
|
||||
if (overlap_y < 0)
|
||||
overlap_y = 0;
|
||||
else if (overlap_y >= h)
|
||||
overlap_y = h - 1;
|
||||
|
||||
int idxk = kx * 2*k+1 + ky;
|
||||
int4 pixel = read_imagei(image, sampler, (int2) { overlap_x, overlap_y });
|
||||
|
||||
sum.x += pixel.x * _kernel[idxk];
|
||||
sum.y += pixel.y * _kernel[idxk];
|
||||
sum.z += pixel.z * _kernel[idxk];
|
||||
}
|
||||
}
|
||||
|
||||
write_imagei(result, (int2) { x, y }, sum);
|
||||
}
|
5
image.c
Normal file
5
image.c
Normal file
@ -0,0 +1,5 @@
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb/stb_image.h"
|
||||
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb/stb_image_write.h"
|
17
main.c
Normal file
17
main.c
Normal file
@ -0,0 +1,17 @@
|
||||
#include "opencl.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
Matrixd kernel = gauss_filter_create(15, 2.0);
|
||||
Matrixu8 image = matrixu8_create_from_file("assets/image.png");
|
||||
matrixd_print(&kernel);
|
||||
|
||||
cl_struct cl = {0};
|
||||
opencl_init(&cl, "gaussian_blur_kernel.cl");
|
||||
opencl_init_copy_host_buffers_mu8(&cl, &image, &kernel);
|
||||
opencl_load_kernel(&cl, "sobel_kernel.cl", "sobel", &cl.kernel1, &cl.program1);
|
||||
opencl_execute_image(&cl);
|
||||
opencl_read_result_image(&cl);
|
||||
|
||||
return 0;
|
||||
}
|
298
opencl.c
Normal file
298
opencl.c
Normal file
@ -0,0 +1,298 @@
|
||||
#include "opencl.h"
|
||||
#include "utils.h"
|
||||
#include <CL/cl.h>
|
||||
#include <assert.h>
|
||||
#include <linux/limits.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "stb/stb_image.h"
|
||||
#include "stb/stb_image_write.h"
|
||||
|
||||
/*
 * Set up the OpenCL state in `cl`: pick the first platform and its first
 * GPU device, create a context and a profiling-enabled command queue, then
 * build `program_path` and create the "gaussian_blur" kernel from it.
 *
 * Returns CL_SUCCESS, or the first OpenCL error encountered. Each failure
 * is logged by CHECK_CL_ERROR and aborts the initialisation, so no later
 * call is made with an invalid handle.
 */
cl_int opencl_init(cl_struct *cl, const char *program_path)
{
    assert(cl);
    assert(program_path);

    cl_platform_id platform_id = NULL;
    cl_int ret;

    ret = clGetPlatformIDs(1, &platform_id, NULL);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1,
                         &cl->device_id, NULL);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    cl->context = clCreateContext(NULL, 1, &cl->device_id, NULL, NULL, &ret);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    /* Zero-terminated list of (name, value) pairs. */
    const cl_queue_properties properties[] = {
        CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0,
    };

    cl->command_queue = clCreateCommandQueueWithProperties(cl->context,
            cl->device_id, properties, &ret);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    return opencl_load_kernel(cl, program_path, "gaussian_blur",
                              &cl->kernel, &cl->program);
}
|
||||
|
||||
cl_int opencl_load_kernel(cl_struct *cl, const char *path, const char* kernel_name,
|
||||
cl_kernel *kernel, cl_program *program)
|
||||
{
|
||||
assert(cl);
|
||||
assert(path);
|
||||
assert(kernel_name);
|
||||
assert(kernel);
|
||||
assert(program);
|
||||
|
||||
cl_int ret;
|
||||
|
||||
FILE *fp;
|
||||
char *source_str;
|
||||
size_t source_size;
|
||||
const size_t max_source_size = 10000;
|
||||
|
||||
fp = fopen(path, "r");
|
||||
if (!fp) {
|
||||
fprintf(stderr, "failed to open kernel source file.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
source_str = malloc(max_source_size);
|
||||
assert(source_str);
|
||||
|
||||
source_size = fread(source_str, 1, max_source_size, fp);
|
||||
fprintf(stderr, "kernel: read %ld bytes\n", source_size);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
*program = clCreateProgramWithSource(cl->context, 1,
|
||||
(const char **)&source_str, (const size_t *)&source_size, &ret);
|
||||
CHECK_CL_ERROR(ret);
|
||||
|
||||
ret = clBuildProgram(*program, 1, &(cl->device_id), NULL, NULL, NULL);
|
||||
CHECK_CL_ERROR(ret);
|
||||
|
||||
if (ret != CL_SUCCESS) {
|
||||
char build_log[2000];
|
||||
size_t ret_size;
|
||||
ret = clGetProgramBuildInfo(*program, cl->device_id, CL_PROGRAM_BUILD_LOG, 2000,
|
||||
build_log, &ret_size);
|
||||
CHECK_CL_ERROR(ret);
|
||||
|
||||
fprintf(stderr, "build: read %ld bytes: %s\n", ret_size, build_log);
|
||||
fprintf(stderr, "build failed, exiting\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
*kernel = clCreateKernel(*program, kernel_name, &ret);
|
||||
CHECK_CL_ERROR(ret);
|
||||
|
||||
free(source_str);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Remember `mat1`/`mat2` in `cl`, create one device buffer for each plus a
 * result buffer sized like `mat1`, and upload both matrices with blocking
 * writes.
 *
 * Every OpenCL status is passed to CHECK_CL_ERROR; the status of the last
 * call is returned.
 */
cl_int opencl_init_copy_host_buffers(cl_struct *cl, Matrixd *mat1, Matrixd *mat2)
{
    cl_int status;

    cl->mat1 = mat1;
    cl->mat2 = mat2;

    const size_t bytes1 = matrixd_size(cl->mat1) * sizeof(double);
    const size_t bytes2 = matrixd_size(cl->mat2) * sizeof(double);

    /* Device-side storage for the two inputs. */
    cl->cl_buffer1 = clCreateBuffer(cl->context, CL_MEM_READ_WRITE,
                                    bytes1, NULL, &status);
    CHECK_CL_ERROR(status);

    cl->cl_buffer2 = clCreateBuffer(cl->context, CL_MEM_READ_WRITE,
                                    bytes2, NULL, &status);
    CHECK_CL_ERROR(status);

    /* The result has the same element count as the first matrix. */
    cl->cl_result = clCreateBuffer(cl->context, CL_MEM_READ_WRITE,
                                   bytes1, NULL, &status);
    CHECK_CL_ERROR(status);

    /* Blocking uploads of the host data. */
    status = clEnqueueWriteBuffer(cl->command_queue, cl->cl_buffer1, CL_TRUE, 0,
                                  bytes1, cl->mat1->buf, 0, NULL, NULL);
    CHECK_CL_ERROR(status);

    status = clEnqueueWriteBuffer(cl->command_queue, cl->cl_buffer2, CL_TRUE, 0,
                                  bytes2, cl->mat2->buf, 0, NULL, NULL);
    CHECK_CL_ERROR(status);

    return status;
}
|
||||
|
||||
/*
 * Prepare the device objects for an image convolution.
 *
 * Stores `mat1` and `kernel` in `cl`, then creates:
 *   - cl->cl_buffer1: read-only RGBA/8-bit 2D image initialised from mat1->buf
 *   - cl->cl_buffer2: read-only buffer holding the filter weights (doubles)
 *   - cl->cl_result : write-only image with the same format and size
 *   - cl->sampler   : sampler for integer-coordinate reads
 *
 * Returns CL_SUCCESS or the first OpenCL error, which is also logged.
 */
cl_int opencl_init_copy_host_buffers_mu8(cl_struct *cl, Matrixu8 *mat1, Matrixd *kernel)
{
    assert(cl);
    assert(mat1);
    assert(kernel);

    cl_int ret;

    cl->matu8 = mat1;
    cl->kernel_m = kernel;

    const cl_image_format format = {
        .image_channel_order = CL_RGBA,
        .image_channel_data_type = CL_UNSIGNED_INT8,
    };

    cl_image_desc desc;
    memset(&desc, 0, sizeof desc);
    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width = mat1->w;
    desc.image_height = mat1->h;

    cl->cl_buffer1 = clCreateImage(cl->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                   &format, &desc, mat1->buf, &ret);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    const size_t kernel_bytes = matrixd_size(kernel) * sizeof(double);

    cl->cl_buffer2 = clCreateBuffer(cl->context, CL_MEM_READ_ONLY,
                                    kernel_bytes, NULL, &ret);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    cl->cl_result = clCreateImage(cl->context, CL_MEM_WRITE_ONLY,
                                  &format, &desc, NULL, &ret);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    /* The kernels read with integer pixel coordinates, which is only
     * defined for unnormalized coordinates and nearest filtering; the
     * default sampler (NULL properties) uses normalized coordinates. */
    const cl_sampler_properties sampler_props[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
        0,
    };

    cl->sampler = clCreateSamplerWithProperties(cl->context, sampler_props, &ret);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    /* Blocking upload of the filter weights. */
    ret = clEnqueueWriteBuffer(cl->command_queue, cl->cl_buffer2, CL_TRUE, 0,
                               kernel_bytes, kernel->buf, 0, NULL, NULL);
    CHECK_CL_ERROR(ret);

    return ret;
}
|
||||
|
||||
/*
 * Bind the arguments of cl->kernel for the image convolution and enqueue it
 * over a 2D range covering the whole image.
 *
 * Argument order: sampler, source image, filter weights, result image,
 * image width, image height, filter radius k.
 *
 * Returns CL_SUCCESS or the first OpenCL error, which is also logged.
 */
cl_int opencl_execute_image(cl_struct* cl)
{
    cl_int ret;

    /* Radius of the square filter: width = 2k + 1, so k = (width - 1) / 2.
     * The parentheses matter: `w - 1/2` evaluates to w. */
    const int32_t k = ((int32_t)cl->kernel_m->w - 1) / 2;

    const struct {
        size_t size;
        const void *value;
    } args[] = {
        { sizeof(cl_sampler), &cl->sampler },
        { sizeof(cl_mem),     &cl->cl_buffer1 },
        { sizeof(cl_mem),     &cl->cl_buffer2 },
        { sizeof(cl_mem),     &cl->cl_result },
        { sizeof(int32_t),    &cl->matu8->w },
        { sizeof(int32_t),    &cl->matu8->h },
        { sizeof(int32_t),    &k },
    };

    for (cl_uint i = 0; i < sizeof args / sizeof args[0]; i++) {
        ret = clSetKernelArg(cl->kernel, i, args[i].size, args[i].value);
        CHECK_CL_ERROR(ret);
        if (ret != CL_SUCCESS)
            return ret;
    }

    /* Round each dimension up to a multiple of the work-group edge; the
     * kernel discards the work-items that fall outside the image. */
    const size_t local_size = 16;
    const size_t local_items_size[2] = { local_size, local_size };
    const size_t global_items_size[2] = {
        (cl->matu8->w + local_size - 1) / local_size * local_size,
        (cl->matu8->h + local_size - 1) / local_size * local_size,
    };

    ret = clEnqueueNDRangeKernel(cl->command_queue, cl->kernel, 2, NULL,
                                 global_items_size, local_items_size, 0, NULL, NULL);
    CHECK_CL_ERROR(ret);

    return ret;
}
|
||||
|
||||
/*
 * Bind the arguments of cl->kernel for the buffer-based convolution and
 * enqueue it over a 2D range covering cl->mat1.
 *
 * Argument order: input buffer, filter buffer, result buffer, matrix width,
 * matrix height, filter radius k.
 *
 * Returns CL_SUCCESS or the first OpenCL error, which is also logged.
 */
cl_int opencl_execute(cl_struct* cl)
{
    cl_int ret;

    /* Radius of the square filter: width = 2k + 1, so k = (width - 1) / 2.
     * The parentheses matter: `w - 1/2` evaluates to w. */
    const int32_t k = ((int32_t)cl->mat2->w - 1) / 2;

    const struct {
        size_t size;
        const void *value;
    } args[] = {
        { sizeof(cl_mem),   &cl->cl_buffer1 },
        { sizeof(cl_mem),   &cl->cl_buffer2 },
        { sizeof(cl_mem),   &cl->cl_result },
        { sizeof(uint32_t), &cl->mat1->w },
        { sizeof(uint32_t), &cl->mat1->h },
        { sizeof(uint32_t), &k },
    };

    for (cl_uint i = 0; i < sizeof args / sizeof args[0]; i++) {
        ret = clSetKernelArg(cl->kernel, i, args[i].size, args[i].value);
        CHECK_CL_ERROR(ret);
        if (ret != CL_SUCCESS)
            return ret;
    }

    /* Round each dimension up to a multiple of the work-group edge. */
    const size_t local_size = 16;
    const size_t local_items_size[2] = { local_size, local_size };
    const size_t global_items_size[2] = {
        (cl->mat1->w + local_size - 1) / local_size * local_size,
        (cl->mat1->h + local_size - 1) / local_size * local_size,
    };

    ret = clEnqueueNDRangeKernel(cl->command_queue, cl->kernel, 2, NULL,
                                 global_items_size, local_items_size, 0, NULL, NULL);
    CHECK_CL_ERROR(ret);

    return ret;
}
|
||||
|
||||
/*
 * Wait for the queue to drain, read the result buffer (sized like cl->mat1)
 * back to the host and print every 100th element to stdout.
 *
 * Returns CL_SUCCESS, CL_OUT_OF_HOST_MEMORY when the host buffer cannot be
 * allocated, or the OpenCL error of the failing call.
 */
cl_int opencl_read_result(cl_struct *cl)
{
    cl_int ret = clFinish(cl->command_queue);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    const size_t result_size = matrixd_size(cl->mat1);

    double *result_buffer = calloc(result_size, sizeof *result_buffer);
    if (!result_buffer)
        return CL_OUT_OF_HOST_MEMORY;

    ret = clEnqueueReadBuffer(cl->command_queue, cl->cl_result, CL_TRUE, 0,
                              result_size * sizeof *result_buffer, result_buffer,
                              0, NULL, NULL);
    CHECK_CL_ERROR(ret);

    if (ret == CL_SUCCESS) {
        /* Sparse dump: one value out of every hundred. */
        for (size_t i = 0; i < result_size; i += 100)
            printf("%zu: %lf\n", i, result_buffer[i]);
    }

    free(result_buffer);
    return ret;
}
|
||||
|
||||
/*
 * Wait for the queue to drain, read the result image back to the host as
 * tightly packed 4-channel 8-bit pixels and write it to image_blurred.bmp.
 *
 * Returns CL_SUCCESS, CL_OUT_OF_HOST_MEMORY when the host buffer cannot be
 * allocated, or the OpenCL error of the failing call. A failure to write
 * the BMP file is reported on stderr.
 */
cl_int opencl_read_result_image(cl_struct* cl)
{
    cl_int ret = clFinish(cl->command_queue);
    CHECK_CL_ERROR(ret);
    if (ret != CL_SUCCESS)
        return ret;

    /* Four bytes per pixel. */
    const size_t result_size = matrixu8_size(cl->matu8) * 4;

    uint8_t *result_buffer = calloc(result_size, 1);
    if (!result_buffer)
        return CL_OUT_OF_HOST_MEMORY;

    const size_t origin[3] = { 0, 0, 0 };
    const size_t region[3] = { cl->matu8->w, cl->matu8->h, 1 };

    /* Row and slice pitch 0: rows are stored back to back. */
    ret = clEnqueueReadImage(cl->command_queue, cl->cl_result, CL_TRUE, origin,
                             region, 0, 0, result_buffer, 0, NULL, NULL);
    CHECK_CL_ERROR(ret);

    if (ret == CL_SUCCESS) {
        /* stbi_write_bmp returns 0 on failure. */
        if (!stbi_write_bmp("image_blurred.bmp", cl->matu8->w, cl->matu8->h,
                            4, result_buffer))
            fprintf(stderr, "failed to write image_blurred.bmp\n");
    }

    free(result_buffer);
    return ret;
}
|
49
opencl.h
Normal file
49
opencl.h
Normal file
@ -0,0 +1,49 @@
|
||||
#ifndef OPENCL_H
|
||||
#define OPENCL_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <pthread.h>
|
||||
#include <stdbool.h>
|
||||
#include <CL/cl.h>
|
||||
#include "utils.h"
|
||||
|
||||
/*
 * Log an OpenCL failure to stderr with file and line; does nothing when
 * `ret` is CL_SUCCESS. The status is only reported, control flow continues.
 *
 * The argument is evaluated exactly once, and the macro expands to a single
 * statement without a trailing semicolon, so `CHECK_CL_ERROR(x);` is safe
 * inside an unbraced if/else.
 */
#define CHECK_CL_ERROR(ret)                                              \
    do {                                                                 \
        const cl_int check_cl_error_status_ = (ret);                     \
        if (check_cl_error_status_ != CL_SUCCESS) {                      \
            fprintf(stderr, "[%s:%d]: opencl function failed: %s\n",     \
                    __FILE__, __LINE__,                                  \
                    cl_get_error_string(check_cl_error_status_));        \
        }                                                                \
    } while (0)
|
||||
|
||||
typedef struct {
|
||||
Matrixd *mat1;
|
||||
Matrixd *mat2;
|
||||
Matrixp *matp1;
|
||||
Matrixu8 *matu8;
|
||||
Matrixd *kernel_m;
|
||||
cl_mem cl_buffer1;
|
||||
cl_mem cl_buffer2;
|
||||
cl_mem cl_result;
|
||||
cl_sampler sampler;
|
||||
cl_device_id device_id;
|
||||
cl_context context;
|
||||
cl_command_queue command_queue;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_program program1;
|
||||
cl_kernel kernel1;
|
||||
} cl_struct;
|
||||
|
||||
cl_int opencl_init(cl_struct *cl, const char *program_path);
|
||||
cl_int opencl_load_kernel(cl_struct *cl, const char *path, const char *kernel_name,
|
||||
cl_kernel *kernel, cl_program *program);
|
||||
cl_int opencl_init_copy_host_buffers(cl_struct *cl, Matrixd *mat1, Matrixd *mat2);
|
||||
cl_int opencl_init_copy_host_buffers_pixel(cl_struct *cl, Matrixp *mat1, Matrixd *mat2);
|
||||
cl_int opencl_init_copy_host_buffers_mu8(cl_struct *cl, Matrixu8 *mat1, Matrixd *mat2);
|
||||
cl_int opencl_execute(cl_struct* cl);
|
||||
cl_int opencl_execute_image(cl_struct* cl);
|
||||
cl_int opencl_read_result(cl_struct* cl);
|
||||
cl_int opencl_read_result_image(cl_struct* cl);
|
||||
|
||||
#endif
|
39
sobel_kernel.cl
Normal file
39
sobel_kernel.cl
Normal file
@ -0,0 +1,39 @@
|
||||
__kernel void sobel(sampler_t sampler, __read_only image2d_t image,
|
||||
__global double *_kernel, __write_only image2d_t result,
|
||||
int w, int h, int k)
|
||||
{
|
||||
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (x >= w || y >= h)
|
||||
return;
|
||||
|
||||
int4 sum = {0, 0, 0, 255};
|
||||
|
||||
for (int kx = 0; kx < 2*k+1; kx++) {
|
||||
for (int ky = 0; ky < 2*k+1; ky++) {
|
||||
int overlap_x = x + kx - k;
|
||||
int overlap_y = y + ky - k;
|
||||
|
||||
if (overlap_x < 0)
|
||||
overlap_x = 0;
|
||||
else if (overlap_x >= w)
|
||||
overlap_x = w - 1;
|
||||
|
||||
if (overlap_y < 0)
|
||||
overlap_y = 0;
|
||||
else if (overlap_y >= h)
|
||||
overlap_y = h - 1;
|
||||
|
||||
int idxk = kx * 2*k+1 + ky;
|
||||
int4 pixel = read_imagei(image, sampler, (int2) { overlap_x, overlap_y });
|
||||
|
||||
sum.x += pixel.x * _kernel[idxk];
|
||||
sum.y += pixel.y * _kernel[idxk];
|
||||
sum.z += pixel.z * _kernel[idxk];
|
||||
}
|
||||
}
|
||||
|
||||
write_imagei(result, (int2) { x, y }, sum);
|
||||
}
|
7987
stb/stb_image.h
Normal file
7987
stb/stb_image.h
Normal file
File diff suppressed because it is too large
Load Diff
1724
stb/stb_image_write.h
Normal file
1724
stb/stb_image_write.h
Normal file
File diff suppressed because it is too large
Load Diff
40
template_kernel.cl
Normal file
40
template_kernel.cl
Normal file
@ -0,0 +1,40 @@
|
||||
/* Double arithmetic requires the fp64 extension on OpenCL versions < 1.2. */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Convolve the matrix in `buf1` with the square (2k+1) x (2k+1) filter in
 * `buf2` and write the result to `result`.
 *
 * buf1, result  mat1_w * mat1_h doubles, element (x, y) at x * mat1_h + y
 * buf2          filter weights, element (kx, ky) at kx * (2k+1) + ky
 * mat1_w/h      matrix dimensions
 * k             filter radius
 *
 * Neighbours outside the matrix are replaced by the nearest edge element.
 */
__kernel void template(__global double *buf1, __global double *buf2,
                       __global double *result, int mat1_w, int mat1_h, int k)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    /* Test each coordinate on its own: a padded work-item such as
     * (x, y >= mat1_h) can still map to a flat index inside the matrix
     * and would otherwise overwrite a valid cell. */
    if (x >= mat1_w || y >= mat1_h)
        return;

    const int side = 2 * k + 1;
    double sum = 0.0;

    for (int kx = 0; kx < side; kx++) {
        for (int ky = 0; ky < side; ky++) {
            const int sx = clamp(x + kx - k, 0, mat1_w - 1);
            const int sy = clamp(y + ky - k, 0, mat1_h - 1);

            /* Row stride of the filter is its full width. */
            sum += buf1[sx * mat1_h + sy] * buf2[kx * side + ky];
        }
    }

    result[x * mat1_h + y] = sum;
}
|
210
utils.c
Normal file
210
utils.c
Normal file
@ -0,0 +1,210 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include "stb/stb_image.h"
|
||||
#include "stb/stb_image_write.h"
|
||||
#include "utils.h"
|
||||
|
||||
Matrixd matrixd_create(uint32_t w, uint32_t h)
|
||||
{
|
||||
Matrixd ret;
|
||||
ret.w = w;
|
||||
ret.h = h;
|
||||
ret.buf = calloc(w * h, sizeof(double));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Number of elements (w * h) in `m`. The product is computed in size_t so
 * it does not wrap at 32 bits.
 */
size_t matrixd_size(Matrixd *m)
{
    return (size_t)m->w * m->h;
}
|
||||
|
||||
/*
 * Print `m` to stdout: m->w lines, each holding m->h values formatted with
 * "%f " (element (i, j) is stored at i * m->h + j).
 */
void matrixd_print(Matrixd *m)
{
    assert(m);

    const size_t line_count = m->w;
    const size_t per_line = m->h;

    for (size_t line = 0; line < line_count; ++line) {
        const size_t base = line * per_line;

        for (size_t pos = 0; pos < per_line; ++pos)
            printf("%f ", m->buf[base + pos]);

        printf("\n");
    }
}
|
||||
|
||||
Matrixp matrixp_create(uint32_t w, uint32_t h)
|
||||
{
|
||||
Matrixp ret;
|
||||
ret.w = w;
|
||||
ret.h = h;
|
||||
ret.buf = calloc(w * h, sizeof(Pixel));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
Matrixp matrixp_create_from_file(const char *filename)
|
||||
{
|
||||
Matrixp matrix = (Matrixp) { 0 };
|
||||
|
||||
if (!filename)
|
||||
return matrix;
|
||||
|
||||
int x, y, channels;
|
||||
uint8_t *ret_ptr = stbi_load(filename, &x, &y, &channels, 3);
|
||||
matrix.w = x;
|
||||
matrix.h = y;
|
||||
matrix.buf = calloc(matrix.w * matrix.h, sizeof(Pixel));
|
||||
memcpy(matrix.buf, ret_ptr, x * y * sizeof(Pixel));
|
||||
stbi_image_free(ret_ptr);
|
||||
|
||||
return matrix;
|
||||
}
|
||||
|
||||
Matrixu8 matrixu8_create_from_file(const char *filename)
|
||||
{
|
||||
Matrixu8 matrix = (Matrixu8) { 0 };
|
||||
|
||||
if (!filename)
|
||||
return matrix;
|
||||
|
||||
int x, y, channels;
|
||||
uint8_t *ret_ptr = stbi_load(filename, &x, &y, &channels, 4);
|
||||
printf("%s return channels: %d\n", __func__, channels);
|
||||
matrix.w = x;
|
||||
matrix.h = y;
|
||||
matrix.buf = calloc(matrix.w * matrix.h * 4, 1);
|
||||
memcpy(matrix.buf, ret_ptr, x * y * 4);
|
||||
stbi_image_free(ret_ptr);
|
||||
|
||||
return matrix;
|
||||
}
|
||||
|
||||
/*
 * Write `matrix` (4 bytes per pixel) to `filename` as a BMP file.
 * A failed write is reported on stderr.
 */
void matrixu8_write_to_bmp(Matrixu8 *matrix, const char *filename)
{
    assert(matrix);
    assert(filename);

    /* stbi_write_bmp returns 0 on failure. */
    if (!stbi_write_bmp(filename, matrix->w, matrix->h, 4, matrix->buf))
        fprintf(stderr, "failed to write %s\n", filename);
}
|
||||
|
||||
/*
 * Number of pixels (w * h) in `m`, not the byte count. The product is
 * computed in size_t so it does not wrap at 32 bits.
 */
size_t matrixu8_size(Matrixu8* m)
{
    return (size_t)m->w * m->h;
}
|
||||
|
||||
/*
 * Number of pixels (w * h) in `m`. The product is computed in size_t so it
 * does not wrap at 32 bits.
 */
size_t matrixp_size(Matrixp* m)
{
    return (size_t)m->w * m->h;
}
|
||||
|
||||
Matrixd gauss_filter_create(uint32_t size, double sigma)
|
||||
{
|
||||
Matrixd ret = matrixd_create(size, size);
|
||||
|
||||
size_t _i, _j, index;
|
||||
|
||||
double k = (size - 1.0) / 2.0;
|
||||
double sigma2 = sigma * sigma;
|
||||
double pi_part = 1 / (2.0 * M_PI * sigma2);
|
||||
double sum = 0;
|
||||
|
||||
for (_i = 0; _i < size; _i++)
|
||||
for (_j = 0; _j < size; _j++) {
|
||||
index = _i * size + _j;
|
||||
|
||||
double i = _i + 1;
|
||||
double j = _j + 1;
|
||||
|
||||
double exp_numerator_1 = pow(i - (k + 1), 2);
|
||||
double exp_numerator_2 = pow(j - (k + 1), 2);
|
||||
double exp_denominator = 2 * sigma2;
|
||||
double _exp = exp(-(exp_numerator_1 + exp_numerator_2) / exp_denominator);
|
||||
|
||||
ret.buf[index] = pi_part * _exp;
|
||||
sum += ret.buf[index];
|
||||
}
|
||||
|
||||
for (_i = 0; _i < size; _i++)
|
||||
for (_j = 0; _j < size; _j++) {
|
||||
index = _i * size + _j;
|
||||
ret.buf[index] /= sum;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Translate a numeric OpenCL status code into the name of its constant.
 *
 * Known codes form three contiguous ranges (0..-19, -30..-68 and
 * -1000..-1005), each backed by a lookup table indexed by the distance
 * from the start of the range. Any other value yields
 * "Unknown OpenCL error".
 */
const char *cl_get_error_string(int error)
{
    /* Codes 0 .. -19. */
    static const char *const range_0[] = {
        "CL_SUCCESS",
        "CL_DEVICE_NOT_FOUND",
        "CL_DEVICE_NOT_AVAILABLE",
        "CL_COMPILER_NOT_AVAILABLE",
        "CL_MEM_OBJECT_ALLOCATION_FAILURE",
        "CL_OUT_OF_RESOURCES",
        "CL_OUT_OF_HOST_MEMORY",
        "CL_PROFILING_INFO_NOT_AVAILABLE",
        "CL_MEM_COPY_OVERLAP",
        "CL_IMAGE_FORMAT_MISMATCH",
        "CL_IMAGE_FORMAT_NOT_SUPPORTED",
        "CL_BUILD_PROGRAM_FAILURE",
        "CL_MAP_FAILURE",
        "CL_MISALIGNED_SUB_BUFFER_OFFSET",
        "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST",
        "CL_COMPILE_PROGRAM_FAILURE",
        "CL_LINKER_NOT_AVAILABLE",
        "CL_LINK_PROGRAM_FAILURE",
        "CL_DEVICE_PARTITION_FAILED",
        "CL_KERNEL_ARG_INFO_NOT_AVAILABLE",
    };

    /* Codes -30 .. -68. */
    static const char *const range_30[] = {
        "CL_INVALID_VALUE",
        "CL_INVALID_DEVICE_TYPE",
        "CL_INVALID_PLATFORM",
        "CL_INVALID_DEVICE",
        "CL_INVALID_CONTEXT",
        "CL_INVALID_QUEUE_PROPERTIES",
        "CL_INVALID_COMMAND_QUEUE",
        "CL_INVALID_HOST_PTR",
        "CL_INVALID_MEM_OBJECT",
        "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
        "CL_INVALID_IMAGE_SIZE",
        "CL_INVALID_SAMPLER",
        "CL_INVALID_BINARY",
        "CL_INVALID_BUILD_OPTIONS",
        "CL_INVALID_PROGRAM",
        "CL_INVALID_PROGRAM_EXECUTABLE",
        "CL_INVALID_KERNEL_NAME",
        "CL_INVALID_KERNEL_DEFINITION",
        "CL_INVALID_KERNEL",
        "CL_INVALID_ARG_INDEX",
        "CL_INVALID_ARG_VALUE",
        "CL_INVALID_ARG_SIZE",
        "CL_INVALID_KERNEL_ARGS",
        "CL_INVALID_WORK_DIMENSION",
        "CL_INVALID_WORK_GROUP_SIZE",
        "CL_INVALID_WORK_ITEM_SIZE",
        "CL_INVALID_GLOBAL_OFFSET",
        "CL_INVALID_EVENT_WAIT_LIST",
        "CL_INVALID_EVENT",
        "CL_INVALID_OPERATION",
        "CL_INVALID_GL_OBJECT",
        "CL_INVALID_BUFFER_SIZE",
        "CL_INVALID_MIP_LEVEL",
        "CL_INVALID_GLOBAL_WORK_SIZE",
        "CL_INVALID_PROPERTY",
        "CL_INVALID_IMAGE_DESCRIPTOR",
        "CL_INVALID_COMPILER_OPTIONS",
        "CL_INVALID_LINKER_OPTIONS",
        "CL_INVALID_DEVICE_PARTITION_COUNT",
    };

    /* Codes -1000 .. -1005. */
    static const char *const range_1000[] = {
        "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR",
        "CL_PLATFORM_NOT_FOUND_KHR",
        "CL_INVALID_D3D10_DEVICE_KHR",
        "CL_INVALID_D3D10_RESOURCE_KHR",
        "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR",
        "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR",
    };

    /* Range checks come first, so negating `error` below cannot overflow. */
    if (error <= 0 && error >= -19)
        return range_0[-error];

    if (error <= -30 && error >= -68)
        return range_30[-error - 30];

    if (error <= -1000 && error >= -1005)
        return range_1000[-error - 1000];

    return "Unknown OpenCL error";
}
|
61
utils.h
Normal file
61
utils.h
Normal file
@ -0,0 +1,61 @@
|
||||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
|
||||
#include <mpfr.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * DEBUG switches debug output on (non-zero) or off (0). It can be
 * overridden from the compiler command line, e.g. -DDEBUG=0.
 */
#ifndef DEBUG
#define DEBUG 1
#endif

/*
 * printf-style debug message on stderr, emitted only when DEBUG is
 * non-zero. Expands to one statement without a trailing semicolon, so it
 * is safe inside an unbraced if/else, and it accepts a format string with
 * no further arguments.
 */
#define DEBUG_PRINT(...) \
    do { if (DEBUG) fprintf(stderr, __VA_ARGS__); } while (0)
|
||||
|
||||
/* Two-component integer vector. */
typedef struct {
    int x, y;
} Vec2i;
|
||||
|
||||
/* Two-component double-precision vector. */
typedef struct {
    double x, y;
} Vec2d;
|
||||
|
||||
/* Matrix of doubles: dimensions plus a pointer to w * h elements. */
typedef struct {
    uint32_t w, h;  /* dimensions */
    double *buf;    /* element storage */
} Matrixd;
|
||||
|
||||
/* One pixel with 8-bit red, green and blue channels, in that order. */
typedef struct {
    uint8_t r, g, b;
} Pixel;
|
||||
|
||||
typedef struct {
|
||||
uint32_t w;
|
||||
uint32_t h;
|
||||
Pixel *buf;
|
||||
} Matrixp;
|
||||
|
||||
/* Image stored as raw bytes: dimensions plus a pointer to the byte data. */
typedef struct {
    uint32_t w, h;  /* dimensions */
    uint8_t *buf;   /* byte storage */
} Matrixu8;
|
||||
|
||||
Matrixd matrixd_create(uint32_t w, uint32_t h);
|
||||
size_t matrixd_size(Matrixd *m);
|
||||
void matrixd_print(Matrixd *m);
|
||||
|
||||
Matrixp matrixp_create(uint32_t w, uint32_t h);
|
||||
Matrixp matrixp_create_from_file(const char *filename);
|
||||
Matrixu8 matrixu8_create_from_file(const char *filename);
|
||||
void matrixu8_write_to_bmp(Matrixu8 *matrix, const char *filename);
|
||||
size_t matrixp_size(Matrixp* m);
|
||||
size_t matrixu8_size(Matrixu8* m);
|
||||
|
||||
Matrixd gauss_filter_create(uint32_t size, double sigma);
|
||||
|
||||
const char* cl_get_error_string(int error);
|
||||
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user