diff --git a/CMakeLists.txt b/CMakeLists.txt index 41ca23fe5..a1adfef4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1854,9 +1854,6 @@ if (ENABLE_VORTEX) if(NOT DEFINED VORTEX_PREFIX) message(FATAL_ERROR "should set 'VORTEX_PREFIX' option") endif() - if(NOT DEFINED VORTEX_XLEN) - message(FATAL_ERROR "should set 'VORTEX_XLEN' option") - endif() set(BUILD_VORTEX 1) set(VORTEX_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store") set(VORTEX_DEVICE_CL_VERSION_MAJOR 1) diff --git a/README.vortex b/README.vortex index ebdbcb125..b6bb5f1dc 100644 --- a/README.vortex +++ b/README.vortex @@ -5,7 +5,7 @@ - export POCL_PATH=$TOOLDIR/pocl - export VORTEX_PREFIX=$HOME/tools/vortex - export LLVM_PREFIX=$TOOLDIR/llvm-vortex -- cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=$POCL_PATH -DWITH_LLVM_CONFIG=$LLVM_PREFIX/bin/llvm-config -DVORTEX_PREFIX=$VORTEX_PREFIX -DVORTEX_XLEN=32 -DENABLE_VORTEX=ON -DKERNEL_CACHE_DEFAULT=OFF -DENABLE_HOST_CPU_DEVICES=OFF -DENABLE_TESTS=OFF -DPOCL_DEBUG_MESSAGES=ON -DENABLE_ICD=OFF .. +- cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=$POCL_PATH -DWITH_LLVM_CONFIG=$LLVM_PREFIX/bin/llvm-config -DVORTEX_PREFIX=$VORTEX_PREFIX -DENABLE_VORTEX=ON -DKERNEL_CACHE_DEFAULT=OFF -DENABLE_HOST_CPU_DEVICES=OFF -DENABLE_TESTS=OFF -DPOCL_DEBUG_MESSAGES=ON -DENABLE_ICD=OFF .. - make -j`nproc` - make install - cp -r ../include $POCL_RT_PATH \ No newline at end of file diff --git a/config.h.in.cmake b/config.h.in.cmake index c5c3dee9f..f3e3775b8 100644 --- a/config.h.in.cmake +++ b/config.h.in.cmake @@ -123,8 +123,6 @@ #define VORTEX_DEVICE_EXTENSIONS "@VORTEX_DEVICE_EXTENSIONS@" -#define VORTEX_XLEN @VORTEX_XLEN@ - #endif #if defined(ENABLE_HOST_CPU_DEVICES) diff --git a/lib/CL/devices/vortex/pocl-vortex.c b/lib/CL/devices/vortex/pocl-vortex.c index 91119a766..0ab5cf0ab 100644 --- a/lib/CL/devices/vortex/pocl-vortex.c +++ b/lib/CL/devices/vortex/pocl-vortex.c @@ -47,6 +47,8 @@ typedef struct { pocl_lock_t compile_lock; + int is_64bit; + size_t ctx_refcount; } vortex_device_data_t; @@ -120,15 +122,14 @@ void pocl_vortex_init_device_ops(struct pocl_device_ops *ops) { ops->free_mapping_ptr = pocl_driver_free_mapping_ptr; } -char * pocl_vortex_build_hash (cl_device_id device) +char * pocl_vortex_build_hash (cl_device_id dev) { char *res = (char *)calloc(1000, sizeof(char)); - vortex_device_data_t *dd = (vortex_device_data_t *)device->data; -#if (VORTEX_XLEN == 64) - snprintf(res, 1000, "vortex-riscv64-unknown-unknown-elf"); -#else - snprintf(res, 1000, "vortex-riscv32-unknown-unknown-elf"); -#endif + if (dev->address_bits == 64) { + snprintf(res, 1000, "vortex-riscv64-unknown-unknown-elf"); + } else { + snprintf(res, 1000, "vortex-riscv32-unknown-unknown-elf"); + } return res; } @@ -143,9 +144,9 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) int vx_err; vortex_device_data_t *dd; - const char* sz_cflags = pocl_get_string_option("POCL_VORTEX_CFLAGS", ""); + const char* sz_xlen = pocl_get_string_option("POCL_VORTEX_XLEN", "32"); - int is64bit = (VORTEX_XLEN == 64); + int is_64bit = (strcmp(sz_xlen, "64") == 0); assert (dev->data == NULL); @@ -169,19 +170,19 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) dev->run_workgroup_pass = CL_FALSE; dev->execution_capabilities = CL_EXEC_KERNEL; //dev->global_as_id = VX_ADDR_SPACE_GLOBAL; - //dev->local_as_id = VX_ADDR_SPACE_LOCAL; + //dev->local_as_id = VX_ADDR_SPACE_LOCAL;439 //dev->constant_as_id = VX_ADDR_SPACE_CONSTANT; dev->autolocals_to_args = POCL_AUTOLOCALS_TO_ARGS_ALWAYS; dev->device_alloca_locals = CL_FALSE; dev->device_side_printf = 0; - dev->has_64bit_long = is64bit; + dev->has_64bit_long = is_64bit; dev->llvm_cpu = NULL; - dev->address_bits = VORTEX_XLEN; - dev->llvm_target_triplet = is64bit ? "riscv64-unknown-unknown" : "riscv32-unknown-unknown"; - dev->llvm_abi = is64bit ? "lp64d" : "ilp32f"; - dev->llvm_cpu = is64bit ? "generic-rv64" : "generic-rv32"; - dev->kernellib_name = is64bit ? "kernel-riscv64" : "kernel-riscv32"; + dev->address_bits = is_64bit ? 64 : 32; + dev->llvm_target_triplet = is_64bit ? "riscv64-unknown-unknown" : "riscv32-unknown-unknown"; + dev->llvm_abi = is_64bit ? "lp64d" : "ilp32f"; + dev->llvm_cpu = is_64bit ? "generic-rv64" : "generic-rv32"; + dev->kernellib_name = is_64bit ? "kernel-riscv64" : "kernel-riscv32"; dev->kernellib_fallback_name = NULL; dev->kernellib_subdir = "vortex"; dev->device_aux_functions = vortex_native_device_aux_funcs; @@ -252,6 +253,8 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) dd->ctx_refcount = 0; + dd->is_64bit = is_64bit; + POCL_INIT_LOCK(dd->compile_lock); POCL_INIT_LOCK(dd->cq_lock); @@ -261,8 +264,8 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) return CL_SUCCESS; } -cl_int pocl_vortex_uninit (unsigned j, cl_device_id device) { - vortex_device_data_t *dd = (vortex_device_data_t *)device->data; +cl_int pocl_vortex_uninit (unsigned j, cl_device_id dev) { + vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; if (NULL == dd) return CL_SUCCESS; @@ -274,12 +277,12 @@ cl_int pocl_vortex_uninit (unsigned j, cl_device_id device) { POCL_DESTROY_LOCK (dd->compile_lock); POCL_DESTROY_LOCK (dd->cq_lock); POCL_MEM_FREE(dd); - device->data = NULL; + dev->data = NULL; return CL_SUCCESS; } -int pocl_vortex_init_context (cl_device_id device, cl_context context) { - vortex_device_data_t *dd = (vortex_device_data_t *)device->data; +int pocl_vortex_init_context (cl_device_id dev, cl_context context) { + vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; if (NULL == dd) return CL_SUCCESS; @@ -288,13 +291,13 @@ int pocl_vortex_init_context (cl_device_id device, cl_context context) { return CL_SUCCESS; } -int pocl_vortex_free_context (cl_device_id device, cl_context context) { - vortex_device_data_t *dd = (vortex_device_data_t *)device->data; +int pocl_vortex_free_context (cl_device_id dev, cl_context context) { + vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; if (NULL == dd) return CL_SUCCESS; if (--dd->ctx_refcount == 0) { - pocl_vortex_uninit(0, device); + pocl_vortex_uninit(0, dev); } return CL_SUCCESS; @@ -302,8 +305,8 @@ int pocl_vortex_free_context (cl_device_id device, cl_context context) { int pocl_vortex_post_build_program (cl_program program, cl_uint device_i) { int result; - cl_device_id device = program->devices[device_i]; - vortex_device_data_t *ddata = (vortex_device_data_t *)device->data; + cl_device_id dev = program->devices[device_i]; + vortex_device_data_t *ddata = (vortex_device_data_t *)dev->data; vortex_program_data_t *pdata = NULL; POCL_LOCK (ddata->compile_lock); @@ -339,14 +342,14 @@ int pocl_vortex_post_build_program (cl_program program, cl_uint device_i) { return result; } -int pocl_vortex_free_program (cl_device_id device, cl_program program, +int pocl_vortex_free_program (cl_device_id dev, cl_program program, unsigned device_i) { - vortex_device_data_t *ddata = (vortex_device_data_t *)device->data; + vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; vortex_program_data_t *pdata = (vortex_program_data_t *)program->data[device_i]; if (pdata == NULL) return CL_SUCCESS; - pocl_driver_free_program (device, program, device_i); + pocl_driver_free_program (dev, program, device_i); POCL_MEM_FREE (pdata->kernel_names); POCL_MEM_FREE (pdata); @@ -355,7 +358,7 @@ int pocl_vortex_free_program (cl_device_id device, cl_program program, return CL_SUCCESS; } -int pocl_vortex_create_kernel (cl_device_id device, cl_program program, +int pocl_vortex_create_kernel (cl_device_id dev, cl_program program, cl_kernel kernel, unsigned device_i) { int result = CL_SUCCESS; pocl_kernel_metadata_t *meta = kernel->meta; @@ -392,7 +395,7 @@ int pocl_vortex_create_kernel (cl_device_id device, cl_program program, return result; } -int pocl_vortex_free_kernel (cl_device_id device, cl_program program, +int pocl_vortex_free_kernel (cl_device_id dev, cl_program program, cl_kernel kernel, unsigned device_i) { pocl_kernel_metadata_t *meta = kernel->meta; assert(meta->data != NULL); @@ -433,7 +436,7 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { assert (data != NULL); dd = (vortex_device_data_t *)data; - int ptr_size = VORTEX_XLEN / 8; + int ptr_size = dd->is_64bit ? 8 : 4; // calculate kernel arguments buffer size int local_mem_size = 0; @@ -602,9 +605,9 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { vx_mem_free(vx_kargs_buffer); } -cl_int pocl_vortex_alloc_mem_obj(cl_device_id device, cl_mem mem_obj, void *host_ptr) { +cl_int pocl_vortex_alloc_mem_obj(cl_device_id dev, cl_mem mem_obj, void *host_ptr) { int vx_err; - pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id]; + pocl_mem_identifier *p = &mem_obj->device_ptrs[dev->global_mem_id]; /* let other drivers preallocate */ if ((mem_obj->flags & CL_MEM_ALLOC_HOST_PTR) && (mem_obj->mem_host_ptr == NULL)) @@ -626,7 +629,7 @@ cl_int pocl_vortex_alloc_mem_obj(cl_device_id device, cl_mem mem_obj, void *host if ((flags & CL_MEM_WRITE_ONLY) != 0) vx_flags = VX_MEM_WRITE; - vortex_device_data_t* dd = (vortex_device_data_t *)device->data; + vortex_device_data_t* dd = (vortex_device_data_t *)dev->data; vx_buffer_h vx_buffer; vx_err = vx_mem_alloc(dd->vx_device, mem_obj->size, vx_flags, &vx_buffer); @@ -663,8 +666,8 @@ cl_int pocl_vortex_alloc_mem_obj(cl_device_id device, cl_mem mem_obj, void *host return CL_SUCCESS; } -void pocl_vortex_free(cl_device_id device, cl_mem mem_obj) { - pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id]; +void pocl_vortex_free(cl_device_id dev, cl_mem mem_obj) { + pocl_mem_identifier *p = &mem_obj->device_ptrs[dev->global_mem_id]; cl_mem_flags flags = mem_obj->flags; vortex_buffer_data_t* buf_data = (vortex_buffer_data_t*)p->mem_ptr; @@ -742,16 +745,16 @@ void pocl_vortex_submit (_cl_command_node *node, cl_command_queue cq) { return; } -void pocl_vortex_flush (cl_device_id device, cl_command_queue cq) { - vortex_device_data_t *dd = (vortex_device_data_t *)device->data; +void pocl_vortex_flush (cl_device_id dev, cl_command_queue cq) { + vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; POCL_LOCK (dd->cq_lock); vortex_command_scheduler (dd); POCL_UNLOCK (dd->cq_lock); } -void pocl_vortex_join (cl_device_id device, cl_command_queue cq) { - vortex_device_data_t *dd = (vortex_device_data_t *)device->data; +void pocl_vortex_join (cl_device_id dev, cl_command_queue cq) { + vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; POCL_LOCK (dd->cq_lock); vortex_command_scheduler (dd); @@ -760,8 +763,8 @@ void pocl_vortex_join (cl_device_id device, cl_command_queue cq) { return; } -void pocl_vortex_notify (cl_device_id device, cl_event event, cl_event finished) { - vortex_device_data_t *dd = (vortex_device_data_t *)device->data; +void pocl_vortex_notify (cl_device_id dev, cl_event event, cl_event finished) { + vortex_device_data_t *dd = (vortex_device_data_t *)dev->data; _cl_command_node * volatile node = event->command; if (finished->status < CL_COMPLETE) diff --git a/lib/kernel/vortex/CMakeLists.txt b/lib/kernel/vortex/CMakeLists.txt index c6dbf5542..71aaa4b2d 100644 --- a/lib/kernel/vortex/CMakeLists.txt +++ b/lib/kernel/vortex/CMakeLists.txt @@ -25,17 +25,15 @@ include("bitcode_rules") -if( ${VORTEX_XLEN} EQUAL 64 ) - set(LLVM_TARGET riscv64) - set(TARGET_MARCH rv64imafd) -else( ${VORTEX_XLEN} EQUAL 64 ) - set(LLVM_TARGET riscv32) - set(TARGET_MARCH rv32imaf) -endif( ${VORTEX_XLEN} EQUAL 64 ) +set(LLVM_TARGET_64 riscv64) +set(TARGET_MARCH_64 rv64imafd) + +set(LLVM_TARGET_32 riscv32) +set(TARGET_MARCH_32 rv32imaf) set(KERNEL_SOURCES ${SOURCES_GENERIC}) -foreach(FILE printf.c print_base.c +foreach(FILE printf.c printf_base.c get_work_dim.c get_num_groups.c get_local_size.c get_global_offset.c get_global_size.c get_group_id.c get_local_id.c get_global_id.c get_linear_id.c barrier.ll @@ -48,32 +46,38 @@ foreach(FILE workitems.c printf.c barrier.c) list(APPEND KERNEL_SOURCES "vortex/${FILE}") endforeach() -set(CLANG_FLAGS "-ffreestanding" "-target" "${LLVM_TARGET}" "-march=${TARGET_MARCH}" "-emit-llvm" "-D_CL_DISABLE_HALF" "-I${VORTEX_PREFIX}/kernel/include") - set(KERNEL_CL_FLAGS "-Xclang" "-cl-std=CL${VORTEX_DEVICE_CL_STD}" "-D__OPENCL_C_VERSION__=${VORTEX_DEVICE_CL_VERSION}" ${KERNEL_CL_FLAGS}) set(LLC_FLAGS "") -set(DEVICE_CL_FLAGS "-D__OPENCL_VERSION__=${VORTEX_DEVICE_CL_VERSION} -DPOCL_DEVICE_ADDRESS_BITS=${VORTEX_XLEN}") - separate_arguments(VORTEX_DEVICE_EXTENSIONS) foreach(EXT ${VORTEX_DEVICE_EXTENSIONS}) set(DEVICE_CL_FLAGS "${DEVICE_CL_FLAGS} -D${EXT}") endforeach() separate_arguments(DEVICE_CL_FLAGS) -make_kernel_bc(KERNEL_BC "${LLVM_TARGET}" "BCs" 0 0 0 ${KERNEL_SOURCES}) +set(CLANG_FLAGS "-ffreestanding" "-target" "${LLVM_TARGET_32}" "-march=${TARGET_MARCH_32}" "-emit-llvm" "-DPOCL_DEVICE_ADDRESS_BITS=32" "-D_CL_DISABLE_HALF" "-I${VORTEX_PREFIX}/kernel/include") +set(DEVICE_CL_FLAGS "-D__OPENCL_VERSION__=${VORTEX_DEVICE_CL_VERSION} -DPOCL_DEVICE_ADDRESS_BITS=32") +make_kernel_bc(KERNEL_BC_32 "${LLVM_TARGET_32}" "VX32-BCs" 0 0 0 ${KERNEL_SOURCES}) + +set(CLANG_FLAGS "-ffreestanding" "-target" "${LLVM_TARGET_64}" "-march=${TARGET_MARCH_64}" "-emit-llvm" "-DPOCL_DEVICE_ADDRESS_BITS=64" "-D_CL_DISABLE_HALF" "-I${VORTEX_PREFIX}/kernel/include") +set(DEVICE_CL_FLAGS "-D__OPENCL_VERSION__=${VORTEX_DEVICE_CL_VERSION} -DPOCL_DEVICE_ADDRESS_BITS=64") +make_kernel_bc(KERNEL_BC_64 "${LLVM_TARGET_64}" "VX64-BCs" 0 0 0 ${KERNEL_SOURCES}) # just debug -message(STATUS "${LLVM_TARGET} Kernel BC: ${KERNEL_BC}") +message(STATUS "${LLVM_TARGET_32} Kernel BC: ${KERNEL_BC_32}") +message(STATUS "${LLVM_TARGET_64} Kernel BC: ${KERNEL_BC_64}") -list(APPEND KERNEL_BC_LIST "${KERNEL_BC}") +list(APPEND KERNEL_BC_LIST "${KERNEL_BC_32}") +list(APPEND KERNEL_BC_LIST "${KERNEL_BC_64}") set(KERNEL_BC_LIST "${KERNEL_BC_LIST}" PARENT_SCOPE) # a target is needed... -add_custom_target("kernel_${LLVM_TARGET}" DEPENDS ${KERNEL_BC}) +add_custom_target("kernel_${LLVM_TARGET_32}" DEPENDS ${KERNEL_BC_32}) +add_custom_target("kernel_${LLVM_TARGET_64}" DEPENDS ${KERNEL_BC_64}) -list(APPEND KERNEL_TARGET_LIST "kernel_${LLVM_TARGET}") +list(APPEND KERNEL_TARGET_LIST "kernel_${LLVM_TARGET_32}") +list(APPEND KERNEL_TARGET_LIST "kernel_${LLVM_TARGET_64}") set(KERNEL_TARGET_LIST "${KERNEL_TARGET_LIST}" PARENT_SCOPE) -install(FILES "${KERNEL_BC}" DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}" COMPONENT "lib") +install(FILES "${KERNEL_BC_32}" "${KERNEL_BC_64}" DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}" COMPONENT "lib") diff --git a/lib/kernel/vortex/workitems.c b/lib/kernel/vortex/workitems.c index 5ac4add8d..4499e8a50 100644 --- a/lib/kernel/vortex/workitems.c +++ b/lib/kernel/vortex/workitems.c @@ -1,14 +1,22 @@ #include +#if __riscv_xlen == 64 + typedef uint64_t SizeT; +#elif __riscv_xlen == 32 + typedef uint32_t SizeT; +#else + #error "Unsupported RISC-V XLEN" +#endif + extern int g_work_dim; extern dim3_t g_global_offset; -uint32_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +uint32_t _CL_OVERLOADABLE get_work_dim (void) { return g_work_dim; } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_num_groups(uint32_t dimindx) { switch (dimindx) { default: return gridDim.x; @@ -17,7 +25,7 @@ get_num_groups(uint32_t dimindx) { } } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_local_size(uint32_t dimindx) { switch (dimindx) { default: return blockDim.x; @@ -26,7 +34,7 @@ get_local_size(uint32_t dimindx) { } } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_global_offset(uint32_t dimindx) { switch (dimindx) { default: return g_global_offset.x; @@ -35,7 +43,7 @@ get_global_offset(uint32_t dimindx) { } } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_group_id(uint32_t dimindx) { switch (dimindx) { default: return blockIdx.x; @@ -44,7 +52,7 @@ get_group_id(uint32_t dimindx) { } } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_local_id(uint32_t dimindx) { switch (dimindx) { default: return threadIdx.x; @@ -53,7 +61,7 @@ get_local_id(uint32_t dimindx) { } } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_global_size(uint32_t dimindx) { switch (dimindx) { default: return blockDim.x * gridDim.x; @@ -62,7 +70,7 @@ get_global_size(uint32_t dimindx) { } } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_global_id(uint32_t dimindx) { switch (dimindx) { default: return blockIdx.x * blockDim.x + threadIdx.x + g_global_offset.x; @@ -71,14 +79,14 @@ get_global_id(uint32_t dimindx) { } } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_global_linear_id(void) { return ((blockIdx.z * blockDim.z + threadIdx.z) * blockDim.y * gridDim.y * blockDim.x * gridDim.x) + ((blockIdx.y * blockDim.y + threadIdx.y) * blockDim.x * gridDim.x) + ((blockIdx.x * blockDim.z + threadIdx.x)); } -size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE +SizeT _CL_OVERLOADABLE get_local_linear_id(void) { return (threadIdx.z * blockDim.y * blockDim.x) + (threadIdx.y * blockDim.x) + threadIdx.x; } \ No newline at end of file