From patchwork Wed Nov 4 10:53:06 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Yang, Rong R" X-Patchwork-Id: 7548751 Return-Path: X-Original-To: patchwork-intel-gfx@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork2.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork2.web.kernel.org (Postfix) with ESMTP id C3137BEEA4 for ; Wed, 4 Nov 2015 10:29:24 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 13D54205C6 for ; Wed, 4 Nov 2015 10:29:23 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by mail.kernel.org (Postfix) with ESMTP id 2EA4B20595 for ; Wed, 4 Nov 2015 10:29:21 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 7BCCB6E688; Wed, 4 Nov 2015 02:29:20 -0800 (PST) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by gabe.freedesktop.org (Postfix) with ESMTP id 61BC46E66A; Wed, 4 Nov 2015 02:29:17 -0800 (PST) Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga103.fm.intel.com with ESMTP; 04 Nov 2015 02:29:16 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.20,242,1444719600"; d="scan'208";a="826720131" Received: from yr-ivb.sh.intel.com ([10.239.159.103]) by fmsmga001.fm.intel.com with ESMTP; 04 Nov 2015 02:29:15 -0800 From: Yang Rong To: intel-gfx@lists.freedesktop.org, beignet@lists.freedesktop.org, dri-devel@lists.freedesktop.org, chris@chris-wilson.co.uk, michal.winiarski@intel.com Date: Wed, 4 Nov 2015 18:53:06 +0800 Message-Id: <1446634386-885-1-git-send-email-rong.r.yang@intel.com> X-Mailer: git-send-email 2.1.4 Subject: [Intel-gfx] [PATCH] OCL20: Add svm support. 
X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Spam-Status: No, score=-4.2 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_MED, T_RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Enable CL_DEVICE_SVM_COARSE_GRAIN_BUFFER SVM support, using userptr and softpin to implement it. Userptr is used to share pages between the CPU and the GPU, and softpin is used to give a buffer the same address on the CPU and the GPU. It currently works on i386 systems; x86_64 depends on backend support. This patch is based on the softpin patch for the DRM library and the DRM kernel driver: http://lists.freedesktop.org/archives/intel-gfx/2015-September/075446.html. Signed-off-by: Yang Rong --- src/cl_api.c | 87 ++++++++++++++++++++++++++++++++++ src/cl_context.c | 13 +++++ src/cl_context.h | 4 ++ src/cl_device_id.c | 1 + src/cl_device_id.h | 3 +- src/cl_driver.h | 3 ++ src/cl_driver_defs.c | 1 + src/cl_enqueue.c | 6 ++- src/cl_gen8_device.h | 4 +- src/cl_gen9_device.h | 4 +- src/cl_gt_device.h | 1 + src/cl_kernel.c | 36 ++++++++++++++ src/cl_kernel.h | 8 +++- src/cl_mem.c | 121 +++++++++++++++++++++++++++++++++++++++++++++-- src/cl_mem.h | 13 ++++- src/intel/intel_driver.c | 5 +- 16 files changed, 295 insertions(+), 15 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index a18bc99..134870d 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -575,6 +575,80 @@ error: return mem; } +void * +clSVMAlloc (cl_context context, + cl_svm_mem_flags flags, + size_t size, + unsigned int alignment) +{ + cl_int err = CL_SUCCESS; + CHECK_CONTEXT (context); + return cl_mem_svm_allocate(context, flags, size, alignment); +error: + return NULL; +} + +void + clSVMFree (cl_context 
context, void* svm_pointer) +{ + cl_int err = CL_SUCCESS; + CHECK_CONTEXT (context); + return cl_mem_svm_delete(context, svm_pointer); +error: + return; +} + +cl_int +clEnqueueSVMMap (cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags map_flags, + void *svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) +{ + cl_int err = CL_SUCCESS; + cl_mem buffer; + + CHECK_QUEUE(command_queue); + buffer = cl_context_get_svm_from_ptr(command_queue->ctx, svm_ptr); + if(buffer == NULL) { + err = CL_INVALID_VALUE; + goto error; + } + + clEnqueueMapBuffer(command_queue, buffer, blocking_map, map_flags, 0, size, + num_events_in_wait_list, event_wait_list, event, &err); +error: + return err; +} + +cl_int +clEnqueueSVMUnmap (cl_command_queue command_queue, + void *svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) +{ + cl_int err = CL_SUCCESS; + cl_mem buffer; + + CHECK_QUEUE(command_queue); + buffer = cl_context_get_svm_from_ptr(command_queue->ctx, svm_ptr); + if(buffer == NULL) { + err = CL_INVALID_VALUE; + goto error; + } + + err = clEnqueueUnmapMemObject(command_queue, buffer, svm_ptr, + num_events_in_wait_list, event_wait_list, event); + +error: + return err; +} + + cl_mem clCreateImage2D(cl_context context, cl_mem_flags flags, @@ -1248,6 +1322,19 @@ error: return err; } +cl_int +clSetKernelArgSVMPointer (cl_kernel kernel, + cl_uint arg_index, + const void *arg_value) +{ + cl_int err = CL_SUCCESS; + CHECK_KERNEL(kernel); + + err = cl_kernel_set_arg_svm_pointer(kernel, arg_index, arg_value); +error: + return err; +} + cl_int clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_index, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { diff --git a/src/cl_context.c b/src/cl_context.c index c45e0aa..3fcdb63 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -328,3 +328,16 @@ unlock: 
pthread_mutex_unlock(&ctx->program_lock); return cl_kernel_dup(ker); } + +cl_mem +cl_context_get_svm_from_ptr(cl_context ctx, void * p) +{ + cl_mem buf = ctx->svm_buffers; + while(buf) { + assert(buf->host_ptr && buf->is_svm); + if(buf->host_ptr == p) + return buf; + buf = buf->next; + } + return NULL; +} diff --git a/src/cl_context.h b/src/cl_context.h index ef94823..365ec32 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -106,11 +106,13 @@ struct _cl_context { cl_command_queue queues; /* All command queues currently allocated */ cl_program programs; /* All programs currently allocated */ cl_mem buffers; /* All memory object currently allocated */ + cl_mem svm_buffers; /* All svm object currently allocated */ cl_sampler samplers; /* All sampler object currently allocated */ cl_event events; /* All event object currently allocated */ pthread_mutex_t queue_lock; /* To allocate and deallocate queues */ pthread_mutex_t program_lock; /* To allocate and deallocate programs */ pthread_mutex_t buffer_lock; /* To allocate and deallocate buffers */ + pthread_mutex_t svm_lock; /* To allocate and deallocate SVM */ pthread_mutex_t sampler_lock; /* To allocate and deallocate samplers */ pthread_mutex_t event_lock; /* To allocate and deallocate events */ cl_program internal_prgs[CL_INTERNAL_KERNEL_MAX]; @@ -168,5 +170,7 @@ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx); extern cl_kernel cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index, const char * str_kernel, size_t size, const char * str_option); +/* Get the SVM from pointer, return NULL if pointer is not from SVM */ +extern cl_mem cl_context_get_svm_from_ptr(cl_context ctx, void *p); #endif /* __CL_CONTEXT_H__ */ diff --git a/src/cl_device_id.c b/src/cl_device_id.c index deb2fad..b518d48 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -836,6 +836,7 @@ cl_get_device_info(cl_device_id device, DECL_FIELD(REFERENCE_COUNT, device_reference_count) DECL_FIELD(IMAGE_PITCH_ALIGNMENT, 
image_pitch_alignment) DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment) + DECL_FIELD(SVM_CAPABILITIES, svm_capabilities) case CL_DRIVER_VERSION: if (param_value_size_ret) { diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 4a923ef..6341362 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -51,7 +51,8 @@ struct _cl_device_id { cl_uint native_vector_width_half; cl_uint max_clock_frequency; cl_uint address_bits; - cl_ulong max_mem_alloc_size; + size_t max_mem_alloc_size; + cl_device_svm_capabilities svm_capabilities; cl_bool image_support; cl_uint max_read_image_args; cl_uint max_write_image_args; diff --git a/src/cl_driver.h b/src/cl_driver.h index 4ffca09..639822b 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -299,6 +299,9 @@ extern cl_buffer_alloc_cb *cl_buffer_alloc; typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long); extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr; +typedef cl_buffer (cl_buffer_set_softpin_offset_cb)(cl_buffer, uint64_t); +extern cl_buffer_set_softpin_offset_cb *cl_buffer_set_softpin_offset; + /* Set a buffer's tiling mode */ typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride); extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling; diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index b77acdc..333028c 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -32,6 +32,7 @@ LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL; /* Buffer */ LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL; LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL; +LOCAL cl_buffer_set_softpin_offset_cb *cl_buffer_set_softpin_offset = NULL; LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL; LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL; LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL; diff --git a/src/cl_enqueue.c 
b/src/cl_enqueue.c index cec368c..371b2d4 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -261,7 +261,8 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || - mem->type == CL_MEM_SUBBUFFER_TYPE); + mem->type == CL_MEM_SUBBUFFER_TYPE || + mem->type == CL_MEM_SVM_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; if (mem->is_userptr) @@ -362,7 +363,8 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) if (memobj->flags & CL_MEM_USE_HOST_PTR) { if(memobj->type == CL_MEM_BUFFER_TYPE || - memobj->type == CL_MEM_SUBBUFFER_TYPE) { + memobj->type == CL_MEM_SUBBUFFER_TYPE || + memobj->type == CL_MEM_SVM_TYPE) { assert(mapped_ptr >= memobj->host_ptr && mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size); /* Sync the data. */ diff --git a/src/cl_gen8_device.h b/src/cl_gen8_device.h index 08fde48..ab845d1 100644 --- a/src/cl_gen8_device.h +++ b/src/cl_gen8_device.h @@ -24,7 +24,7 @@ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10, .scratch_mem_size = 2 << 20, -.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul, -.global_mem_size = 4 * 1024 * 1024 * 1024ul, +.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ull, +.global_mem_size = 4 * 1024 * 1024 * 1024ull, #include "cl_gt_device.h" diff --git a/src/cl_gen9_device.h b/src/cl_gen9_device.h index f50f9c7..03f4dad 100644 --- a/src/cl_gen9_device.h +++ b/src/cl_gen9_device.h @@ -24,8 +24,8 @@ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10, .scratch_mem_size = 2 << 20, -.max_mem_alloc_size = 4 * 1024 * 1024 * 1024ul, -.global_mem_size = 4 * 1024 * 1024 * 1024ul, +.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ull, +.global_mem_size = 4 * 1024 * 1024 * 1024ull, #include "cl_gt_device.h" diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index e61ff61..cf48b6d 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -41,6 +41,7 @@ .native_vector_width_half = 8, .preferred_wg_sz_mul = 16, 
.address_bits = 32, +.svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER, .image_support = CL_TRUE, .max_read_image_args = BTI_MAX_READ_IMAGE_ARGS, .max_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS, diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 58a1224..723eac3 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -213,11 +213,47 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) cl_mem_delete(k->args[index].mem); k->args[index].mem = mem; k->args[index].is_set = 1; + k->args[index].is_svm = mem->is_svm; + if(mem->is_svm) + k->args[index].ptr = mem->host_ptr; k->args[index].local_sz = 0; k->args[index].bti = interp_kernel_get_arg_bti(k->opaque, index); return CL_SUCCESS; } + +LOCAL cl_int +cl_kernel_set_arg_svm_pointer(cl_kernel k, cl_uint index, const void *value) +{ + enum gbe_arg_type arg_type; /* kind of argument */ + size_t arg_sz; /* size of the argument */ + cl_context ctx = k->program->ctx; + cl_mem mem= cl_context_get_svm_from_ptr(ctx, value); + + if (UNLIKELY(index >= k->arg_n)) + return CL_INVALID_ARG_INDEX; + arg_type = interp_kernel_get_arg_type(k->opaque, index); + arg_sz = interp_kernel_get_arg_size(k->opaque, index); + + if(arg_type != GBE_ARG_GLOBAL_PTR && arg_type != GBE_ARG_CONSTANT_PTR ) + return CL_INVALID_ARG_VALUE; + + if(mem == NULL) + return CL_INVALID_ARG_VALUE; + + cl_mem_add_ref(mem); + if (k->args[index].mem) + cl_mem_delete(k->args[index].mem); + + k->args[index].ptr = value; + k->args[index].mem = mem; + k->args[index].is_set = 1; + k->args[index].is_svm = 1; + k->args[index].local_sz = 0; + k->args[index].bti = interp_kernel_get_arg_bti(k->opaque, index); + return 0; +} + LOCAL int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) diff --git a/src/cl_kernel.h b/src/cl_kernel.h index 140bbb1..5b3294b 100644 --- a/src/cl_kernel.h +++ b/src/cl_kernel.h @@ -38,8 +38,10 @@ typedef struct 
cl_argument { cl_mem mem; /* For image and regular buffers */ cl_sampler sampler; /* For sampler. */ unsigned char bti; - uint32_t local_sz:31; /* For __local size specification */ + void *ptr; /* SVM ptr value. */ + uint32_t local_sz:30; /* For __local size specification */ uint32_t is_set:1; /* All args must be set before NDRange */ + uint32_t is_svm:1; /* Indicate this argument is SVMPointer */ } cl_argument; /* One OCL function */ @@ -98,6 +100,10 @@ extern int cl_kernel_set_arg(cl_kernel, uint32_t arg_index, size_t arg_size, const void *arg_value); +extern int cl_kernel_set_arg_svm_pointer(cl_kernel, + uint32_t arg_index, + const void *arg_value); + /* Get the argument information */ extern int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, diff --git a/src/cl_mem.c b/src/cl_mem.c index be64abe..0d4d328 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -83,6 +83,7 @@ cl_get_mem_object_info(cl_mem mem, FIELD_SIZE(MEM_CONTEXT, cl_context); FIELD_SIZE(MEM_ASSOCIATED_MEMOBJECT, cl_mem); FIELD_SIZE(MEM_OFFSET, size_t); + FIELD_SIZE(MEM_USES_SVM_POINTER, cl_bool); default: return CL_INVALID_VALUE; } @@ -131,6 +132,8 @@ cl_get_mem_object_info(cl_mem mem, *((size_t *)param_value) = buf->sub_offset; } break; + case CL_MEM_USES_SVM_POINTER: + *((cl_uint *)param_value) = mem->is_svm; } return CL_SUCCESS; @@ -291,6 +294,9 @@ cl_mem_allocate(enum cl_mem_type type, if (type == CL_MEM_BUFFER_TYPE) { if (flags & CL_MEM_USE_HOST_PTR) { assert(host_ptr != NULL); + cl_mem svm_mem = NULL; + if((svm_mem = cl_context_get_svm_from_ptr(ctx, host_ptr)) != NULL) + mem->is_svm = 1; /* userptr not support tiling */ if (!is_tiled) { if (ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr) { @@ -298,7 +304,12 @@ cl_mem_allocate(enum cl_mem_type type, mem->offset = host_ptr - aligned_host_ptr; mem->is_userptr = 1; size_t aligned_sz = ALIGN((mem->offset + sz), page_size); - mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, 
aligned_sz, 0); + if(svm_mem != NULL) { + mem->bo = svm_mem->bo; + cl_mem_add_ref(svm_mem); + } else + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0); + bufCreated = 1; } } @@ -608,6 +619,91 @@ void cl_mem_replace_buffer(cl_mem buffer, cl_buffer new_bo) } } +void* cl_mem_svm_allocate(cl_context ctx, cl_svm_mem_flags flags, + size_t size, unsigned int alignment) +{ + cl_int err = CL_SUCCESS; + size_t max_mem_size; + + if(UNLIKELY(alignment & (alignment - 1))) + return NULL; + + if ((err = cl_get_device_info(ctx->device, + CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(max_mem_size), + &max_mem_size, + NULL)) != CL_SUCCESS) { + return NULL; + } + + if(UNLIKELY(size == 0 || size > max_mem_size)) { + return NULL; + } + + if (flags & (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) { + return NULL; + } + if (flags && ((flags & (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_FINE_GRAIN_BUFFER)) + || ((flags & CL_MEM_WRITE_ONLY) && (flags & CL_MEM_READ_ONLY)) + || ((flags & CL_MEM_WRITE_ONLY) && (flags & CL_MEM_READ_WRITE)) + || ((flags & CL_MEM_READ_ONLY) && (flags & CL_MEM_READ_WRITE)))) { + return NULL; + } + +#ifdef HAS_USERPTR + cl_buffer_mgr bufmgr = NULL; + void * ptr = NULL; + cl_mem mem; + _cl_mem_svm* svm; + if(UNLIKELY((svm = CALLOC(_cl_mem_svm)) == NULL)) + return NULL; + mem = &svm->base; + + mem->type = CL_MEM_SVM_TYPE; + SET_ICD(mem->dispatch) + mem->ref_n = 1; + mem->magic = CL_MAGIC_MEM_HEADER; + mem->flags = flags | CL_MEM_USE_HOST_PTR; + mem->is_userptr = 0; + mem->is_svm = 0; + mem->offset = 0; + + bufmgr = cl_context_get_bufmgr(ctx); + assert(bufmgr); + + int page_size = getpagesize(); + const size_t alignedSZ = ALIGN(size, page_size); + if(alignment == 0) + alignment = page_size; + else + alignment = ALIGN(alignment, page_size); + ptr = cl_aligned_malloc(alignedSZ, alignment); + if(ptr == NULL) return NULL; + + mem->host_ptr = ptr; + mem->is_svm = 1; + mem->is_userptr = 1; + mem->bo = 
cl_buffer_alloc_userptr(bufmgr, "CL SVM memory object", ptr, alignedSZ, 0); + mem->size = size; + cl_buffer_set_softpin_offset(mem->bo, (size_t)ptr); + + cl_context_add_ref(ctx); + mem->ctx = ctx; + + /* Append the svm in the context svm list */ + pthread_mutex_lock(&ctx->svm_lock); + mem->next = ctx->svm_buffers; + if (ctx->svm_buffers != NULL) + ctx->svm_buffers->prev = mem; + ctx->svm_buffers = mem; + pthread_mutex_unlock(&ctx->svm_lock); +#else + printf("Warning: Need libdrm that support userptr and soft-pin to enable SVM."); +#endif + + return ptr; +} + void cl_mem_copy_image_region(const size_t *origin, const size_t *region, void *dst, size_t dst_row_pitch, size_t dst_slice_pitch, @@ -1153,6 +1249,18 @@ cl_mem_new_image(cl_context context, } LOCAL void +cl_mem_svm_delete(cl_context ctx, void *svm_pointer) +{ + cl_mem mem; + if(UNLIKELY(svm_pointer == NULL)) + return; + mem = cl_context_get_svm_from_ptr(ctx, svm_pointer); + if(mem == NULL) + return; + cl_mem_delete(mem); +} + +LOCAL void cl_mem_delete(cl_mem mem) { cl_int i; @@ -1234,13 +1342,18 @@ cl_mem_delete(cl_mem mem) buffer->parent->subs = buffer->sub_next; pthread_mutex_unlock(&buffer->parent->sub_lock); cl_mem_delete((cl_mem )(buffer->parent)); - } else if (LIKELY(mem->bo != NULL)) { + } else if(mem->is_svm && mem->type != CL_MEM_SVM_TYPE) { + cl_mem svm_mem = cl_context_get_svm_from_ptr(mem->ctx, mem->host_ptr); + if(svm_mem) + cl_mem_delete(svm_mem); + }else if (LIKELY(mem->bo != NULL)) { cl_buffer_unreference(mem->bo); } - if (mem->is_userptr && + if ((mem->is_userptr && (mem->flags & CL_MEM_ALLOC_HOST_PTR) && - (mem->type != CL_MEM_SUBBUFFER_TYPE)) + (mem->type != CL_MEM_SUBBUFFER_TYPE)) || + (mem->is_svm && mem->type == CL_MEM_SVM_TYPE)) cl_free(mem->host_ptr); cl_free(mem); diff --git a/src/cl_mem.h b/src/cl_mem.h index 4970a75..df23345 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -70,6 +70,7 @@ typedef struct _cl_mem_dstr_cb { enum cl_mem_type { CL_MEM_BUFFER_TYPE, CL_MEM_SUBBUFFER_TYPE, + 
CL_MEM_SVM_TYPE, CL_MEM_IMAGE_TYPE, CL_MEM_GL_IMAGE_TYPE, CL_MEM_BUFFER1D_IMAGE_TYPE @@ -93,10 +94,16 @@ typedef struct _cl_mem { int map_ref; /* The mapped count. */ uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */ cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ - uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ + uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled */ + cl_bool is_svm; /* This object is svm */ size_t offset; /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/ } _cl_mem; +typedef struct _cl_mem_svm { + _cl_mem base; + cl_svm_mem_flags flags; /* Flags specified at the creation time */ +} _cl_mem_svm; + struct _cl_mem_image { _cl_mem base; cl_image_format fmt; /* only for images */ @@ -195,6 +202,10 @@ extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int* /* Create a new sub memory object */ extern cl_mem cl_mem_new_sub_buffer(cl_mem, cl_mem_flags, cl_buffer_create_type, const void *, cl_int *); +void* cl_mem_svm_allocate(cl_context, cl_svm_mem_flags, size_t, unsigned int); +void cl_mem_svm_delete(cl_context, void *svm_pointer); + + /* Idem but this is an image */ extern cl_mem cl_mem_new_image(cl_context context, diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index 782a2de..7eaa6c5 100644 --- a/src/intel/intel_driver.c +++ b/src/intel/intel_driver.c @@ -832,8 +832,8 @@ intel_update_device_info(cl_device_id device) //We should get the device memory dynamically, but the //mapablce mem size usage is unknown. Just ignore it. 
size_t total_mem,map_mem; - if(drm_intel_get_aperture_sizes(driver->fd,&map_mem,&total_mem) == 0) - device->global_mem_size = (cl_ulong)total_mem; + //if(drm_intel_get_aperture_sizes(driver->fd,&map_mem,&total_mem) == 0) + // device->global_mem_size = (cl_ulong)total_mem; intel_driver_context_destroy(driver); intel_driver_close(driver); @@ -853,6 +853,7 @@ intel_setup_callbacks(void) cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info; cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc; cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr; + cl_buffer_set_softpin_offset = (cl_buffer_set_softpin_offset_cb *) drm_intel_bo_set_softpin_offset; cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling; #if defined(HAS_EGL) cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;