@@ -6,6 +6,7 @@
#include "xe_sync.h"
#include <linux/dma-fence-array.h>
+#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
@@ -28,6 +29,7 @@ struct xe_user_fence {
u64 __user *addr;
u64 value;
int signalled;
+ bool use_page;
};
static void user_fence_destroy(struct kref *kref)
@@ -53,7 +55,9 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
u64 value)
{
struct xe_user_fence *ufence;
+ struct page *page;
u64 __user *ptr = u64_to_user_ptr(addr);
+ int ret;
if (!access_ok(ptr, sizeof(ptr)))
return ERR_PTR(-EFAULT);
@@ -69,19 +73,55 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
ufence->mm = current->mm;
mmgrab(ufence->mm);
+ /* Prefault page */
+ ret = get_user_pages_fast(addr, 1, FOLL_WRITE, &page);
+ if (ret == 1) {
+ ufence->use_page = true;
+ put_page(page);
+ } else {
+ ufence->use_page = false;
+ }
+
return ufence;
}
static void user_fence_worker(struct work_struct *w)
{
struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker);
-
- if (mmget_not_zero(ufence->mm)) {
- kthread_use_mm(ufence->mm);
- if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value)))
- XE_WARN_ON("Copy to user failed");
- kthread_unuse_mm(ufence->mm);
- mmput(ufence->mm);
+ struct mm_struct *mm = ufence->mm;
+
+ if (mmget_not_zero(mm)) {
+ kthread_use_mm(mm);
+ if (ufence->use_page) {
+ struct page *page;
+ int ret;
+
+ ret = get_user_pages_fast((unsigned long)ufence->addr,
+ 1, FOLL_WRITE, &page);
+ if (ret == 1) {
+ u64 *ptr;
+ u64 old = 0;
+ void *va;
+
+ va = kmap_local_page(page);
+ ptr = va + offset_in_page(ufence->addr);
+ while (!try_cmpxchg64(ptr, &old, ufence->value))
+ continue;
+ kunmap_local(va);
+
+ set_page_dirty_lock(page);
+ put_page(page);
+ } else {
+ ufence->use_page = false;
+ }
+ }
+ if (!ufence->use_page) {
+ if (copy_to_user(ufence->addr, &ufence->value,
+ sizeof(ufence->value)))
+ drm_warn(&ufence->xe->drm, "copy_to_user failed\n");
+ }
+ kthread_unuse_mm(mm);
+ mmput(mm);
}
wake_up_all(&ufence->xe->ufence_wq);
Prefault the user fence page at fence creation, then on signaling pin it with get_user_pages_fast() and write the value through a kmap_local_page() mapping, falling back to copy_to_user() if the page cannot be pinned. Compared to copy_to_user() alone, this should reduce latency and can ensure 64-bit atomicity of the write. v2: - Prefault page and drop ref (Thomas) - Use set_page_dirty_lock (Thomas) - try_cmpxchg64 loop (Thomas) v3: - Initialize use_page (CI) Signed-off-by: Matthew Brost <matthew.brost@intel.com> --- drivers/gpu/drm/xe/xe_sync.c | 54 +++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-)