diff mbox

[0039/1094] drm/i915: Use multiple VMs -- the point of no return

Message ID 1413889294-31328-40-git-send-email-dheerajx.s.jamwal@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dheeraj Jamwal Oct. 21, 2014, 10:43 a.m. UTC
From: Ben Widawsky <ben@bwidawsk.net>

As with processes which run on the CPU, the goal of multiple VMs is to
provide process isolation. Specific to GEN, there is also the ability to
map more objects per process (2GB each instead of 2Gb-2k total).

For the most part, all the pipes have been laid, and all we need to do
is remove asserts and actually start changing address spaces with the
context switch. Since prior to this we've converted the setting of the
page tables to a streamed version, this is quite easy.

One important thing to point out (since it'd been hotly contested) is
that with this patch, every context created will have it's own address
space (provided the HW can do it).

v2: Disable BDW on rebase

NOTE: I tried to make this commit as small as possible. I needed one
place where I could "turn everything on" and that is here. It could be
split into finer commits, but I didn't really see much point.

Cc: Eric Anholt <eric@anholt.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
(cherry picked from commit 7e0d96bc03c140cb8183955ad6f0290caa731e64)

Signed-off-by: Dheeraj Jamwal <dheerajx.s.jamwal@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 ++
 drivers/gpu/drm/i915/i915_drv.c            |    3 +-
 drivers/gpu/drm/i915/i915_drv.h            |   12 +++++-
 drivers/gpu/drm/i915/i915_gem.c            |   22 ++++------
 drivers/gpu/drm/i915/i915_gem_context.c    |   60 ++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   16 +++++---
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   22 +++++++---
 drivers/gpu/drm/i915/i915_gpu_error.c      |    5 ---
 include/uapi/drm/i915_drm.h                |    1 +
 9 files changed, 86 insertions(+), 58 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index d82b7b2..5802e0a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1013,6 +1013,9 @@  static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_FULL_PPGTT:
+		value = USES_FULL_PPGTT(dev);
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ec7bb0f..9d7456a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -116,7 +116,8 @@  MODULE_PARM_DESC(enable_hangcheck,
 int i915_enable_ppgtt __read_mostly = -1;
 module_param_named(i915_enable_ppgtt, i915_enable_ppgtt, int, 0400);
 MODULE_PARM_DESC(i915_enable_ppgtt,
-		"Enable PPGTT (default: true)");
+		"Override PPGTT usage. "
+		"(-1=auto [default], 0=disabled, 1=aliasing, 2=full)");
 
 int i915_enable_psr __read_mostly = 0;
 module_param_named(enable_psr, i915_enable_psr, int, 0600);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8a0832a..fef2f1a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1841,7 +1841,9 @@  struct drm_i915_file_private {
 
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
 #define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev))
+#define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_BROADWELL(dev))
 #define USES_ALIASING_PPGTT(dev) intel_enable_ppgtt(dev, false)
+#define USES_FULL_PPGTT(dev)	intel_enable_ppgtt(dev, true)
 
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
@@ -2031,6 +2033,8 @@  void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
+void i915_init_vm(struct drm_i915_private *dev_priv,
+		  struct i915_address_space *vm);
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
@@ -2310,7 +2314,8 @@  static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full)
 	if (i915_enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
 		return false;
 
-	BUG_ON(full);
+	if (i915_enable_ppgtt == 1 && full)
+		return false;
 
 #ifdef CONFIG_INTEL_IOMMU
 	/* Disable ppgtt on SNB if VT-d is on. */
@@ -2320,7 +2325,10 @@  static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full)
 	}
 #endif
 
-	return HAS_ALIASING_PPGTT(dev);
+	if (full)
+		return HAS_PPGTT(dev);
+	else
+		return HAS_ALIASING_PPGTT(dev);
 }
 
 static inline void ppgtt_release(struct kref *kref)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 25d9b8d..303e796 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2274,7 +2274,10 @@  request_to_vm(struct drm_i915_gem_request *request)
 	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
 	struct i915_address_space *vm;
 
-	vm = &dev_priv->gtt.base;
+	if (request->ctx)
+		vm = request->ctx->vm;
+	else
+		vm = &dev_priv->gtt.base;
 
 	return vm;
 }
@@ -2766,9 +2769,6 @@  int i915_vma_unbind(struct i915_vma *vma)
 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
-	/* For now we only ever use 1 vma per object */
-	WARN_ON(!list_is_singular(&obj->vma_list));
-
 	if (list_empty(&vma->vma_link))
 		return 0;
 
@@ -3321,17 +3321,12 @@  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
-	BUG_ON(!i915_is_ggtt(vm));
-
 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unpin;
 	}
 
-	/* For now we only ever use 1 vma per object */
-	WARN_ON(!list_is_singular(&obj->vma_list));
-
 search_free:
 	/* FIXME: Some tests are failing when they receive a reloc of 0. To
 	 * prevent this, we simply don't allow the 0th offset. */
@@ -4237,9 +4232,6 @@  void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->phys_obj)
 		i915_gem_detach_phys_object(dev, obj);
 
-	/* NB: 0 or 1 elements */
-	WARN_ON(!list_empty(&obj->vma_list) &&
-		!list_is_singular(&obj->vma_list));
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
 		int ret;
 
@@ -4637,9 +4629,11 @@  init_ring_lists(struct intel_ring_buffer *ring)
 	INIT_LIST_HEAD(&ring->request_list);
 }
 
-static void i915_init_vm(struct drm_i915_private *dev_priv,
-			 struct i915_address_space *vm)
+void i915_init_vm(struct drm_i915_private *dev_priv,
+		  struct i915_address_space *vm)
 {
+	if (!i915_is_ggtt(vm))
+		drm_mm_init(&vm->mm, vm->start, vm->total);
 	vm->dev = dev_priv->dev;
 	INIT_LIST_HEAD(&vm->active_list);
 	INIT_LIST_HEAD(&vm->inactive_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 2cd8459..426d625 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -288,17 +288,15 @@  i915_gem_create_context(struct drm_device *dev,
 				DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
 				goto err_destroy;
 			}
+
+			ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
 		}
 	} else if (USES_ALIASING_PPGTT(dev)) {
 		/* For platforms which only have aliasing PPGTT, we fake the
 		 * address space and refcounting. */
-		kref_get(&dev_priv->mm.aliasing_ppgtt->ref);
-	}
-
-	/* TODO: Until full ppgtt... */
-	if (USES_ALIASING_PPGTT(dev))
 		ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
-	else
+		kref_get(&dev_priv->mm.aliasing_ppgtt->ref);
+	} else
 		ctx->vm = &dev_priv->gtt.base;
 
 	return ctx;
@@ -500,7 +498,7 @@  int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 
 	mutex_lock(&dev->struct_mutex);
 	file_priv->private_default_ctx =
-		i915_gem_create_context(dev, file_priv, false);
+		i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
 	mutex_unlock(&dev->struct_mutex);
 
 	if (IS_ERR(file_priv->private_default_ctx)) {
@@ -587,6 +585,7 @@  static int do_switch(struct intel_ring_buffer *ring,
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct i915_hw_context *from = ring->last_context;
+	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to);
 	u32 hw_flags = 0;
 	int ret, i;
 
@@ -598,17 +597,15 @@  static int do_switch(struct intel_ring_buffer *ring,
 	if (from == to && from->last_ring == ring && !to->remap_slice)
 		return 0;
 
-	if (ring != &dev_priv->ring[RCS]) {
-		if (from)
-			i915_gem_context_unreference(from);
-		goto done;
+	/* Trying to pin first makes error handling easier. */
+	if (ring == &dev_priv->ring[RCS]) {
+		ret = i915_gem_obj_ggtt_pin(to->obj,
+					    get_context_alignment(ring->dev),
+					    false, false);
+		if (ret)
+			return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(to->obj, get_context_alignment(ring->dev),
-				    false, false);
-	if (ret)
-		return ret;
-
 	/*
 	 * Pin can switch back to the default context if we end up calling into
 	 * evict_everything - as a last ditch gtt defrag effort that also
@@ -616,6 +613,18 @@  static int do_switch(struct intel_ring_buffer *ring,
 	 */
 	from = ring->last_context;
 
+	if (USES_FULL_PPGTT(ring->dev)) {
+		ret = ppgtt->switch_mm(ppgtt, ring, false);
+		if (ret)
+			goto unpin_out;
+	}
+
+	if (ring != &dev_priv->ring[RCS]) {
+		if (from)
+			i915_gem_context_unreference(from);
+		goto done;
+	}
+
 	/*
 	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
 	 * that thanks to write = false in this call and us not setting any gpu
@@ -625,10 +634,8 @@  static int do_switch(struct intel_ring_buffer *ring,
 	 * XXX: We need a real interface to do this instead of trickery.
 	 */
 	ret = i915_gem_object_set_to_gtt_domain(to->obj, false);
-	if (ret) {
-		i915_gem_object_ggtt_unpin(to->obj);
-		return ret;
-	}
+	if (ret)
+		goto unpin_out;
 
 	if (!to->obj->has_global_gtt_mapping) {
 		struct i915_vma *vma = i915_gem_obj_to_vma(to->obj,
@@ -640,10 +647,8 @@  static int do_switch(struct intel_ring_buffer *ring,
 		hw_flags |= MI_RESTORE_INHIBIT;
 
 	ret = mi_set_context(ring, to, hw_flags);
-	if (ret) {
-		i915_gem_object_ggtt_unpin(to->obj);
-		return ret;
-	}
+	if (ret)
+		goto unpin_out;
 
 	for (i = 0; i < MAX_L3_SLICES; i++) {
 		if (!(to->remap_slice & (1<<i)))
@@ -688,6 +693,11 @@  done:
 	to->last_ring = ring;
 
 	return 0;
+
+unpin_out:
+	if (ring->id == RCS)
+		i915_gem_object_ggtt_unpin(to->obj);
+	return ret;
 }
 
 /**
@@ -736,7 +746,7 @@  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_create_context(dev, file_priv, false);
+	ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 993d198..112f345 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1003,7 +1003,7 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_hw_context *ctx;
 	struct i915_address_space *vm;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
-	u32 exec_start, exec_len;
+	u32 exec_start = args->batch_start_offset, exec_len;
 	u32 mask, flags;
 	int ret, mode, i;
 	bool need_relocs;
@@ -1126,9 +1126,9 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	i915_gem_context_reference(ctx);
 
-	/* HACK until we have full PPGTT */
-	/* vm = ctx->vm; */
-	vm = &dev_priv->gtt.base;
+	vm = ctx->vm;
+	if (!USES_FULL_PPGTT(dev))
+		vm = &dev_priv->gtt.base;
 
 	eb = eb_create(args);
 	if (eb == NULL) {
@@ -1184,6 +1184,11 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
 	}
 
+	if (flags & I915_DISPATCH_SECURE)
+		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
+	else
+		exec_start += i915_gem_obj_offset(batch_obj, vm);
+
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
 	if (ret)
 		goto err;
@@ -1213,8 +1218,7 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 	}
 
-	exec_start = i915_gem_obj_offset(batch_obj, vm) +
-		args->batch_start_offset;
+
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2c26d3d..b84d128 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -324,6 +324,7 @@  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 		container_of(vm, struct i915_hw_ppgtt, base);
 	int i, j;
 
+	list_del(&vm->global_link);
 	drm_mm_takedown(&vm->mm);
 
 	for (i = 0; i < ppgtt->num_pd_pages ; i++) {
@@ -757,6 +758,7 @@  static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 		container_of(vm, struct i915_hw_ppgtt, base);
 	int i;
 
+	list_del(&vm->global_link);
 	drm_mm_takedown(&ppgtt->base.mm);
 	drm_mm_remove_node(&ppgtt->node);
 
@@ -903,17 +905,22 @@  int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 		BUG();
 
 	if (!ret) {
+		struct drm_i915_private *dev_priv = dev->dev_private;
 		kref_init(&ppgtt->ref);
 		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
 			    ppgtt->base.total);
-		if (INTEL_INFO(dev)->gen < 8)
+		i915_init_vm(dev_priv, &ppgtt->base);
+		if (INTEL_INFO(dev)->gen < 8) {
 			gen6_write_pdes(ppgtt);
+			DRM_DEBUG("Adding PPGTT at offset %x\n",
+				  ppgtt->pd_offset << 10);
+		}
 	}
 
 	return ret;
 }
 
-static void __always_unused
+static void
 ppgtt_bind_vma(struct i915_vma *vma,
 	       enum i915_cache_level cache_level,
 	       u32 flags)
@@ -925,7 +932,7 @@  ppgtt_bind_vma(struct i915_vma *vma,
 	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
 }
 
-static void __always_unused ppgtt_unbind_vma(struct i915_vma *vma)
+static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
 	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
@@ -1722,8 +1729,13 @@  static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	case 8:
 	case 7:
 	case 6:
-		vma->unbind_vma = ggtt_unbind_vma;
-		vma->bind_vma = ggtt_bind_vma;
+		if (i915_is_ggtt(vm)) {
+			vma->unbind_vma = ggtt_unbind_vma;
+			vma->bind_vma = ggtt_bind_vma;
+		} else {
+			vma->unbind_vma = ppgtt_unbind_vma;
+			vma->bind_vma = ppgtt_bind_vma;
+		}
 		break;
 	case 5:
 	case 4:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index de97a243..f69bc5f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -920,11 +920,6 @@  static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
 		cnt++;
 
-	if (WARN(cnt > 1, "Multiple VMs not yet supported\n"))
-		cnt = 1;
-
-	vm = &dev_priv->gtt.base;
-
 	error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
 	error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
 	error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 126bfaa..602711b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -337,6 +337,7 @@  typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
+#define I915_PARAM_HAS_FULL_PPGTT	 28
 
 typedef struct drm_i915_getparam {
 	int param;