diff mbox

[v3,5/5] drm/i915: Use batch length instead of object size in command parser

Message ID 1415042385-26445-6-git-send-email-bradley.d.volkin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

bradley.d.volkin@intel.com Nov. 3, 2014, 7:19 p.m. UTC
From: Brad Volkin <bradley.d.volkin@intel.com>

Previously we couldn't trust the user-supplied batch length because
it came directly from userspace (i.e. untrusted code). Using it would
have let the software parser inspect a different range of commands
than the hardware would actually execute, leaving a potential hole.

With the parser now copying the user supplied batch buffer and writing
MI_NOP commands to any space after the copied region, we can safely use
the batch length input. This should be a performance win as the actual
batch length is frequently much smaller than the allocated object size.

v2: Fix handling of non-zero batch_start_offset

Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c     | 48 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_drv.h            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  1 +
 3 files changed, 34 insertions(+), 16 deletions(-)

Comments

Shuang He Nov. 3, 2014, 7:32 p.m. UTC | #1
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
-------------------------------------Summary-------------------------------------
Platform: baseline_drm_intel_nightly_pass_rate->patch_applied_pass_rate
BYT: pass/total=251/251->248/251
PNV: pass/total=328/328->325/328
ILK: pass/total=323/330->327/330
IVB: pass/total=304/304->293/304
SNB: pass/total=336/336->334/336
HSW: pass/total=312/312->298/312
BDW: pass/total=284/285->284/285
-------------------------------------Detailed-------------------------------------
test_platform: test_suite, test_case, result_with_drm_intel_nightly(count, machine_id...)...->result_with_patch_applied(count, machine_id)...
BYT: Intel_gpu_tools, igt_gem_cs_tlb_blt, PASS(1, M29) -> TIMEOUT(1, M36)
BYT: Intel_gpu_tools, igt_gem_persistent_relocs_forked-thrash-inactive, PASS(1, M29) -> TIMEOUT(1, M36)
BYT: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-thrash-inactive, PASS(1, M29) -> TIMEOUT(1, M36)
PNV: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-faulting-reloc-thrash-inactive, PASS(1, M25) -> TIMEOUT(1, M25)
PNV: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-interruptible-thrash-inactive, PASS(1, M25) -> TIMEOUT(1, M25)
PNV: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-thrash-inactive, PASS(1, M25) -> TIMEOUT(1, M25)
ILK: Intel_gpu_tools, igt_gem_persistent_relocs_forked-thrash-inactive, PASS(1, M26) -> TIMEOUT(1, M37)
ILK: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-interruptible-faulting-reloc-thrash-inactive, PASS(1, M26) -> TIMEOUT(1, M37)
ILK: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-thrash-inactive, PASS(1, M26) -> TIMEOUT(1, M37)
ILK: Intel_gpu_tools, igt_kms_flip_dpms-off-confusion, DMESG_WARN(1, M26) -> PASS(1, M37)
ILK: Intel_gpu_tools, igt_kms_flip_flip-vs-fences-interruptible, DMESG_WARN(1, M26) -> PASS(1, M37)
ILK: Intel_gpu_tools, igt_kms_flip_flip-vs-modeset-interruptible, DMESG_WARN(1, M26) -> PASS(1, M37)
ILK: Intel_gpu_tools, igt_kms_flip_flip-vs-panning-vs-hang, DMESG_WARN(1, M26) -> PASS(1, M37)
ILK: Intel_gpu_tools, igt_kms_flip_flip-vs-rmfb, DMESG_WARN(1, M26) -> PASS(1, M37)
ILK: Intel_gpu_tools, igt_kms_flip_wf_vblank-vs-modeset, DMESG_WARN(1, M26) -> PASS(1, M37)
ILK: Intel_gpu_tools, igt_kms_render_gpu-blit, DMESG_WARN(1, M26) -> PASS(1, M37)
IVB: Intel_gpu_tools, igt_drv_hangman_error-state-capture-blt, PASS(1, M21) -> FAIL(1, M34)
IVB: Intel_gpu_tools, igt_drv_hangman_error-state-capture-bsd, PASS(1, M21) -> FAIL(1, M34)
IVB: Intel_gpu_tools, igt_drv_hangman_error-state-capture-render, PASS(1, M21) -> FAIL(1, M34)
IVB: Intel_gpu_tools, igt_gem_exec_parse_cmd-crossing-page, PASS(1, M21) -> FAIL(1, M34)
IVB: Intel_gpu_tools, igt_gem_madvise_dontneed-before-exec, PASS(1, M21) -> FAIL(1, M34)
IVB: Intel_gpu_tools, igt_gem_persistent_relocs_forked-faulting-reloc-thrash-inactive, PASS(1, M21) -> NO_RESULT(1, M34)
IVB: Intel_gpu_tools, igt_gem_persistent_relocs_forked-interruptible-faulting-reloc-thrash-inactive, PASS(1, M21) -> NO_RESULT(1, M34)
IVB: Intel_gpu_tools, igt_gem_persistent_relocs_forked-interruptible-thrash-inactive, PASS(1, M21) -> NO_RESULT(1, M34)
IVB: Intel_gpu_tools, igt_gem_persistent_relocs_forked-thrash-inactive, PASS(1, M21) -> NO_RESULT(1, M34)
IVB: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-interruptible-thrash-inactive, PASS(1, M21) -> FAIL(1, M34)
IVB: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-thrash-inactive, PASS(1, M21) -> FAIL(1, M34)
SNB: Intel_gpu_tools, igt_gem_concurrent_blit_gpu-rcs-early-read-forked, PASS(1, M35) -> TIMEOUT(1, M35)
SNB: Intel_gpu_tools, igt_gem_concurrent_blit_gpuX-rcs-early-read-forked, PASS(1, M35) -> TIMEOUT(1, M35)
HSW: Intel_gpu_tools, igt_drv_hangman_error-state-capture-blt, PASS(1, M39) -> FAIL(1, M39)
HSW: Intel_gpu_tools, igt_drv_hangman_error-state-capture-bsd, PASS(1, M39) -> FAIL(1, M39)
HSW: Intel_gpu_tools, igt_drv_hangman_error-state-capture-render, PASS(1, M39) -> FAIL(1, M39)
HSW: Intel_gpu_tools, igt_drv_hangman_error-state-capture-vebox, PASS(1, M39) -> FAIL(1, M39)
HSW: Intel_gpu_tools, igt_gem_bad_reloc_negative-reloc, PASS(1, M39) -> NSPT(1, M39)
HSW: Intel_gpu_tools, igt_gem_bad_reloc_negative-reloc-lut, PASS(1, M39) -> NSPT(1, M39)
HSW: Intel_gpu_tools, igt_gem_exec_parse_cmd-crossing-page, PASS(1, M39) -> FAIL(1, M39)
HSW: Intel_gpu_tools, igt_gem_madvise_dontneed-before-exec, PASS(1, M39) -> FAIL(1, M39)
HSW: Intel_gpu_tools, igt_gem_persistent_relocs_forked-faulting-reloc-thrash-inactive, PASS(1, M39) -> NO_RESULT(1, M39)
HSW: Intel_gpu_tools, igt_gem_persistent_relocs_forked-interruptible-faulting-reloc-thrash-inactive, PASS(1, M39) -> NO_RESULT(1, M39)
HSW: Intel_gpu_tools, igt_gem_persistent_relocs_forked-interruptible-thrash-inactive, PASS(1, M39) -> NO_RESULT(1, M39)
HSW: Intel_gpu_tools, igt_gem_persistent_relocs_forked-thrash-inactive, PASS(1, M39) -> NO_RESULT(1, M39)
HSW: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-interruptible-thrash-inactive, PASS(1, M39) -> FAIL(1, M39)
HSW: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-thrash-inactive, PASS(1, M39) -> FAIL(1, M39)
BDW: Intel_gpu_tools, igt_gem_linear_blits_interruptible, TIMEOUT(1, M30) -> PASS(1, M42)
BDW: Intel_gpu_tools, igt_gem_reloc_vs_gpu_forked-faulting-reloc-thrash-inactive, PASS(1, M30) -> TIMEOUT(1, M42)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c8fe403..d4d13b1 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -840,11 +840,19 @@  finish:
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
-		       struct drm_i915_gem_object *src_obj)
+		       struct drm_i915_gem_object *src_obj,
+		       u32 batch_start_offset,
+		       u32 batch_len)
 {
 	int ret = 0;
 	int needs_clflush = 0;
-	u32 *src_addr, *dest_addr = NULL;
+	u32 *src_base, *dest_base = NULL;
+	u32 *src_addr, *dest_addr;
+	u32 offset = batch_start_offset / sizeof(*dest_addr);
+	u32 end = batch_start_offset + batch_len;
+
+	if (end > dest_obj->base.size || end > src_obj->base.size)
+		return ERR_PTR(-E2BIG);
 
 	ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
 	if (ret) {
@@ -852,15 +860,17 @@  static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		return ERR_PTR(ret);
 	}
 
-	src_addr = vmap_batch(src_obj);
-	if (!src_addr) {
+	src_base = vmap_batch(src_obj);
+	if (!src_base) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
 		ret = -ENOMEM;
 		goto unpin_src;
 	}
 
+	src_addr = src_base + offset;
+
 	if (needs_clflush)
-		drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+		drm_clflush_virt_range((char *)src_addr, batch_len);
 
 	ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
 	if (ret) {
@@ -868,24 +878,27 @@  static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		goto unmap_src;
 	}
 
-	dest_addr = vmap_batch(dest_obj);
-	if (!dest_addr) {
+	dest_base = vmap_batch(dest_obj);
+	if (!dest_base) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
 		ret = -ENOMEM;
 		goto unmap_src;
 	}
 
-	memcpy(dest_addr, src_addr, src_obj->base.size);
-	if (dest_obj->base.size > src_obj->base.size)
-		memset((u8 *)dest_addr + src_obj->base.size, 0,
-		       dest_obj->base.size - src_obj->base.size);
+	dest_addr = dest_base + offset;
+
+	if (batch_start_offset != 0)
+		memset((u8 *)dest_base, 0, batch_start_offset);
+
+	memcpy(dest_addr, src_addr, batch_len);
+	memset((u8 *)dest_addr + batch_len, 0, dest_obj->base.size - end);
 
 unmap_src:
-	vunmap(src_addr);
+	vunmap(src_base);
 unpin_src:
 	i915_gem_object_unpin_pages(src_obj);
 
-	return ret ? ERR_PTR(ret) : dest_addr;
+	return ret ? ERR_PTR(ret) : dest_base;
 }
 
 /**
@@ -1006,6 +1019,7 @@  static bool check_cmd(const struct intel_engine_cs *ring,
  * @batch_obj: the batch buffer in question
  * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
+ * @batch_len: length of the commands in batch_obj
  * @is_master: is the submitting process the drm master?
  *
  * Parses the specified batch buffer looking for privilege violations as
@@ -1018,6 +1032,7 @@  int i915_parse_cmds(struct intel_engine_cs *ring,
 		    struct drm_i915_gem_object *batch_obj,
 		    struct drm_i915_gem_object *shadow_batch_obj,
 		    u32 batch_start_offset,
+		    u32 batch_len,
 		    bool is_master)
 {
 	int ret = 0;
@@ -1025,7 +1040,8 @@  int i915_parse_cmds(struct intel_engine_cs *ring,
 	struct drm_i915_cmd_descriptor default_desc = { 0 };
 	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-	batch_base = copy_batch(shadow_batch_obj, batch_obj);
+	batch_base = copy_batch(shadow_batch_obj, batch_obj,
+				batch_start_offset, batch_len);
 	if (IS_ERR(batch_base)) {
 		DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
 		return PTR_ERR(batch_base);
@@ -1034,11 +1050,11 @@  int i915_parse_cmds(struct intel_engine_cs *ring,
 	cmd = batch_base + (batch_start_offset / sizeof(*cmd));
 
 	/*
-	 * We use the source object's size because the shadow object is as
+	 * We use the batch length as size because the shadow object is as
 	 * large or larger and copy_batch() will write MI_NOPs to the extra
 	 * space. Parsing should be faster in some cases this way.
 	 */
-	batch_end = cmd + (batch_obj->base.size / sizeof(*batch_end));
+	batch_end = cmd + (batch_len / sizeof(*batch_end));
 
 	while (cmd < batch_end) {
 		const struct drm_i915_cmd_descriptor *desc;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 50d974d..6785ca2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2791,6 +2791,7 @@  int i915_parse_cmds(struct intel_engine_cs *ring,
 		    struct drm_i915_gem_object *batch_obj,
 		    struct drm_i915_gem_object *shadow_batch_obj,
 		    u32 batch_start_offset,
+		    u32 batch_len,
 		    bool is_master);
 
 /* i915_suspend.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c216ff2..bab7ebf 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1381,6 +1381,7 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 				      batch_obj,
 				      shadow_batch_obj,
 				      args->batch_start_offset,
+				      args->batch_len,
 				      file->is_master);
 		if (ret) {
 			if (ret != -EACCES)