diff mbox

[5/6] drm/radeon: validate relocations in the order determined by userspace

Message ID 1393439112-10861-5-git-send-email-maraeo@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marek Olšák Feb. 26, 2014, 6:25 p.m. UTC
From: Marek Olšák <marek.olsak@amd.com>

Userspace should set the first 4 bits of drm_radeon_cs_reloc::flags to
a number from 0 to 15. The higher the number, the higher the priority,
which means a buffer with a higher number will be validated sooner.

The old behavior is preserved: Buffers used for write are prioritized over
read-only buffers if the userspace doesn't set the number.

v2: add buffers to buckets directly, then concatenate them

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |  2 +-
 drivers/gpu/drm/radeon/radeon_cs.c     | 68 ++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/radeon/radeon_object.c | 10 -----
 drivers/gpu/drm/radeon/radeon_object.h |  2 -
 4 files changed, 66 insertions(+), 16 deletions(-)

Comments

Christian König Feb. 27, 2014, 9:29 a.m. UTC | #1
Am 26.02.2014 19:25, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com>
>
> Userspace should set the first 4 bits of drm_radeon_cs_reloc::flags to
> a number from 0 to 15. The higher the number, the higher the priority,
> which means a buffer with a higher number will be validated sooner.
>
> The old behavior is preserved: Buffers used for write are prioritized over
> read-only buffers if the userspace doesn't set the number.
>
> v2: add buffers to buckets directly, then concatenate them
>
> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
> ---
>   drivers/gpu/drm/radeon/radeon.h        |  2 +-
>   drivers/gpu/drm/radeon/radeon_cs.c     | 68 ++++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/radeon/radeon_object.c | 10 -----
>   drivers/gpu/drm/radeon/radeon_object.h |  2 -
>   4 files changed, 66 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index d37a57a..f7a3174 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -481,7 +481,7 @@ struct radeon_bo_list {
>   	struct ttm_validate_buffer tv;
>   	struct radeon_bo	*bo;
>   	uint64_t		gpu_offset;
> -	bool			written;
> +	unsigned		priority;

As far as I can see priority is only used temporary in 
radeon_cs_parser_relocs, so we probably don't need it in here.

Apart from that patch looks good to me,
Christian.

>   	unsigned		domain;
>   	unsigned		alt_domain;
>   	u32			tiling_flags;
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index d49a3f7..779fa02 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -31,10 +31,57 @@
>   #include "radeon.h"
>   #include "radeon_trace.h"
>   
> +#define RADEON_CS_MAX_PRIORITY		33u
> +#define RADEON_CS_MAX_BUCKET		(RADEON_CS_MAX_PRIORITY / 2u)
> +#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_BUCKET + 1u)
> +
> +/* This is based on the bucket sort with O(n) time complexity.
> + * Relocations are sorted from the highest to the lowest priority as
> + * they are added.
> + *
> + * Odd numbers are added at the head of a bucket and even numbers are
> + * added at the tail, therefore all buckets are always sorted. */
> +struct radeon_cs_buckets {
> +	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
> +};
> +
> +static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
> +{
> +	unsigned i;
> +
> +	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
> +		INIT_LIST_HEAD(&b->bucket[i]);
> +}
> +
> +static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
> +				  struct radeon_bo_list *reloc)
> +{
> +	unsigned priority = reloc->priority;
> +	unsigned i = min(priority / 2u, RADEON_CS_MAX_BUCKET);
> +
> +	if (priority % 2 == 1) {
> +		list_add(&reloc->tv.head, &b->bucket[i]);
> +	} else {
> +		list_add_tail(&reloc->tv.head, &b->bucket[i]);
> +	}
> +}
> +
> +static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
> +				       struct list_head *out_list)
> +{
> +	unsigned i;
> +
> +	/* Connect the sorted buckets in the output list. */
> +	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
> +		list_splice(&b->bucket[i], out_list);
> +	}
> +}
> +
>   static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
>   {
>   	struct drm_device *ddev = p->rdev->ddev;
>   	struct radeon_cs_chunk *chunk;
> +	struct radeon_cs_buckets buckets;
>   	unsigned i, j;
>   	bool duplicate;
>   
> @@ -53,6 +100,9 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
>   	if (p->relocs == NULL) {
>   		return -ENOMEM;
>   	}
> +
> +	radeon_cs_buckets_init(&buckets);
> +
>   	for (i = 0; i < p->nrelocs; i++) {
>   		struct drm_radeon_cs_reloc *r;
>   
> @@ -80,7 +130,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
>   		p->relocs_ptr[i] = &p->relocs[i];
>   		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
>   		p->relocs[i].lobj.bo = p->relocs[i].robj;
> -		p->relocs[i].lobj.written = !!r->write_domain;
> +
> +		/* The userspace buffer priorities are from 0 to 15. A higher
> +		 * number means the buffer is more important.
> +		 * Also, the buffers used for write have a higher priority than
> +		 * the buffers used for read only, which doubles the range
> +		 * to 0 to 31. Numbers 32 and 33 are reserved for the kernel
> +		 * driver.
> +		 */
> +		p->relocs[i].lobj.priority = (r->flags & 0xf) * 2 + !!r->write_domain;
>   
>   		/* the first reloc of an UVD job is the msg and that must be in
>   		   VRAM, also but everything into VRAM on AGP cards to avoid
> @@ -94,6 +152,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
>   			p->relocs[i].lobj.alt_domain =
>   				RADEON_GEM_DOMAIN_VRAM;
>   
> +			/* prioritize this over any other relocation */
> +			p->relocs[i].lobj.priority = 32;
>   		} else {
>   			uint32_t domain = r->write_domain ?
>   				r->write_domain : r->read_domains;
> @@ -107,9 +167,11 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
>   		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
>   		p->relocs[i].handle = r->handle;
>   
> -		radeon_bo_list_add_object(&p->relocs[i].lobj,
> -					  &p->validated);
> +		radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj);
>   	}
> +
> +	radeon_cs_buckets_get_list(&buckets, &p->validated);
> +
>   	return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
>   }
>   
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index d676ee2..19042ae 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -368,16 +368,6 @@ void radeon_bo_fini(struct radeon_device *rdev)
>   	arch_phys_wc_del(rdev->mc.vram_mtrr);
>   }
>   
> -void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
> -				struct list_head *head)
> -{
> -	if (lobj->written) {
> -		list_add(&lobj->tv.head, head);
> -	} else {
> -		list_add_tail(&lobj->tv.head, head);
> -	}
> -}
> -
>   int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
>   			    struct list_head *head, int ring)
>   {
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index a9a8c11..6c3ca9e 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
>   extern void radeon_bo_force_delete(struct radeon_device *rdev);
>   extern int radeon_bo_init(struct radeon_device *rdev);
>   extern void radeon_bo_fini(struct radeon_device *rdev);
> -extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
> -				struct list_head *head);
>   extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
>   				   struct list_head *head, int ring);
>   extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d37a57a..f7a3174 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -481,7 +481,7 @@  struct radeon_bo_list {
 	struct ttm_validate_buffer tv;
 	struct radeon_bo	*bo;
 	uint64_t		gpu_offset;
-	bool			written;
+	unsigned		priority;
 	unsigned		domain;
 	unsigned		alt_domain;
 	u32			tiling_flags;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index d49a3f7..779fa02 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -31,10 +31,57 @@ 
 #include "radeon.h"
 #include "radeon_trace.h"
 
+#define RADEON_CS_MAX_PRIORITY		33u
+#define RADEON_CS_MAX_BUCKET		(RADEON_CS_MAX_PRIORITY / 2u)
+#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_BUCKET + 1u)
+
+/* This is based on the bucket sort with O(n) time complexity.
+ * Relocations are sorted from the highest to the lowest priority as
+ * they are added.
+ *
+ * Odd numbers are added at the head of a bucket and even numbers are
+ * added at the tail, therefore all buckets are always sorted. */
+struct radeon_cs_buckets {
+	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
+};
+
+static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
+{
+	unsigned i;
+
+	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
+		INIT_LIST_HEAD(&b->bucket[i]);
+}
+
+static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
+				  struct radeon_bo_list *reloc)
+{
+	unsigned priority = reloc->priority;
+	unsigned i = min(priority / 2u, RADEON_CS_MAX_BUCKET);
+
+	if (priority % 2 == 1) {
+		list_add(&reloc->tv.head, &b->bucket[i]);
+	} else {
+		list_add_tail(&reloc->tv.head, &b->bucket[i]);
+	}
+}
+
+static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
+				       struct list_head *out_list)
+{
+	unsigned i;
+
+	/* Connect the sorted buckets in the output list. */
+	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
+		list_splice(&b->bucket[i], out_list);
+	}
+}
+
 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 {
 	struct drm_device *ddev = p->rdev->ddev;
 	struct radeon_cs_chunk *chunk;
+	struct radeon_cs_buckets buckets;
 	unsigned i, j;
 	bool duplicate;
 
@@ -53,6 +100,9 @@  static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	if (p->relocs == NULL) {
 		return -ENOMEM;
 	}
+
+	radeon_cs_buckets_init(&buckets);
+
 	for (i = 0; i < p->nrelocs; i++) {
 		struct drm_radeon_cs_reloc *r;
 
@@ -80,7 +130,15 @@  static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		p->relocs_ptr[i] = &p->relocs[i];
 		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
 		p->relocs[i].lobj.bo = p->relocs[i].robj;
-		p->relocs[i].lobj.written = !!r->write_domain;
+
+		/* The userspace buffer priorities are from 0 to 15. A higher
+		 * number means the buffer is more important.
+		 * Also, the buffers used for write have a higher priority than
+		 * the buffers used for read only, which doubles the range
+		 * to 0 to 31. Numbers 32 and 33 are reserved for the kernel
+		 * driver.
+		 */
+		p->relocs[i].lobj.priority = (r->flags & 0xf) * 2 + !!r->write_domain;
 
 		/* the first reloc of an UVD job is the msg and that must be in
 		   VRAM, also but everything into VRAM on AGP cards to avoid
@@ -94,6 +152,8 @@  static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs[i].lobj.alt_domain =
 				RADEON_GEM_DOMAIN_VRAM;
 
+			/* prioritize this over any other relocation */
+			p->relocs[i].lobj.priority = 32;
 		} else {
 			uint32_t domain = r->write_domain ?
 				r->write_domain : r->read_domains;
@@ -107,9 +167,11 @@  static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].handle = r->handle;
 
-		radeon_bo_list_add_object(&p->relocs[i].lobj,
-					  &p->validated);
+		radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj);
 	}
+
+	radeon_cs_buckets_get_list(&buckets, &p->validated);
+
 	return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index d676ee2..19042ae 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -368,16 +368,6 @@  void radeon_bo_fini(struct radeon_device *rdev)
 	arch_phys_wc_del(rdev->mc.vram_mtrr);
 }
 
-void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-				struct list_head *head)
-{
-	if (lobj->written) {
-		list_add(&lobj->tv.head, head);
-	} else {
-		list_add_tail(&lobj->tv.head, head);
-	}
-}
-
 int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
 			    struct list_head *head, int ring)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index a9a8c11..6c3ca9e 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -138,8 +138,6 @@  extern int radeon_bo_evict_vram(struct radeon_device *rdev);
 extern void radeon_bo_force_delete(struct radeon_device *rdev);
 extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
-extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-				struct list_head *head);
 extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
 				   struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,