diff mbox

[17/18] drm/i915: Use a slab for object allocation

Message ID 1350666204-8101-17-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State Accepted
Delegated to: Ben Widawsky
Headers show

Commit Message

Chris Wilson Oct. 19, 2012, 5:03 p.m. UTC
The primary purpose of this was to debug some use-after-free memory
corruption that was causing an OOPS inside drm/i915. As it turned out
the corruption was being caused elsewhere and i915.ko as a major user of
many objects was being hit hardest.

Indeed as we do frequent the generic kmalloc caches, dedicating one to
ourselves (or at least naming one for us depending upon the core) aids
debugging our own slab usage.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_dma.c        |    3 +++
 drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
 drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
 drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
 5 files changed, 34 insertions(+), 10 deletions(-)

Comments

Paulo Zanoni Oct. 24, 2012, 8:21 p.m. UTC | #1
Hi

2012/10/19 Chris Wilson <chris@chris-wilson.co.uk>:
> The primary purpose of this was to debug some use-after-free memory
> corruption that was causing an OOPS inside drm/i915. As it turned out
> the corruption was being caused elsewhere and i915.ko as a major user of
> many objects was being hit hardest.
>
> Indeed as we do frequent the generic kmalloc caches, dedicating one to
> ourselves (or at least naming one for us depending upon the core) aids
> debugging our own slab usage.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

So I applied patches 1-17 in 2 machines (SNB and HSW). Patch 18 does
not apply anymore, it needs to be rebased, so I did not apply it. Both
machines boot and work fine without any unusual errors.

While running "dmesg | grep stolen" one machines tells me it found
32mb and it matches the "pre-allocated" option on my BIOS. The other
machine gives me 64mb but I couldn't find anything related to this on
the BIOS. In both cases the PCI message "detected XXXK stolen memory"
matches the amount print by i915_gem_init_stolen.

On the SNB machine which had 64mb of stolen memory I also see
"reserving 33554432 bytes of contiguous stolen space for FBC".

I'm not sure if this is enough for a tested-by tag, so feel free to
add if you think this test was enough. Please tell me if I need to
test something else.
Tested-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

Thanks,
Paulo

> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
>  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
>  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
>  5 files changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 14271aa..69969be 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
>
>         destroy_workqueue(dev_priv->wq);
>
> +       if (dev_priv->slab)
> +               kmem_cache_destroy(dev_priv->slab);
> +
>         pci_dev_put(dev_priv->bridge_dev);
>         kfree(dev->dev_private);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a0e0cd..5084b29 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ struct intel_gmbus {
>
>  typedef struct drm_i915_private {
>         struct drm_device *dev;
> +       struct kmem_cache *slab;
>
>         const struct intel_device_info *info;
>
> @@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>                         struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
> +void *i915_gem_object_alloc(struct drm_device *dev);
> +void i915_gem_object_free(struct drm_i915_gem_object *obj);
>  int i915_gem_init_object(struct drm_gem_object *obj);
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>                          const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>                                                   size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>                                      uint32_t alignment,
>                                      bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0349899..8183c0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>         return 0;
>  }
>
> +void *i915_gem_object_alloc(struct drm_device *dev)
> +{
> +       struct drm_i915_private *dev_priv = dev->dev_private;
> +       return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> +}
> +
> +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> +{
> +       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +       kmem_cache_free(dev_priv->slab, obj);
> +}
> +
>  static int
>  i915_gem_create(struct drm_file *file,
>                 struct drm_device *dev,
> @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
>         if (ret) {
>                 drm_gem_object_release(&obj->base);
>                 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> -               kfree(obj);
> +               i915_gem_object_free(obj);
>                 return ret;
>         }
>
> @@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>         struct address_space *mapping;
>         u32 mask;
>
> -       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +       obj = i915_gem_object_alloc(dev);
>         if (obj == NULL)
>                 return NULL;
>
>         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> -               kfree(obj);
> +               i915_gem_object_free(obj);
>                 return NULL;
>         }
>
> @@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>         i915_gem_info_remove_obj(dev_priv, obj->base.size);
>
>         kfree(obj->bit_17);
> -       kfree(obj);
> +       i915_gem_object_free(obj);
>  }
>
>  int
> @@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
>  void
>  i915_gem_load(struct drm_device *dev)
>  {
> -       int i;
>         drm_i915_private_t *dev_priv = dev->dev_private;
> +       int i;
> +
> +       dev_priv->slab =
> +               kmem_cache_create("i915_gem_object",
> +                                 sizeof(struct drm_i915_gem_object), 0,
> +                                 SLAB_HWCACHE_ALIGN,
> +                                 NULL);
>
>         INIT_LIST_HEAD(&dev_priv->mm.active_list);
>         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index ca3497e..f307e31 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>         if (IS_ERR(attach))
>                 return ERR_CAST(attach);
>
> -
> -       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +       obj = i915_gem_object_alloc(dev);
>         if (obj == NULL) {
>                 ret = -ENOMEM;
>                 goto fail_detach;
> @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>
>         ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>         if (ret) {
> -               kfree(obj);
> +               i915_gem_object_free(obj);
>                 goto fail_detach;
>         }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 86c4af4..d7cfa71 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  {
>         struct drm_i915_gem_object *obj;
>
> -       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +       obj = i915_gem_object_alloc(dev);
>         if (obj == NULL)
>                 return NULL;
>
> @@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>         return obj;
>
>  cleanup:
> -       kfree(obj);
> +       i915_gem_object_free(obj);
>         return NULL;
>  }
>
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ben Widawsky Nov. 5, 2012, 5:49 p.m. UTC | #2
On Fri, 19 Oct 2012 18:03:23 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> The primary purpose of this was to debug some use-after-free memory
> corruption that was causing an OOPS inside drm/i915. As it turned out
> the corruption was being caused elsewhere and i915.ko as a major user of
> many objects was being hit hardest.
> 
> Indeed as we do frequent the generic kmalloc caches, dedicating one to
> ourselves (or at least naming one for us depending upon the core) aids
> debugging our own slab usage.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

So I previously acked this patch, but you dropped that, so now you get
more words.

Why not go all the way and move all allocations we do in i915 to this
slab (adding a flag with whether or not to zero the memory first)? While
I agree in terms of space, nothing takes up as much as objects, I think
it would be nice to do this so we can totally track what our driver is
doing. In fact I would suggest this is something which core drm should
provide to all of the drivers (we already use drm_alloc calls in some
places).

Anyway, it's still:
Acked-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
>  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
>  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
>  5 files changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 14271aa..69969be 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
>  
>  	destroy_workqueue(dev_priv->wq);
>  
> +	if (dev_priv->slab)
> +		kmem_cache_destroy(dev_priv->slab);
> +
>  	pci_dev_put(dev_priv->bridge_dev);
>  	kfree(dev->dev_private);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a0e0cd..5084b29 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ struct intel_gmbus {
>  
>  typedef struct drm_i915_private {
>  	struct drm_device *dev;
> +	struct kmem_cache *slab;
>  
>  	const struct intel_device_info *info;
>  
> @@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
> +void *i915_gem_object_alloc(struct drm_device *dev);
> +void i915_gem_object_free(struct drm_i915_gem_object *obj);
>  int i915_gem_init_object(struct drm_gem_object *obj);
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			 const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     uint32_t alignment,
>  				     bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0349899..8183c0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> +void *i915_gem_object_alloc(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> +}
> +
> +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	kmem_cache_free(dev_priv->slab, obj);
> +}
> +
>  static int
>  i915_gem_create(struct drm_file *file,
>  		struct drm_device *dev,
> @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
>  	if (ret) {
>  		drm_gem_object_release(&obj->base);
>  		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return ret;
>  	}
>  
> @@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  	struct address_space *mapping;
>  	u32 mask;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
>  	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return NULL;
>  	}
>  
> @@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
>  	kfree(obj->bit_17);
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  }
>  
>  int
> @@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
>  void
>  i915_gem_load(struct drm_device *dev)
>  {
> -	int i;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> +	int i;
> +
> +	dev_priv->slab =
> +		kmem_cache_create("i915_gem_object",
> +				  sizeof(struct drm_i915_gem_object), 0,
> +				  SLAB_HWCACHE_ALIGN,
> +				  NULL);
>  
>  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index ca3497e..f307e31 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  	if (IS_ERR(attach))
>  		return ERR_CAST(attach);
>  
> -
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL) {
>  		ret = -ENOMEM;
>  		goto fail_detach;
> @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  
>  	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	if (ret) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		goto fail_detach;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 86c4af4..d7cfa71 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  {
>  	struct drm_i915_gem_object *obj;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
> @@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  	return obj;
>  
>  cleanup:
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  	return NULL;
>  }
>
Ben Widawsky Nov. 5, 2012, 6:07 p.m. UTC | #3
On Fri, 19 Oct 2012 18:03:23 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> The primary purpose of this was to debug some use-after-free memory
> corruption that was causing an OOPS inside drm/i915. As it turned out
> the corruption was being caused elsewhere and i915.ko as a major user of
> many objects was being hit hardest.
> 
> Indeed as we do frequent the generic kmalloc caches, dedicating one to
> ourselves (or at least naming one for us depending upon the core) aids
> debugging our own slab usage.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

To sum up:
patches 5-11 are Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
patch 13 is Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
patch 14-17 is Acked-by: Ben Widawsky <ben@bwidawsk.net>

patches 1-17 are Tested-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
>  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
>  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
>  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
>  5 files changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 14271aa..69969be 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
>  
>  	destroy_workqueue(dev_priv->wq);
>  
> +	if (dev_priv->slab)
> +		kmem_cache_destroy(dev_priv->slab);
> +
>  	pci_dev_put(dev_priv->bridge_dev);
>  	kfree(dev->dev_private);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a0e0cd..5084b29 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -398,6 +398,7 @@ struct intel_gmbus {
>  
>  typedef struct drm_i915_private {
>  	struct drm_device *dev;
> +	struct kmem_cache *slab;
>  
>  	const struct intel_device_info *info;
>  
> @@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  void i915_gem_load(struct drm_device *dev);
> +void *i915_gem_object_alloc(struct drm_device *dev);
> +void i915_gem_object_free(struct drm_i915_gem_object *obj);
>  int i915_gem_init_object(struct drm_gem_object *obj);
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  			 const struct drm_i915_gem_object_ops *ops);
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     uint32_t alignment,
>  				     bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0349899..8183c0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> +void *i915_gem_object_alloc(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> +}
> +
> +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +	kmem_cache_free(dev_priv->slab, obj);
> +}
> +
>  static int
>  i915_gem_create(struct drm_file *file,
>  		struct drm_device *dev,
> @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
>  	if (ret) {
>  		drm_gem_object_release(&obj->base);
>  		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return ret;
>  	}
>  
> @@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  	struct address_space *mapping;
>  	u32 mask;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
>  	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		return NULL;
>  	}
>  
> @@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
>  	kfree(obj->bit_17);
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  }
>  
>  int
> @@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
>  void
>  i915_gem_load(struct drm_device *dev)
>  {
> -	int i;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> +	int i;
> +
> +	dev_priv->slab =
> +		kmem_cache_create("i915_gem_object",
> +				  sizeof(struct drm_i915_gem_object), 0,
> +				  SLAB_HWCACHE_ALIGN,
> +				  NULL);
>  
>  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
>  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index ca3497e..f307e31 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  	if (IS_ERR(attach))
>  		return ERR_CAST(attach);
>  
> -
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL) {
>  		ret = -ENOMEM;
>  		goto fail_detach;
> @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
>  
>  	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
>  	if (ret) {
> -		kfree(obj);
> +		i915_gem_object_free(obj);
>  		goto fail_detach;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 86c4af4..d7cfa71 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  {
>  	struct drm_i915_gem_object *obj;
>  
> -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> +	obj = i915_gem_object_alloc(dev);
>  	if (obj == NULL)
>  		return NULL;
>  
> @@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  	return obj;
>  
>  cleanup:
> -	kfree(obj);
> +	i915_gem_object_free(obj);
>  	return NULL;
>  }
>
Ben Widawsky Nov. 5, 2012, 6:10 p.m. UTC | #4
On Mon, 5 Nov 2012 18:07:53 +0000
Ben Widawsky <ben@bwidawsk.net> wrote:

> On Fri, 19 Oct 2012 18:03:23 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > The primary purpose of this was to debug some use-after-free memory
> > corruption that was causing an OOPS inside drm/i915. As it turned out
> > the corruption was being caused elsewhere and i915.ko as a major user of
> > many objects was being hit hardest.
> > 
> > Indeed as we do frequent the generic kmalloc caches, dedicating one to
> > ourselves (or at least naming one for us depending upon the core) aids
> > debugging our own slab usage.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> To sum up:
> patches 5-11 are Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
oops, 11 is not reviewed-by me. 11 and 12 are meh IMO.

> patch 13 is Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> patch 14-17 is Acked-by: Ben Widawsky <ben@bwidawsk.net>
> 
> patches 1-17 are Tested-by: Ben Widawsky <ben@bwidawsk.net>
> 
> > ---
> >  drivers/gpu/drm/i915/i915_dma.c        |    3 +++
> >  drivers/gpu/drm/i915/i915_drv.h        |    4 ++++
> >  drivers/gpu/drm/i915/i915_gem.c        |   28 +++++++++++++++++++++++-----
> >  drivers/gpu/drm/i915/i915_gem_dmabuf.c |    5 ++---
> >  drivers/gpu/drm/i915/i915_gem_stolen.c |    4 ++--
> >  5 files changed, 34 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > index 14271aa..69969be 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -1776,6 +1776,9 @@ int i915_driver_unload(struct drm_device *dev)
> >  
> >  	destroy_workqueue(dev_priv->wq);
> >  
> > +	if (dev_priv->slab)
> > +		kmem_cache_destroy(dev_priv->slab);
> > +
> >  	pci_dev_put(dev_priv->bridge_dev);
> >  	kfree(dev->dev_private);
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 5a0e0cd..5084b29 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -398,6 +398,7 @@ struct intel_gmbus {
> >  
> >  typedef struct drm_i915_private {
> >  	struct drm_device *dev;
> > +	struct kmem_cache *slab;
> >  
> >  	const struct intel_device_info *info;
> >  
> > @@ -1341,12 +1342,15 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
> >  int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
> >  			struct drm_file *file_priv);
> >  void i915_gem_load(struct drm_device *dev);
> > +void *i915_gem_object_alloc(struct drm_device *dev);
> > +void i915_gem_object_free(struct drm_i915_gem_object *obj);
> >  int i915_gem_init_object(struct drm_gem_object *obj);
> >  void i915_gem_object_init(struct drm_i915_gem_object *obj,
> >  			 const struct drm_i915_gem_object_ops *ops);
> >  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> >  						  size_t size);
> >  void i915_gem_free_object(struct drm_gem_object *obj);
> > +
> >  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
> >  				     uint32_t alignment,
> >  				     bool map_and_fenceable,
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 0349899..8183c0f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -193,6 +193,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
> >  	return 0;
> >  }
> >  
> > +void *i915_gem_object_alloc(struct drm_device *dev)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
> > +}
> > +
> > +void i915_gem_object_free(struct drm_i915_gem_object *obj)
> > +{
> > +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > +	kmem_cache_free(dev_priv->slab, obj);
> > +}
> > +
> >  static int
> >  i915_gem_create(struct drm_file *file,
> >  		struct drm_device *dev,
> > @@ -216,7 +228,7 @@ i915_gem_create(struct drm_file *file,
> >  	if (ret) {
> >  		drm_gem_object_release(&obj->base);
> >  		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
> > -		kfree(obj);
> > +		i915_gem_object_free(obj);
> >  		return ret;
> >  	}
> >  
> > @@ -3780,12 +3792,12 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> >  	struct address_space *mapping;
> >  	u32 mask;
> >  
> > -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> > +	obj = i915_gem_object_alloc(dev);
> >  	if (obj == NULL)
> >  		return NULL;
> >  
> >  	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
> > -		kfree(obj);
> > +		i915_gem_object_free(obj);
> >  		return NULL;
> >  	}
> >  
> > @@ -3868,7 +3880,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> >  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
> >  
> >  	kfree(obj->bit_17);
> > -	kfree(obj);
> > +	i915_gem_object_free(obj);
> >  }
> >  
> >  int
> > @@ -4246,8 +4258,14 @@ init_ring_lists(struct intel_ring_buffer *ring)
> >  void
> >  i915_gem_load(struct drm_device *dev)
> >  {
> > -	int i;
> >  	drm_i915_private_t *dev_priv = dev->dev_private;
> > +	int i;
> > +
> > +	dev_priv->slab =
> > +		kmem_cache_create("i915_gem_object",
> > +				  sizeof(struct drm_i915_gem_object), 0,
> > +				  SLAB_HWCACHE_ALIGN,
> > +				  NULL);
> >  
> >  	INIT_LIST_HEAD(&dev_priv->mm.active_list);
> >  	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
> > diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > index ca3497e..f307e31 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> > @@ -276,8 +276,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> >  	if (IS_ERR(attach))
> >  		return ERR_CAST(attach);
> >  
> > -
> > -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> > +	obj = i915_gem_object_alloc(dev);
> >  	if (obj == NULL) {
> >  		ret = -ENOMEM;
> >  		goto fail_detach;
> > @@ -285,7 +284,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> >  
> >  	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
> >  	if (ret) {
> > -		kfree(obj);
> > +		i915_gem_object_free(obj);
> >  		goto fail_detach;
> >  	}
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > index 86c4af4..d7cfa71 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > @@ -261,7 +261,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
> >  {
> >  	struct drm_i915_gem_object *obj;
> >  
> > -	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> > +	obj = i915_gem_object_alloc(dev);
> >  	if (obj == NULL)
> >  		return NULL;
> >  
> > @@ -286,7 +286,7 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
> >  	return obj;
> >  
> >  cleanup:
> > -	kfree(obj);
> > +	i915_gem_object_free(obj);
> >  	return NULL;
> >  }
> >  
> 
> 
>
Chris Wilson Nov. 5, 2012, 8:57 p.m. UTC | #5
On Mon, 5 Nov 2012 17:49:35 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, 19 Oct 2012 18:03:23 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > The primary purpose of this was to debug some use-after-free memory
> > corruption that was causing an OOPS inside drm/i915. As it turned out
> > the corruption was being caused elsewhere and i915.ko as a major user of
> > many objects was being hit hardest.
> > 
> > Indeed as we do frequent the generic kmalloc caches, dedicating one to
> > ourselves (or at least naming one for us depending upon the core) aids
> > debugging our own slab usage.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> So I previously acked this patch, but you dropped that, so now you get
> more words.
> 
> Why not go all the way and move all allocations we do in i915 to this
> slab (adding a flag with whether or not to zero the memory first)? While
> I agree in terms of space, nothing takes up as much as objects, I think
> it would be nice to do this so we can totally track what our driver is
> doing. In fact I would suggest this is something which core drm should
> provide to all of the drivers (we already use drm_alloc calls in some
> places).

I thought the slab cache was a fixed size, so presumably you mean
introduce a slab per object we allocate? Of which the only other
interesting one is the requests. And we don't tend to have many requests
pending at any one time (outside of exceptional error cases), unlike the
thousands of bo that tend to be allocated. So is it worth dedicating a
separate slab to the few but frequently allocated requests?
-Chris
Ben Widawsky Nov. 7, 2012, 1:59 p.m. UTC | #6
On Mon, 05 Nov 2012 20:57:05 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Mon, 5 Nov 2012 17:49:35 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, 19 Oct 2012 18:03:23 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > The primary purpose of this was to debug some use-after-free memory
> > > corruption that was causing an OOPS inside drm/i915. As it turned out
> > > the corruption was being caused elsewhere and i915.ko as a major user of
> > > many objects was being hit hardest.
> > > 
> > > Indeed as we do frequent the generic kmalloc caches, dedicating one to
> > > ourselves (or at least naming one for us depending upon the core) aids
> > > debugging our own slab usage.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > 
> > So I previously acked this patch, but you dropped that, so now you get
> > more words.
> > 
> > Why not go all the way and move all allocations we do in i915 to this
> > slab (adding a flag with whether or not to zero the memory first)? While
> > I agree in terms of space, nothing takes up as much as objects, I think
> > it would be nice to do this so we can totally track what our driver is
> > doing. In fact I would suggest this is something which core drm should
> > provide to all of the drivers (we already use drm_alloc calls in some
> > places).
> 
> I thought the slab cache was a fixed size, so presumably you mean
> introduce a slab per object we allocate? Of which the only other
> interesting one is the requests. And we don't tend to have many requests
> pending at any one time (outside of exceptional error cases), unlike the
> thousands of bo that tend to be allocated. So is it worth dedicating a
> separate slab to the few but frequently allocated requests?
> -Chris
> 

I guess I wasn't really thinking about the details. I knew in the back
of my mind that the slab cache was a fixed size, but was thinking we
could have a generically large enough object for most of our other
things (I think they're mostly small except for dev_priv). But, now
thinking about it a bit, I guess that wastes the memory we're trying to
scrutinize, so it's probably silly.

In any case, this made me review kmem_cache_create a bit, so we can bump
this patch to:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 14271aa..69969be 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1776,6 +1776,9 @@  int i915_driver_unload(struct drm_device *dev)
 
 	destroy_workqueue(dev_priv->wq);
 
+	if (dev_priv->slab)
+		kmem_cache_destroy(dev_priv->slab);
+
 	pci_dev_put(dev_priv->bridge_dev);
 	kfree(dev->dev_private);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5a0e0cd..5084b29 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -398,6 +398,7 @@  struct intel_gmbus {
 
 typedef struct drm_i915_private {
 	struct drm_device *dev;
+	struct kmem_cache *slab;
 
 	const struct intel_device_info *info;
 
@@ -1341,12 +1342,15 @@  int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 void i915_gem_load(struct drm_device *dev);
+void *i915_gem_object_alloc(struct drm_device *dev);
+void i915_gem_object_free(struct drm_i915_gem_object *obj);
 int i915_gem_init_object(struct drm_gem_object *obj);
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
+
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
 				     bool map_and_fenceable,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0349899..8183c0f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -193,6 +193,18 @@  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+void *i915_gem_object_alloc(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	kmem_cache_free(dev_priv->slab, obj);
+}
+
 static int
 i915_gem_create(struct drm_file *file,
 		struct drm_device *dev,
@@ -216,7 +228,7 @@  i915_gem_create(struct drm_file *file,
 	if (ret) {
 		drm_gem_object_release(&obj->base);
 		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-		kfree(obj);
+		i915_gem_object_free(obj);
 		return ret;
 	}
 
@@ -3780,12 +3792,12 @@  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	struct address_space *mapping;
 	u32 mask;
 
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
 		return NULL;
 
 	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
-		kfree(obj);
+		i915_gem_object_free(obj);
 		return NULL;
 	}
 
@@ -3868,7 +3880,7 @@  void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
 	kfree(obj->bit_17);
-	kfree(obj);
+	i915_gem_object_free(obj);
 }
 
 int
@@ -4246,8 +4258,14 @@  init_ring_lists(struct intel_ring_buffer *ring)
 void
 i915_gem_load(struct drm_device *dev)
 {
-	int i;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	int i;
+
+	dev_priv->slab =
+		kmem_cache_create("i915_gem_object",
+				  sizeof(struct drm_i915_gem_object), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index ca3497e..f307e31 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -276,8 +276,7 @@  struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	if (IS_ERR(attach))
 		return ERR_CAST(attach);
 
-
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL) {
 		ret = -ENOMEM;
 		goto fail_detach;
@@ -285,7 +284,7 @@  struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 
 	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	if (ret) {
-		kfree(obj);
+		i915_gem_object_free(obj);
 		goto fail_detach;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 86c4af4..d7cfa71 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -261,7 +261,7 @@  _i915_gem_object_create_stolen(struct drm_device *dev,
 {
 	struct drm_i915_gem_object *obj;
 
-	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	obj = i915_gem_object_alloc(dev);
 	if (obj == NULL)
 		return NULL;
 
@@ -286,7 +286,7 @@  _i915_gem_object_create_stolen(struct drm_device *dev,
 	return obj;
 
 cleanup:
-	kfree(obj);
+	i915_gem_object_free(obj);
 	return NULL;
 }