
[10/32] uverbs: Convert idr to XArray

Message ID: 20190221002107.22625-11-willy@infradead.org (mailing list archive)
State: Accepted
Delegated to: Jason Gunthorpe
Series: Convert the Infiniband subsystem to XArray

Commit Message

Matthew Wilcox (Oracle) Feb. 21, 2019, 12:20 a.m. UTC
The word 'idr' is scattered throughout the API, so I haven't changed it,
but the 'idr' variable is now an XArray.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 drivers/infiniband/core/rdma_core.c | 78 +++++++----------------------
 drivers/infiniband/core/uverbs.h    |  4 +-
 2 files changed, 18 insertions(+), 64 deletions(-)

Comments

Jason Gunthorpe Feb. 21, 2019, 6:37 p.m. UTC | #1
On Wed, Feb 20, 2019 at 04:20:45PM -0800, Matthew Wilcox wrote:
> The word 'idr' is scattered throughout the API, so I haven't changed it,
> but the 'idr' variable is now an XArray.
> 
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
>  drivers/infiniband/core/rdma_core.c | 78 +++++++----------------------
>  drivers/infiniband/core/uverbs.h    |  4 +-
>  2 files changed, 18 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> index 6c4747e61d2b..1084685ddce7 100644
> +++ b/drivers/infiniband/core/rdma_core.c
> @@ -296,25 +296,8 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
>  
>  static int idr_add_uobj(struct ib_uobject *uobj)
>  {
> -	int ret;
> -
> -	idr_preload(GFP_KERNEL);
> -	spin_lock(&uobj->ufile->idr_lock);
> -
> -	/*
> -	 * We start with allocating an idr pointing to NULL. This represents an
> -	 * object which isn't initialized yet. We'll replace it later on with
> -	 * the real object once we commit.
> -	 */
> -	ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
> -			min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
> -	if (ret >= 0)
> -		uobj->id = ret;
> -
> -	spin_unlock(&uobj->ufile->idr_lock);
> -	idr_preload_end();
> -
> -	return ret < 0 ? ret : 0;
> +	return xa_alloc(&uobj->ufile->idr, &uobj->id, uobj, xa_limit_32b,
> +			GFP_KERNEL);

This really does need to store NULL here. We can't allow
lookup_get_idr_uobject to see the pointer until the object is
completely initialized. The basic high-level flow is something like

xa_alloc(&id, XA_ZERO_ENTRY)
copy_to_user(id)
xa_store(id, uobj);

Where the xa_store must only happen if the copy_to_user succeeded.
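
Spelled out a little more, a minimal sketch of that flow (the ufile/u_resp
names and the error handling here are illustrative, not the actual uverbs
code):

	int ret;

	/* reserve the id; lookups see NULL until we commit */
	ret = xa_alloc(&ufile->idr, &uobj->id, NULL, xa_limit_32b, GFP_KERNEL);
	if (ret)
		return ret;

	/* hand the id to userspace before the object becomes visible */
	if (copy_to_user(u_resp, &uobj->id, sizeof(uobj->id))) {
		xa_erase(&ufile->idr, uobj->id);
		return -EFAULT;
	}

	/* commit; the slot is already reserved, so this should not fail */
	xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);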

>  }
>  
>  /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
> @@ -324,29 +307,14 @@ lookup_get_idr_uobject(const struct uverbs_api_object *obj,
>  		       enum rdma_lookup_mode mode)
>  {
>  	struct ib_uobject *uobj;
> -	unsigned long idrno = id;
>  
> -	if (id < 0 || id > ULONG_MAX)
> +	if ((u64)id > ULONG_MAX)
>  		return ERR_PTR(-EINVAL);

'id' is controlled by userspace, negative values in the s64 are
strictly forbidden, so this isn't an equivalent transformation.

> @@ -587,16 +549,11 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
>  {
>  	struct ib_uverbs_file *ufile = uobj->ufile;
>  
> -	spin_lock(&ufile->idr_lock);
>  	/*
> -	 * We already allocated this IDR with a NULL object, so
> -	 * this shouldn't fail.
> -	 *
> -	 * NOTE: Once we set the IDR we loose ownership of our kref on uobj.
> +	 * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
>  	 * It will be put by remove_commit_idr_uobject()
>  	 */
> -	WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
> -	spin_unlock(&ufile->idr_lock);
> +	xa_store(&ufile->idr, uobj->id, uobj, 0);

What does the GFP flags == 0 do? I've seen you use this as a marker
for stores that cannot fail?

IMHO it would be nice to have an xa_store_reserved() idea which returns
void and WARN_ONs if it is misused.

Jason
Matthew Wilcox (Oracle) Feb. 21, 2019, 7:06 p.m. UTC | #2
On Thu, Feb 21, 2019 at 11:37:02AM -0700, Jason Gunthorpe wrote:
> > +	return xa_alloc(&uobj->ufile->idr, &uobj->id, uobj, xa_limit_32b,
> > +			GFP_KERNEL);
> 
> This really does need to store NULL here. We can't allow
> lookup_get_idr_uobject to see the pointer until the object is
> completely initialized. The basic high-level flow is something like

Braino on my part.  I remember looking at this, analysing the flow and
concluding it really did need to store NULL there ... and then my fingers
wrote uobj instead.  Fixed in my tree.

> > -	if (id < 0 || id > ULONG_MAX)
> > +	if ((u64)id > ULONG_MAX)
> 
> 'id' is controlled by userspace, negative values in the s64 are
> strictly forbidden, so this isn't an equivalent transformation.

Yet another braino.  I meant to type:

        if ((u64)id > LONG_MAX)

> > @@ -587,16 +549,11 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
> >  {
> >  	struct ib_uverbs_file *ufile = uobj->ufile;
> >  
> > -	spin_lock(&ufile->idr_lock);
> >  	/*
> > -	 * We already allocated this IDR with a NULL object, so
> > -	 * this shouldn't fail.
> > -	 *
> > -	 * NOTE: Once we set the IDR we loose ownership of our kref on uobj.
> > +	 * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
> >  	 * It will be put by remove_commit_idr_uobject()
> >  	 */
> > -	WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
> > -	spin_unlock(&ufile->idr_lock);
> > +	xa_store(&ufile->idr, uobj->id, uobj, 0);
> 
> What does the GFP flags == 0 do? I've seen you use this as a marker
> for stores that cannot fail?

It's for stores which we know we're just overwriting, so there won't
be any need to allocate memory.  But I've started to move away from
this idea -- it's a quirk of the current implementation that it won't
need to allocate memory, and not even guaranteed true by the current
implementation (if you try to store a 2-byte aligned pointer in index 0,
it will have to allocate memory).

So I think I'm actually going to replace this 0 with a GFP_KERNEL (I just
checked the calling context, and it can absolutely sleep to allocate
memory).
Jason Gunthorpe Feb. 21, 2019, 7:12 p.m. UTC | #3
On Thu, Feb 21, 2019 at 11:06:58AM -0800, Matthew Wilcox wrote:
> On Thu, Feb 21, 2019 at 11:37:02AM -0700, Jason Gunthorpe wrote:
> > > +	return xa_alloc(&uobj->ufile->idr, &uobj->id, uobj, xa_limit_32b,
> > > +			GFP_KERNEL);
> > 
> > This really does need to store NULL here. We can't allow
> > lookup_get_idr_uobject to see the pointer until the object is
> > completely initialized. The basic high-level flow is something like
> 
> Braino on my part.  I remember looking at this, analysing the flow and
> concluding it really did need to store NULL there ... and then my fingers
> wrote uobj instead.  Fixed in my tree.
> 
> > > -	if (id < 0 || id > ULONG_MAX)
> > > +	if ((u64)id > ULONG_MAX)
> > 
> > 'id' is controlled by userspace, negative values in the s64 are
> > strictly forbidden, so this isn't an equivalent transformation.
> 
> Yet another braino.  I meant to type:
> 
>         if ((u64)id > LONG_MAX)
> 
> > > @@ -587,16 +549,11 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
> > >  {
> > >  	struct ib_uverbs_file *ufile = uobj->ufile;
> > >  
> > > -	spin_lock(&ufile->idr_lock);
> > >  	/*
> > > -	 * We already allocated this IDR with a NULL object, so
> > > -	 * this shouldn't fail.
> > > -	 *
> > > -	 * NOTE: Once we set the IDR we loose ownership of our kref on uobj.
> > > +	 * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
> > >  	 * It will be put by remove_commit_idr_uobject()
> > >  	 */
> > > -	WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
> > > -	spin_unlock(&ufile->idr_lock);
> > > +	xa_store(&ufile->idr, uobj->id, uobj, 0);
> > 
> > What does the GFP flags == 0 do? I've seen you use this as a marker
> > for stores that cannot fail?
> 
> It's for stores which we know we're just overwriting, so there won't
> be any need to allocate memory.  But I've started to move away from
> this idea -- it's a quirk of the current implementation that it won't
> need to allocate memory, and not even guaranteed true by the current
> implementation (if you try to store a 2-byte aligned pointer in index 0,
> it will have to allocate memory).
> 
> So I think I'm actually going to replace this 0 with a GFP_KERNEL (I just
> checked the calling context, and it can absolutely sleep to allocate
> memory).

In that case the error code has to be returned here..
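
That is, something along the lines of (sketch):

	void *old;

	old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
	if (xa_is_err(old))
		return xa_err(old);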

But, IMHO, a major reason to use xa_reserve is to create a situation
where store can't fail, so it is disappointing to hear that isn't
possible anymore.

Jason
Matthew Wilcox (Oracle) Feb. 22, 2019, 1:43 a.m. UTC | #4
On Thu, Feb 21, 2019 at 12:12:58PM -0700, Jason Gunthorpe wrote:
> On Thu, Feb 21, 2019 at 11:06:58AM -0800, Matthew Wilcox wrote:
> > On Thu, Feb 21, 2019 at 11:37:02AM -0700, Jason Gunthorpe wrote:
> > > What does the GFP flags == 0 do? I've seen you use this as a marker
> > > for stores that cannot fail?
> > 
> > It's for stores which we know we're just overwriting, so there won't
> > be any need to allocate memory.  But I've started to move away from
> > this idea -- it's a quirk of the current implementation that it won't
> > need to allocate memory, and not even guaranteed true by the current
> > implementation (if you try to store a 2-byte aligned pointer in index 0,
> > it will have to allocate memory).
> > 
> > So I think I'm actually going to replace this 0 with a GFP_KERNEL (I just
> > checked the calling context, and it can absolutely sleep to allocate
> > memory).
> 
> In that case the error code has to be returned here..
> 
> But, IMHO, a major reason to use xa_reserve is to create a situation
> where store can't fail, so it is disappointing to hear that isn't
> possible anymore.

You're right.  This is just a bug, and I've fixed it (see the head of the
xarray tree).

But I still want to pass in GFP_KERNEL here.  For the replacement data
structure I'm working on to replace the radix tree, there will be times
when being able to reallocate nodes will be advantageous.  It'll never
be necessary, so it can't return an error, but it may lead to a more
balanced tree.
Jason Gunthorpe Feb. 22, 2019, 4:56 p.m. UTC | #5
On Thu, Feb 21, 2019 at 05:43:37PM -0800, Matthew Wilcox wrote:
> On Thu, Feb 21, 2019 at 12:12:58PM -0700, Jason Gunthorpe wrote:
> > On Thu, Feb 21, 2019 at 11:06:58AM -0800, Matthew Wilcox wrote:
> > > On Thu, Feb 21, 2019 at 11:37:02AM -0700, Jason Gunthorpe wrote:
> > > > What does the GFP flags == 0 do? I've seen you use this as a marker
> > > > for stores that cannot fail?
> > > 
> > > It's for stores which we know we're just overwriting, so there won't
> > > be any need to allocate memory.  But I've started to move away from
> > > this idea -- it's a quirk of the current implementation that it won't
> > > need to allocate memory, and not even guaranteed true by the current
> > > implementation (if you try to store a 2-byte aligned pointer in index 0,
> > > it will have to allocate memory).
> > > 
> > > So I think I'm actually going to replace this 0 with a GFP_KERNEL (I just
> > > checked the calling context, and it can absolutely sleep to allocate
> > > memory).
> > 
> > In that case the error code has to be returned here..
> > 
> > But, IMHO, a major reason to use xa_reserve is to create a situation
> > where store can't fail, so it is disappointing to hear that isn't
> > possible anymore.
> 
> You're right.  This is just a bug, and I've fixed it (see the head of the
> xarray tree).
>
> But I still want to pass in GFP_KERNEL here.  For the replacement data
> structure I'm working on to replace the radix tree, there will be times
> when being able to reallocate nodes will be advantageous.  It'll never
> be necessary, so it can't return an error, but it may lead to a more
> balanced tree.

I still feel like it would be clearer & safer to have an xa_replace()
type API for this case. Easier to understand what the contract is, and
easier for things like syzkaller to trigger the correctness check.

/*
 * xa_replace() can only be called on an entry that already has a value or is
 * reserved.  It replaces the value with entry and cannot fail. If gfp flags
 * are specified the function may optionally reallocate memory to optimize the
 * storage (future)
 */
void __xa_replace(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
{
	XA_STATE(xas, xa, index);
	void *curr;

	if (WARN_ON_ONCE(xa_is_advanced(entry)))
		return;
	if (!entry)
		entry = XA_ZERO_ENTRY;

	do {
		curr = xas_load(&xas);
		WARN_ON(curr == NULL);
		xas_store(&xas, entry);
		if (xa_track_free(xa))
			xas_clear_mark(&xas, XA_FREE_MARK);
	} while (WARN_ON(__xas_nomem(&xas, gfp)));
}
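
With an xa_replace() wrapper around this (taking the xa_lock like the other
non-__ entry points), alloc_commit_idr_uobject() could then be as simple as
(sketch):

	xa_replace(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
	return 0;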

Jason
Jason Gunthorpe April 25, 2019, 5:03 p.m. UTC | #6
On Wed, Feb 20, 2019 at 04:20:45PM -0800, Matthew Wilcox wrote:
> The word 'idr' is scattered throughout the API, so I haven't changed it,
> but the 'idr' variable is now an XArray.
> 
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
> ---
>  drivers/infiniband/core/rdma_core.c | 78 +++++++----------------------
>  drivers/infiniband/core/uverbs.h    |  4 +-
>  2 files changed, 18 insertions(+), 64 deletions(-)

I made the changes we discussed in email and applied this to for-next

Thanks,
Jason

Patch

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 6c4747e61d2b..1084685ddce7 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -296,25 +296,8 @@  static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
 
 static int idr_add_uobj(struct ib_uobject *uobj)
 {
-	int ret;
-
-	idr_preload(GFP_KERNEL);
-	spin_lock(&uobj->ufile->idr_lock);
-
-	/*
-	 * We start with allocating an idr pointing to NULL. This represents an
-	 * object which isn't initialized yet. We'll replace it later on with
-	 * the real object once we commit.
-	 */
-	ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
-			min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
-	if (ret >= 0)
-		uobj->id = ret;
-
-	spin_unlock(&uobj->ufile->idr_lock);
-	idr_preload_end();
-
-	return ret < 0 ? ret : 0;
+	return xa_alloc(&uobj->ufile->idr, &uobj->id, uobj, xa_limit_32b,
+			GFP_KERNEL);
 }
 
 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
@@ -324,29 +307,14 @@  lookup_get_idr_uobject(const struct uverbs_api_object *obj,
 		       enum rdma_lookup_mode mode)
 {
 	struct ib_uobject *uobj;
-	unsigned long idrno = id;
 
-	if (id < 0 || id > ULONG_MAX)
+	if ((u64)id > ULONG_MAX)
 		return ERR_PTR(-EINVAL);
 
 	rcu_read_lock();
-	/* object won't be released as we're protected in rcu */
-	uobj = idr_find(&ufile->idr, idrno);
-	if (!uobj) {
-		uobj = ERR_PTR(-ENOENT);
-		goto free;
-	}
-
-	/*
-	 * The idr_find is guaranteed to return a pointer to something that
-	 * isn't freed yet, or NULL, as the free after idr_remove goes through
-	 * kfree_rcu(). However the object may still have been released and
-	 * kfree() could be called at any time.
-	 */
-	if (!kref_get_unless_zero(&uobj->ref))
+	uobj = xa_load(&ufile->idr, id);
+	if (!uobj || !kref_get_unless_zero(&uobj->ref))
 		uobj = ERR_PTR(-ENOENT);
-
-free:
 	rcu_read_unlock();
 	return uobj;
 }
@@ -398,7 +366,7 @@  struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
 	struct ib_uobject *uobj;
 	int ret;
 
-	if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) {
+	if (obj == ERR_PTR(-ENOMSG)) {
 		/* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
 		uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
 		if (IS_ERR(uobj))
@@ -457,14 +425,12 @@  alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
 	ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
 				   RDMACG_RESOURCE_HCA_OBJECT);
 	if (ret)
-		goto idr_remove;
+		goto remove;
 
 	return uobj;
 
-idr_remove:
-	spin_lock(&ufile->idr_lock);
-	idr_remove(&ufile->idr, uobj->id);
-	spin_unlock(&ufile->idr_lock);
+remove:
+	xa_erase(&ufile->idr, uobj->id);
 uobj_put:
 	uverbs_uobject_put(uobj);
 	return ERR_PTR(ret);
@@ -522,9 +488,7 @@  static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
 	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
 			   RDMACG_RESOURCE_HCA_OBJECT);
 
-	spin_lock(&uobj->ufile->idr_lock);
-	idr_remove(&uobj->ufile->idr, uobj->id);
-	spin_unlock(&uobj->ufile->idr_lock);
+	xa_erase(&uobj->ufile->idr, uobj->id);
 }
 
 static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
@@ -554,9 +518,7 @@  static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
 
 static void remove_handle_idr_uobject(struct ib_uobject *uobj)
 {
-	spin_lock(&uobj->ufile->idr_lock);
-	idr_remove(&uobj->ufile->idr, uobj->id);
-	spin_unlock(&uobj->ufile->idr_lock);
+	xa_erase(&uobj->ufile->idr, uobj->id);
 	/* Matches the kref in alloc_commit_idr_uobject */
 	uverbs_uobject_put(uobj);
 }
@@ -587,16 +549,11 @@  static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
 
-	spin_lock(&ufile->idr_lock);
 	/*
-	 * We already allocated this IDR with a NULL object, so
-	 * this shouldn't fail.
-	 *
-	 * NOTE: Once we set the IDR we loose ownership of our kref on uobj.
+	 * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
 	 * It will be put by remove_commit_idr_uobject()
 	 */
-	WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
-	spin_unlock(&ufile->idr_lock);
+	xa_store(&ufile->idr, uobj->id, uobj, 0);
 
 	return 0;
 }
@@ -728,14 +685,13 @@  void rdma_lookup_put_uobject(struct ib_uobject *uobj,
 
 void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
 {
-	spin_lock_init(&ufile->idr_lock);
-	idr_init(&ufile->idr);
+	xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC);
 }
 
 void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
 {
 	struct ib_uobject *entry;
-	int id;
+	unsigned long id;
 
 	/*
 	 * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
@@ -745,12 +701,12 @@  void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
 	 *
 	 * This is an optimized equivalent to remove_handle_idr_uobject
 	 */
-	idr_for_each_entry(&ufile->idr, entry, id) {
+	xa_for_each(&ufile->idr, id, entry) {
 		WARN_ON(entry->object);
 		uverbs_uobject_put(entry);
 	}
 
-	idr_destroy(&ufile->idr);
+	xa_destroy(&ufile->idr);
 }
 
 const struct uverbs_obj_type_class uverbs_idr_class = {
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index ea0bc6885517..580e220ef1b9 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -161,9 +161,7 @@  struct ib_uverbs_file {
 	struct mutex umap_lock;
 	struct list_head umaps;
 
-	struct idr		idr;
-	/* spinlock protects write access to idr */
-	spinlock_t		idr_lock;
+	struct xarray		idr;
 };
 
 struct ib_uverbs_event {