
[5/5] drm/mm: Use clear_bit_unlock() for releasing the drm_mm_node()

Message ID 20191003210100.22250-5-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [1/5] drm/i915/execlists: Skip redundant resubmission

Commit Message

Chris Wilson Oct. 3, 2019, 9:01 p.m. UTC
A few callers need to serialise the destruction of their drm_mm_node and
ensure it is removed from the drm_mm before freeing. However, to be
completely sure that any access from another thread is complete before
we free the struct, we require the RELEASE semantics of
clear_bit_unlock().

This allows conditional locking such as:

Thread A				Thread B
    mutex_lock(mm_lock);                if (drm_mm_node_allocated(node)) {
    drm_mm_remove_node(node);               mutex_lock(mm_lock);
    mutex_unlock(mm_lock);                  drm_mm_remove_node(node);
                                            mutex_unlock(mm_lock);
                                         }
                                         kfree(node);

to serialise correctly without any lingering accesses from A to the
freed node. Allocation / insertion of the node is assumed never to race
with removal or eviction scanning.
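
Sketched in code, the guarantee being relied upon (remove_and_release()
is an illustrative name only; the real drm_mm_remove_node() also
maintains the hole bookkeeping):

	static void remove_and_release(struct drm_mm_node *node)
	{
		struct drm_mm *mm = node->mm;

		/* Plain stores: unlink the node from the allocator. */
		drm_mm_interval_tree_remove(node, &mm->interval_tree);
		list_del(&node->node_list);

		/*
		 * RELEASE: none of the stores above may be reordered past
		 * this clear, so a thread that observes the bit as 0 also
		 * observes the node fully unlinked and may safely kfree()
		 * it.
		 */
		clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
	}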

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_mm.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

Comments

Tvrtko Ursulin Oct. 4, 2019, 9:15 a.m. UTC | #1
On 03/10/2019 22:01, Chris Wilson wrote:
> A few callers need to serialise the destruction of their drm_mm_node and
> ensure it is removed from the drm_mm before freeing. However, to be
> completely sure that any access from another thread is complete before
> we free the struct, we require the RELEASE semantics of
> clear_bit_unlock().
> 
> This allows conditional locking such as:
> 
> Thread A				Thread B
>      mutex_lock(mm_lock);                if (drm_mm_node_allocated(node)) {
>      drm_mm_remove_node(node);               mutex_lock(mm_lock);
>      mutex_unlock(mm_lock);                  drm_mm_remove_node(node);
>                                              mutex_unlock(mm_lock);
>                                           }
>                                           kfree(node);

My understanding is that release semantics on the allocated bit mean the
1 -> 0 transition is guaranteed to be seen only once thread A's
drm_mm_remove_node() is fully done with the removal. But if thread A is
still in the middle of the removal, the node is still seen as allocated
from the outside, so thread B can enter the if-body, wait for the lock,
and then its drm_mm_remove_node() will attempt a double removal. So I
think another drm_mm_node_allocated() check under the lock is needed.
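
I.e. thread B would need something like this (sketch only):

    if (drm_mm_node_allocated(node)) {
            mutex_lock(mm_lock);
            if (drm_mm_node_allocated(node)) /* re-check under the lock */
                    drm_mm_remove_node(node);
            mutex_unlock(mm_lock);
    }
    kfree(node);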

But the kfree part looks correct. There can be no false negatives with
the release semantics on the allocated bit.

Regards,

Tvrtko

> to serialise correctly without any lingering accesses from A to the
> freed node. Allocation / insertion of the node is assumed never to race
> with removal or eviction scanning.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/drm_mm.c | 11 ++++++-----
>   1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index a9cab5e53731..2a6e34663146 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -424,9 +424,9 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
>   
>   	node->mm = mm;
>   
> +	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
>   	list_add(&node->node_list, &hole->node_list);
>   	drm_mm_interval_tree_add_node(hole, node);
> -	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
>   	node->hole_size = 0;
>   
>   	rm_hole(hole);
> @@ -543,9 +543,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
>   		node->color = color;
>   		node->hole_size = 0;
>   
> +		__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
>   		list_add(&node->node_list, &hole->node_list);
>   		drm_mm_interval_tree_add_node(hole, node);
> -		__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
>   
>   		rm_hole(hole);
>   		if (adj_start > hole_start)
> @@ -589,11 +589,12 @@ void drm_mm_remove_node(struct drm_mm_node *node)
>   
>   	drm_mm_interval_tree_remove(node, &mm->interval_tree);
>   	list_del(&node->node_list);
> -	__clear_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
>   
>   	if (drm_mm_hole_follows(prev_node))
>   		rm_hole(prev_node);
>   	add_hole(prev_node);
> +
> +	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
>   }
>   EXPORT_SYMBOL(drm_mm_remove_node);
>   
> @@ -614,6 +615,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
>   
>   	*new = *old;
>   
> +	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &new->flags);
>   	list_replace(&old->node_list, &new->node_list);
>   	rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree);
>   
> @@ -627,8 +629,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
>   				&mm->holes_addr);
>   	}
>   
> -	__clear_bit(DRM_MM_NODE_ALLOCATED_BIT, &old->flags);
> -	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &new->flags);
> +	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &old->flags);
>   }
>   EXPORT_SYMBOL(drm_mm_replace_node);
>   
>
Chris Wilson Oct. 4, 2019, 11:07 a.m. UTC | #2
Quoting Tvrtko Ursulin (2019-10-04 10:15:20)
> 
> On 03/10/2019 22:01, Chris Wilson wrote:
> > A few callers need to serialise the destruction of their drm_mm_node and
> > ensure it is removed from the drm_mm before freeing. However, to be
> > completely sure that any access from another thread is complete before
> > we free the struct, we require the RELEASE semantics of
> > clear_bit_unlock().
> > 
> > This allows conditional locking such as:
> > 
> > Thread A                              Thread B
> >      mutex_lock(mm_lock);                if (drm_mm_node_allocated(node)) {
> >      drm_mm_remove_node(node);               mutex_lock(mm_lock);
> >      mutex_unlock(mm_lock);                  drm_mm_remove_node(node);
> >                                              mutex_unlock(mm_lock);
> >                                           }
> >                                           kfree(node);
> 
> My understanding is that release semantics on the allocated bit mean the
> 1 -> 0 transition is guaranteed to be seen only once thread A's
> drm_mm_remove_node() is fully done with the removal. But if thread A is
> still in the middle of the removal, the node is still seen as allocated
> from the outside, so thread B can enter the if-body, wait for the lock,
> and then its drm_mm_remove_node() will attempt a double removal. So I
> think another drm_mm_node_allocated() check under the lock is needed.

Yes. A check after taking the lock is indeed required in this scenario.
And drm_mm_remove_node() insists the caller doesn't try a double remove.
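
(The removal path asserts that contract, along the lines of

	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));

at the top of drm_mm_remove_node().)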
-Chris
Chris Wilson Oct. 4, 2019, 11:17 a.m. UTC | #3
Quoting Chris Wilson (2019-10-04 12:07:10)
> Quoting Tvrtko Ursulin (2019-10-04 10:15:20)
> > 
> > On 03/10/2019 22:01, Chris Wilson wrote:
> > > A few callers need to serialise the destruction of their drm_mm_node and
> > > ensure it is removed from the drm_mm before freeing. However, to be
> > > completely sure that any access from another thread is complete before
> > > we free the struct, we require the RELEASE semantics of
> > > clear_bit_unlock().
> > > 
> > > This allows conditional locking such as:
> > > 
> > > Thread A                              Thread B
> > >      mutex_lock(mm_lock);                if (drm_mm_node_allocated(node)) {
> > >      drm_mm_remove_node(node);               mutex_lock(mm_lock);
> > >      mutex_unlock(mm_lock);                  drm_mm_remove_node(node);
> > >                                              mutex_unlock(mm_lock);
> > >                                           }
> > >                                           kfree(node);
> > 
> > My understanding is that release semantics on the allocated bit mean the
> > 1 -> 0 transition is guaranteed to be seen only once thread A's
> > drm_mm_remove_node() is fully done with the removal. But if thread A is
> > still in the middle of the removal, the node is still seen as allocated
> > from the outside, so thread B can enter the if-body, wait for the lock,
> > and then its drm_mm_remove_node() will attempt a double removal. So I
> > think another drm_mm_node_allocated() check under the lock is needed.
> 
> Yes. A check after taking the lock is indeed required in this scenario.
> And drm_mm_remove_node() insists the caller doesn't try a double remove.

I had to go back and double check the vma code, and that's fine.
(We hit this case where one thread is evicting and another thread is
destroying the object. And for us we do the check under the lock inside
__i915_vma_unbind() on the destroy path.)
-Chris
Tvrtko Ursulin Oct. 4, 2019, 12:01 p.m. UTC | #4
On 04/10/2019 12:17, Chris Wilson wrote:
> Quoting Chris Wilson (2019-10-04 12:07:10)
>> Quoting Tvrtko Ursulin (2019-10-04 10:15:20)
>>>
>>> On 03/10/2019 22:01, Chris Wilson wrote:
>>>> A few callers need to serialise the destruction of their drm_mm_node and
>>>> ensure it is removed from the drm_mm before freeing. However, to be
>>>> completely sure that any access from another thread is complete before
>>>> we free the struct, we require the RELEASE semantics of
>>>> clear_bit_unlock().
>>>>
>>>> This allows conditional locking such as:
>>>>
>>>> Thread A                              Thread B
>>>>       mutex_lock(mm_lock);                if (drm_mm_node_allocated(node)) {
>>>>       drm_mm_remove_node(node);               mutex_lock(mm_lock);
>>>>       mutex_unlock(mm_lock);                  drm_mm_remove_node(node);
>>>>                                               mutex_unlock(mm_lock);
>>>>                                            }
>>>>                                            kfree(node);
>>>
>>> My understanding is that release semantics on the allocated bit mean the
>>> 1 -> 0 transition is guaranteed to be seen only once thread A's
>>> drm_mm_remove_node() is fully done with the removal. But if thread A is
>>> still in the middle of the removal, the node is still seen as allocated
>>> from the outside, so thread B can enter the if-body, wait for the lock,
>>> and then its drm_mm_remove_node() will attempt a double removal. So I
>>> think another drm_mm_node_allocated() check under the lock is needed.
>>
>> Yes. A check after taking the lock is indeed required in this scenario.
>> And drm_mm_remove_node() insists the caller doesn't try a double remove.
> 
> I had to go back and double check the vma code, and that's fine.
> (We hit this case where one thread is evicting and another thread is
> destroying the object. And for us we do the check under the lock inside
> __i915_vma_unbind() on the destroy path.)

So I think if you amend the commit message to contain the repeated check
under the lock, the patch looks good to me. With that:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Daniel Vetter Oct. 9, 2019, 3:59 p.m. UTC | #5
On Fri, Oct 04, 2019 at 01:01:36PM +0100, Tvrtko Ursulin wrote:
> 
> On 04/10/2019 12:17, Chris Wilson wrote:
> > Quoting Chris Wilson (2019-10-04 12:07:10)
> > > Quoting Tvrtko Ursulin (2019-10-04 10:15:20)
> > > > 
> > > > On 03/10/2019 22:01, Chris Wilson wrote:
> > > > > A few callers need to serialise the destruction of their drm_mm_node and
> > > > > ensure it is removed from the drm_mm before freeing. However, to be
> > > > > completely sure that any access from another thread is complete before
> > > > > we free the struct, we require the RELEASE semantics of
> > > > > clear_bit_unlock().
> > > > > 
> > > > > This allows conditional locking such as:
> > > > > 
> > > > > Thread A                              Thread B
> > > > >       mutex_lock(mm_lock);                if (drm_mm_node_allocated(node)) {
> > > > >       drm_mm_remove_node(node);               mutex_lock(mm_lock);
> > > > >       mutex_unlock(mm_lock);                  drm_mm_remove_node(node);
> > > > >                                               mutex_unlock(mm_lock);
> > > > >                                            }
> > > > >                                            kfree(node);
> > > > 
> > > > My understanding is that release semantics on the allocated bit mean the
> > > > 1 -> 0 transition is guaranteed to be seen only once thread A's
> > > > drm_mm_remove_node() is fully done with the removal. But if thread A is
> > > > still in the middle of the removal, the node is still seen as allocated
> > > > from the outside, so thread B can enter the if-body, wait for the lock,
> > > > and then its drm_mm_remove_node() will attempt a double removal. So I
> > > > think another drm_mm_node_allocated() check under the lock is needed.
> > > 
> > > Yes. A check after taking the lock is indeed required in this scenario.
> > > And drm_mm_remove_node() insists the caller doesn't try a double remove.
> > 
> > I had to go back and double check the vma code, and that's fine.
> > (We hit this case where one thread is evicting and another thread is
> > destroying the object. And for us we do the check under the lock inside
> > __i915_vma_unbind() on the destroy path.)
> 
> So I think if you amend the commit message to contain the repeated check
> under the lock, the patch looks good to me. With that:
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

I think a follow-up patch updating the kerneldoc to mention that we now
guarantee this is missing here (best with the above fixed example). Plus
maybe a one-line code comment for the ALLOCATED_BIT.
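
Something like this, as a sketch (wording illustrative only, not an
actual patch):

	/**
	 * drm_mm_remove_node - Remove a memory node from the allocator.
	 * @node: drm_mm_node to remove
	 *
	 * The allocated bit is cleared last, with RELEASE semantics
	 * (clear_bit_unlock()), so once drm_mm_node_allocated() reads
	 * false the node is fully unlinked and may be freed. Callers
	 * racing on removal must still re-check the bit under their
	 * own lock before calling this function.
	 */

plus a one-liner next to DRM_MM_NODE_ALLOCATED_BIT along the lines of:

	/* Set on insertion, cleared with RELEASE semantics on removal. */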
-Daniel

Patch

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index a9cab5e53731..2a6e34663146 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -424,9 +424,9 @@  int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 
 	node->mm = mm;
 
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 	list_add(&node->node_list, &hole->node_list);
 	drm_mm_interval_tree_add_node(hole, node);
-	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 	node->hole_size = 0;
 
 	rm_hole(hole);
@@ -543,9 +543,9 @@  int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 		node->color = color;
 		node->hole_size = 0;
 
+		__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 		list_add(&node->node_list, &hole->node_list);
 		drm_mm_interval_tree_add_node(hole, node);
-		__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 
 		rm_hole(hole);
 		if (adj_start > hole_start)
@@ -589,11 +589,12 @@  void drm_mm_remove_node(struct drm_mm_node *node)
 
 	drm_mm_interval_tree_remove(node, &mm->interval_tree);
 	list_del(&node->node_list);
-	__clear_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 
 	if (drm_mm_hole_follows(prev_node))
 		rm_hole(prev_node);
 	add_hole(prev_node);
+
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 }
 EXPORT_SYMBOL(drm_mm_remove_node);
 
@@ -614,6 +615,7 @@  void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 
 	*new = *old;
 
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &new->flags);
 	list_replace(&old->node_list, &new->node_list);
 	rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree);
 
@@ -627,8 +629,7 @@  void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 				&mm->holes_addr);
 	}
 
-	__clear_bit(DRM_MM_NODE_ALLOCATED_BIT, &old->flags);
-	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &new->flags);
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &old->flags);
 }
 EXPORT_SYMBOL(drm_mm_replace_node);