
[1/4] rcu/kvfree: Support dynamic rcu_head for single argument objects

Message ID 20240828110929.3713-1-urezki@gmail.com (mailing list archive)
State New
Series [1/4] rcu/kvfree: Support dynamic rcu_head for single argument objects

Commit Message

Uladzislau Rezki Aug. 28, 2024, 11:09 a.m. UTC
Add support for dynamically attaching an rcu_head to an object
that gets freed via the single-argument form of kvfree_rcu(). This
is used on the path where a page allocation fails due to high
memory pressure.

The basic idea behind this is to minimize hits of the slow path,
which requires a caller to wait until a grace period has passed.
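
For reference, the two call forms look roughly like this (an
illustrative sketch only; the struct and variable names are made up):

	/*
	 * Two-argument form: the rcu_head is embedded in the object,
	 * so the free path never needs an allocation.
	 */
	struct foo {
		struct rcu_head rcu;
		/* payload */
	};
	kvfree_rcu(fp, rcu);

	/*
	 * Single-argument form (kvfree_rcu_mightsleep()): there is no
	 * embedded rcu_head. The pointer is normally queued on per-CPU
	 * bulk pages; when such a page cannot be allocated, this patch
	 * attaches a small dynamically allocated dyn_rcu_head instead
	 * of making the caller wait synchronously for a grace period.
	 */
	kvfree_rcu_mightsleep(p);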

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 kernel/rcu/tree.c | 53 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

Comments

Vlastimil Babka Aug. 28, 2024, 2:58 p.m. UTC | #1
On 8/28/24 13:09, Uladzislau Rezki (Sony) wrote:
> Add support for dynamically attaching an rcu_head to an object
> that gets freed via the single-argument form of kvfree_rcu(). This
> is used on the path where a page allocation fails due to high
> memory pressure.
> 
> The basic idea behind this is to minimize hits of the slow path,
> which requires a caller to wait until a grace period has passed.
> 
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>

So IIUC it's a situation where we can't allocate a page, but we hope the
kmalloc-32 slab still has free objects to give us dyn_rcu_heads before it
too would have to make a page allocation?

So that may really be possible, and there might potentially be many such
objects, but I wonder if there's really a benefit. The system is struggling
for memory, and the single-argument caller specifically is
kvfree_rcu_mightsleep(), so it could e.g. go and direct-reclaim a page
instead rather than start depleting the kmalloc-32 slab, no?
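
As a size sketch (assuming a 64-bit kernel; this is not part of the
patch), the wrapper works out to 24 bytes, which is why it lands in
kmalloc-32:

	struct dyn_rcu_head {
		unsigned long *ptr;	/*  8 bytes */
		struct rcu_head rh;	/* 16 bytes: ->next and ->func */
	};
	/* sizeof(struct dyn_rcu_head) == 24 -> served from kmalloc-32 */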

> ---
>  kernel/rcu/tree.c | 53 +++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 49 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index be00aac5f4e7..0124411fecfb 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3425,6 +3425,11 @@ kvfree_rcu_bulk(struct kfree_rcu_cpu *krcp,
>  	cond_resched_tasks_rcu_qs();
>  }
>  
> +struct dyn_rcu_head {
> +	unsigned long *ptr;
> +	struct rcu_head rh;
> +};
> +
>  static void
>  kvfree_rcu_list(struct rcu_head *head)
>  {
> @@ -3433,15 +3438,32 @@ kvfree_rcu_list(struct rcu_head *head)
>  	for (; head; head = next) {
>  		void *ptr = (void *) head->func;
>  		unsigned long offset = (void *) head - ptr;
> +		struct dyn_rcu_head *drhp = NULL;
> +
> +		/*
> +		 * For dynamically attached rcu_head, a ->func field
> +		 * points to _offset_, i.e. not to a pointer which has
> +		 * to be freed. For such objects, adjust an offset and
> +		 * pointer.
> +		 */
> +		if (__is_kvfree_rcu_offset((unsigned long) ptr)) {
> +			drhp = container_of(head, struct dyn_rcu_head, rh);
> +			offset = (unsigned long) drhp->rh.func;
> +			ptr = drhp->ptr;
> +		}
>  
>  		next = head->next;
>  		debug_rcu_head_unqueue((struct rcu_head *)ptr);
>  		rcu_lock_acquire(&rcu_callback_map);
>  		trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset);
>  
> -		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
> +		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset))) {
>  			kvfree(ptr);
>  
> +			if (drhp)
> +				kvfree(drhp);
> +		}
> +
>  		rcu_lock_release(&rcu_callback_map);
>  		cond_resched_tasks_rcu_qs();
>  	}
> @@ -3787,6 +3809,21 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
>  	return true;
>  }
>  
> +static struct rcu_head *
> +attach_rcu_head_to_object(void *obj)
> +{
> +	struct dyn_rcu_head *rhp;
> +
> +	rhp = kmalloc(sizeof(struct dyn_rcu_head), GFP_KERNEL |
> +		__GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
> +
> +	if (!rhp)
> +		return NULL;
> +
> +	rhp->ptr = obj;
> +	return &rhp->rh;
> +}
> +
>  /*
>   * Queue a request for lazy invocation of the appropriate free routine
>   * after a grace period.  Please note that three paths are maintained,
> @@ -3830,9 +3867,17 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
>  	if (!success) {
>  		run_page_cache_worker(krcp);
>  
> -		if (head == NULL)
> -			// Inline if kvfree_rcu(one_arg) call.
> -			goto unlock_return;
> +		if (!head) {
> +			krc_this_cpu_unlock(krcp, flags);
> +			head = attach_rcu_head_to_object(ptr);
> +			krcp = krc_this_cpu_lock(&flags);
> +
> +			if (!head)
> +				// Inline if kvfree_rcu(one_arg) call.
> +				goto unlock_return;
> +
> +			ptr = (rcu_callback_t) offsetof(struct dyn_rcu_head, rh);
> +		}
>  
>  		head->func = ptr;
>  		head->next = krcp->head;
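
To spell out the encoding the hunks above rely on (a restatement of
the patch, not new code): for an object with an embedded rcu_head,
head->func holds the object pointer, so offset = head - ptr is the
position of the rcu_head inside the object and stays below 4096. For
a dynamically attached head, ->func instead holds
offsetof(struct dyn_rcu_head, rh), itself a small value, so the
existing range check fires on the raw ->func:

	if (__is_kvfree_rcu_offset((unsigned long) head->func)) {
		/*
		 * Dynamically attached: recover the wrapper via
		 * container_of(), free the object (drhp->ptr) and
		 * then the wrapper itself.
		 */
	}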
Uladzislau Rezki Aug. 28, 2024, 5 p.m. UTC | #2
On Wed, Aug 28, 2024 at 04:58:48PM +0200, Vlastimil Babka wrote:
> On 8/28/24 13:09, Uladzislau Rezki (Sony) wrote:
> > Add support for dynamically attaching an rcu_head to an object
> > that gets freed via the single-argument form of kvfree_rcu(). This
> > is used on the path where a page allocation fails due to high
> > memory pressure.
> > 
> > The basic idea behind this is to minimize hits of the slow path,
> > which requires a caller to wait until a grace period has passed.
> > 
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> 
> So IIUC it's a situation where we can't allocate a page, but we hope the
> kmalloc-32 slab still has free objects to give us dyn_rcu_heads before it
> too would have to make a page allocation?
> 
Yes, you understood it correctly :)

>
> So that may really be possible, and there might potentially be many such
> objects, but I wonder if there's really a benefit. The system is struggling
> for memory, and the single-argument caller specifically is
> kvfree_rcu_mightsleep(), so it could e.g. go and direct-reclaim a page
> instead rather than start depleting the kmalloc-32 slab, no?
> 
This is a good question about the benefit, and I have to say that I do
not have a strong opinion here. I posted this patch to get some opinions
about it. We discussed this "dynamic attaching" with the RCU folks a few
years ago and decided not to go with it. I have not found any information
as to why.

The page request path, which is the "normal/fast" one, can lead to a
"light" direct reclaim; if that still fails, then we are in a
high-pressure situation. Depleting a slab is probably not worth it,
especially since the patch in this series:

[PATCH 4/4] rcu/kvfree: Switch to expedited version in slow path

switches to a faster version of synchronize_rcu() to speed up reclaim.

Plus [PATCH 3/4] rcu/kvfree: Use polled API in a slow path, which
also improves the slow path in that a GP might already have passed
for the object being freed.
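
The polled-API pattern in question looks roughly like this (a sketch
of the generic get_state_synchronize_rcu()/poll_state_synchronize_rcu()
usage, not the actual PATCH 3/4 code):

	unsigned long gp_snap;

	gp_snap = get_state_synchronize_rcu(); /* snapshot GP state */

	/* ... the object sits on a queue for a while ... */

	if (!poll_state_synchronize_rcu(gp_snap)) {
		/*
		 * Block only if no grace period has elapsed since the
		 * snapshot; PATCH 4/4 switches this to the expedited
		 * variant.
		 */
		synchronize_rcu();
	}

	kvfree(ptr);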

I am totally OK with dropping this patch. That is fine with me.

--
Uladzislau Rezki
Paul E. McKenney Aug. 28, 2024, 6 p.m. UTC | #3
On Wed, Aug 28, 2024 at 07:00:11PM +0200, Uladzislau Rezki wrote:
> On Wed, Aug 28, 2024 at 04:58:48PM +0200, Vlastimil Babka wrote:
> > On 8/28/24 13:09, Uladzislau Rezki (Sony) wrote:
> > > Add support for dynamically attaching an rcu_head to an object
> > > that gets freed via the single-argument form of kvfree_rcu(). This
> > > is used on the path where a page allocation fails due to high
> > > memory pressure.
> > > 
> > > The basic idea behind this is to minimize hits of the slow path,
> > > which requires a caller to wait until a grace period has passed.
> > > 
> > > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > 
> > So IIUC it's a situation where we can't allocate a page, but we hope the
> > kmalloc-32 slab still has free objects to give us dyn_rcu_heads before it
> > too would have to make a page allocation?
> > 
> Yes, you understood it correctly :)
> 
> >
> > So that may really be possible, and there might potentially be many such
> > objects, but I wonder if there's really a benefit. The system is struggling
> > for memory, and the single-argument caller specifically is
> > kvfree_rcu_mightsleep(), so it could e.g. go and direct-reclaim a page
> > instead rather than start depleting the kmalloc-32 slab, no?
> > 
> This is a good question about the benefit, and I have to say that I do
> not have a strong opinion here. I posted this patch to get some opinions
> about it. We discussed this "dynamic attaching" with the RCU folks a few
> years ago and decided not to go with it. I have not found any information
> as to why.

If I remember correctly, I asked "How are you testing this?", which
was then taken as a criticism rather than a question.  ;-)

No one has reported an OOM-related problem with the code in its current
form, for what little that is worth.

							Thanx, Paul

> The page request path, which is the "normal/fast" one, can lead to a
> "light" direct reclaim; if that still fails, then we are in a
> high-pressure situation. Depleting a slab is probably not worth it,
> especially since the patch in this series:
> 
> [PATCH 4/4] rcu/kvfree: Switch to expedited version in slow path
> 
> switches to a faster version of synchronize_rcu() to speed up reclaim.
> 
> Plus [PATCH 3/4] rcu/kvfree: Use polled API in a slow path, which
> also improves the slow path in that a GP might already have passed
> for the object being freed.
> 
> I am totally OK with dropping this patch. That is fine with me.
> 
> --
> Uladzislau Rezki

Patch

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index be00aac5f4e7..0124411fecfb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3425,6 +3425,11 @@  kvfree_rcu_bulk(struct kfree_rcu_cpu *krcp,
 	cond_resched_tasks_rcu_qs();
 }
 
+struct dyn_rcu_head {
+	unsigned long *ptr;
+	struct rcu_head rh;
+};
+
 static void
 kvfree_rcu_list(struct rcu_head *head)
 {
@@ -3433,15 +3438,32 @@  kvfree_rcu_list(struct rcu_head *head)
 	for (; head; head = next) {
 		void *ptr = (void *) head->func;
 		unsigned long offset = (void *) head - ptr;
+		struct dyn_rcu_head *drhp = NULL;
+
+		/*
+		 * For dynamically attached rcu_head, a ->func field
+		 * points to _offset_, i.e. not to a pointer which has
+		 * to be freed. For such objects, adjust an offset and
+		 * pointer.
+		 */
+		if (__is_kvfree_rcu_offset((unsigned long) ptr)) {
+			drhp = container_of(head, struct dyn_rcu_head, rh);
+			offset = (unsigned long) drhp->rh.func;
+			ptr = drhp->ptr;
+		}
 
 		next = head->next;
 		debug_rcu_head_unqueue((struct rcu_head *)ptr);
 		rcu_lock_acquire(&rcu_callback_map);
 		trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset);
 
-		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
+		if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset))) {
 			kvfree(ptr);
 
+			if (drhp)
+				kvfree(drhp);
+		}
+
 		rcu_lock_release(&rcu_callback_map);
 		cond_resched_tasks_rcu_qs();
 	}
@@ -3787,6 +3809,21 @@  add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
 	return true;
 }
 
+static struct rcu_head *
+attach_rcu_head_to_object(void *obj)
+{
+	struct dyn_rcu_head *rhp;
+
+	rhp = kmalloc(sizeof(struct dyn_rcu_head), GFP_KERNEL |
+		__GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+	if (!rhp)
+		return NULL;
+
+	rhp->ptr = obj;
+	return &rhp->rh;
+}
+
 /*
  * Queue a request for lazy invocation of the appropriate free routine
  * after a grace period.  Please note that three paths are maintained,
@@ -3830,9 +3867,17 @@  void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 	if (!success) {
 		run_page_cache_worker(krcp);
 
-		if (head == NULL)
-			// Inline if kvfree_rcu(one_arg) call.
-			goto unlock_return;
+		if (!head) {
+			krc_this_cpu_unlock(krcp, flags);
+			head = attach_rcu_head_to_object(ptr);
+			krcp = krc_this_cpu_lock(&flags);
+
+			if (!head)
+				// Inline if kvfree_rcu(one_arg) call.
+				goto unlock_return;
+
+			ptr = (rcu_callback_t) offsetof(struct dyn_rcu_head, rh);
+		}
 
 		head->func = ptr;
 		head->next = krcp->head;