
[6/6] sunrpc: introduce possibility that requested number of threads is different from actual

Message ID 20241023024222.691745-7-neilb@suse.de (mailing list archive)
State New
Headers show
Series prepare for dynamic server thread management | expand

Commit Message

NeilBrown Oct. 23, 2024, 2:37 a.m. UTC
New fields sp_nractual and sv_nractual track how many actual threads are
running.  sp_nrthreads and sv_nrthreads will be the numbers that were
explicitly requested.  Currently nractual == nrthreads.

sv_nractual is used for sizing UDP incoming socket space - in the rare
case that UDP is used.  This is because each running thread might be
holding an unprocessed request in the socket's skb queue.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/sunrpc/svc.h |  6 ++++--
 net/sunrpc/svc.c           | 22 +++++++++++++++-------
 net/sunrpc/svcsock.c       |  2 +-
 3 files changed, 20 insertions(+), 10 deletions(-)
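
To make the requested-vs-actual split concrete, here is a minimal sketch of
the bookkeeping this patch introduces (simplified from the hunks below:
sv_lock handling and pool selection are omitted, and svc_start_one_thread()
is a hypothetical stand-in for the svc_prepare_thread() plus
kthread_create_on_node() sequence in svc_start_kthreads()):

	/* Sketch: ask for one more thread in @pool on @serv. */
	static int svc_request_one_thread(struct svc_serv *serv,
					  struct svc_pool *pool)
	{
		/* The "requested" counters always move ... */
		serv->sv_nrthreads += 1;
		pool->sp_nrthreads += 1;

		/* ... but a new kthread is created only when the request
		 * is not already covered by a running thread.
		 */
		if (pool->sp_nrthreads <= pool->sp_nractual)
			return 0;

		if (svc_start_one_thread(serv, pool))	/* hypothetical helper */
			return -ENOMEM;

		serv->sv_nractual += 1;
		pool->sp_nractual += 1;
		return 0;
	}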

Comments

Jeff Layton Oct. 23, 2024, 1:32 p.m. UTC | #1
On Wed, 2024-10-23 at 13:37 +1100, NeilBrown wrote:
> New fields sp_nractual and sv_nractual track how many actual threads are
> running.  sp_nrthreads and sv_nrthreads will be the numbers that were
> explicitly requested.  Currently nractual == nrthreads.
> 
> sv_nractual is used for sizing UDP incoming socket space - in the rare
> case that UDP is used.  This is because each running thread might be
> holding an unprocessed request in the socket's skb queue.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  include/linux/sunrpc/svc.h |  6 ++++--
>  net/sunrpc/svc.c           | 22 +++++++++++++++-------
>  net/sunrpc/svcsock.c       |  2 +-
>  3 files changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 9d288a673705..3f2c90061b4a 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -36,7 +36,8 @@
>  struct svc_pool {
>  	unsigned int		sp_id;		/* pool id; also node id on NUMA */
>  	struct lwq		sp_xprts;	/* pending transports */
> -	unsigned int		sp_nrthreads;	/* # of threads in pool */
> +	unsigned int		sp_nrthreads;	/* # of threads requested for pool */
> +	unsigned int		sp_nractual;	/* # of threads running */
>  	struct list_head	sp_all_threads;	/* all server threads */
>  	struct llist_head	sp_idle_threads; /* idle server threads */
>  
> @@ -71,7 +72,8 @@ struct svc_serv {
>  	struct svc_stat *	sv_stats;	/* RPC statistics */
>  	spinlock_t		sv_lock;
>  	unsigned int		sv_nprogs;	/* Number of sv_programs */
> -	unsigned int		sv_nrthreads;	/* # of server threads */
> +	unsigned int		sv_nrthreads;	/* # of server threads requested */
> +	unsigned int		sv_nractual;	/* # of running threads */
>  	unsigned int		sv_max_payload;	/* datagram payload size */
>  	unsigned int		sv_max_mesg;	/* max_payload + 1 page for overheads */
>  	unsigned int		sv_xdrsize;	/* XDR buffer size */
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index bd4f02b34f44..d332f9d3d875 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -781,8 +781,12 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
>  	}
>  
>  	if (pool && pool->sp_nrthreads) {
> -		set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> -		set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> +		if (pool->sp_nrthreads <= pool->sp_nractual) {
> +			set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> +			set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> +			pool->sp_nractual -= 1;
> +			serv->sv_nractual -= 1;
> +		}

This is a little strange. It decrements the thread counts before the
threads actually exit. So, these counts are really just aspirational at
this point. Not a bug, just a note to myself while I am going over
this.
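
(The window does close before the thread-count change is fully committed,
though: the shrink loop in svc_stop_kthreads() below blocks on each victim
with, roughly,

	wait_on_bit(&victim->sp_flags, SP_VICTIM_REMAINS, TASK_IDLE);

and svc_exit_thread() clears that bit via clear_and_wake_up_bit() as its
final act, so the decremented counts match reality by the time
svc_set_num_threads() returns.)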

>  		return pool;
>  	}
>  	return NULL;
> @@ -803,6 +807,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  		chosen_pool = svc_pool_next(serv, pool, &state);
>  		node = svc_pool_map_get_node(chosen_pool->sp_id);
>  
> +		serv->sv_nrthreads += 1;
> +		chosen_pool->sp_nrthreads += 1;
> +
> +		if (chosen_pool->sp_nrthreads <= chosen_pool->sp_nractual)
> +			continue;
> +
>  		rqstp = svc_prepare_thread(serv, chosen_pool, node);
>  		if (!rqstp)
>  			return -ENOMEM;
> @@ -812,8 +822,8 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  			svc_exit_thread(rqstp);
>  			return PTR_ERR(task);
>  		}
> -		serv->sv_nrthreads += 1;
> -		chosen_pool->sp_nrthreads += 1;
> +		serv->sv_nractual += 1;
> +		chosen_pool->sp_nractual += 1;
>  
>  		rqstp->rq_task = task;
>  		if (serv->sv_nrpools > 1)
> @@ -850,6 +860,7 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
>  			    TASK_IDLE);
>  		nrservs++;
>  	} while (nrservs < 0);
> +	svc_sock_update_bufs(serv);

Nice little change here. No need to do this on every thread exit --
just do it once when they're all done. This change probably should be
split into a separate patch.
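
Batching it is also cheap and safe, because svc_sock_update_bufs() does no
resizing itself -- it just flags every permanent socket so that the next
receive re-runs the sizing, along the lines of this sketch of the mainline
function:

	void svc_sock_update_bufs(struct svc_serv *serv)
	{
		struct svc_sock *svsk;

		spin_lock_bh(&serv->sv_lock);
		list_for_each_entry(svsk, &serv->sv_permsocks,
				    sk_xprt.xpt_list)
			set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
		spin_unlock_bh(&serv->sv_lock);
	}

Deferring it until all victims are reaped merely coalesces redundant
flag-setting.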

>  	return 0;
>  }
>  
> @@ -955,13 +966,10 @@ void svc_rqst_release_pages(struct svc_rqst *rqstp)
>  void
>  svc_exit_thread(struct svc_rqst *rqstp)
>  {
> -	struct svc_serv	*serv = rqstp->rq_server;
>  	struct svc_pool	*pool = rqstp->rq_pool;
>  
>  	list_del_rcu(&rqstp->rq_all);
>  
> -	svc_sock_update_bufs(serv);
> -
>  	svc_rqst_free(rqstp);
>  
>  	clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 825ec5357691..191dbc648bd0 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -588,7 +588,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
>  	     * provides an upper bound on the number of threads
>  	     * which will access the socket.
>  	     */
> -	    svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
> +	    svc_sock_setbufsize(svsk, serv->sv_nractual + 3);
>  
>  	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
>  	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
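
For context on this hunk: svc_sock_setbufsize() turns a thread count into
send/receive buffer sizes, roughly per this simplified sketch of the
mainline helper (exact clamping may differ):

	static void svc_sock_setbufsize(struct svc_sock *svsk,
					unsigned int nreqs)
	{
		unsigned int max_mesg = svsk->sk_xprt.xpt_server->sv_max_mesg;
		struct socket *sock = svsk->sk_sock;

		/* cap nreqs so the byte counts cannot overflow an int */
		nreqs = min(nreqs, INT_MAX / 2 / max_mesg);

		lock_sock(sock->sk);
		sock->sk->sk_sndbuf = nreqs * max_mesg * 2;
		sock->sk->sk_rcvbuf = nreqs * max_mesg * 2;
		release_sock(sock->sk);
	}

Only threads that actually exist can be holding a pending request in the
socket, so sv_nractual (plus the +3 headroom) is the right bound here
rather than the requested count.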

Other than maybe splitting that one change into a separate patch, this
looks good to me.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
kernel test robot Oct. 30, 2024, 6:35 a.m. UTC | #2
Hello,

kernel test robot noticed a 10.2% regression of fsmark.files_per_sec on:


commit: d7f6562adeebe62458eb11437b260d3f470849cd ("[PATCH 6/6] sunrpc: introduce possibility that requested number of threads is different from actual")
url: https://github.com/intel-lab-lkp/linux/commits/NeilBrown/SUNRPC-move-nrthreads-counting-to-start-stop-threads/20241023-104539
base: git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next
patch link: https://lore.kernel.org/all/20241023024222.691745-7-neilb@suse.de/
patch subject: [PATCH 6/6] sunrpc: introduce possibility that requested number of threads is different from actual

testcase: fsmark
config: x86_64-rhel-8.3
compiler: gcc-12
test machine: 48 threads 2 sockets Intel(R) Xeon(R) CPU E5-2697 v2 @ 2.70GHz (Ivy Bridge-EP) with 64G memory
parameters:

	iterations: 1x
	nr_threads: 32t
	disk: 1SSD
	fs: btrfs
	fs2: nfsv4
	filesize: 8K
	test_size: 400M
	sync_method: fsyncBeforeClose
	nr_directories: 16d
	nr_files_per_directory: 256fpd
	cpufreq_governor: performance


In addition, the commit also has a significant impact on the following test:

+------------------+------------------------------------------------------------------------------------------------+
| testcase: change | fsmark: fsmark.files_per_sec 10.2% regression                                                  |
| test machine     | 48 threads 2 sockets Intel(R) Xeon(R) CPU E5-2697 v2 @ 2.70GHz (Ivy Bridge-EP) with 64G memory |
| test parameters  | cpufreq_governor=performance                                                                   |
|                  | disk=1SSD                                                                                      |
|                  | filesize=9B                                                                                    |
|                  | fs2=nfsv4                                                                                      |
|                  | fs=btrfs                                                                                       |
|                  | iterations=1x                                                                                  |
|                  | nr_directories=16d                                                                             |
|                  | nr_files_per_directory=256fpd                                                                  |
|                  | nr_threads=32t                                                                                 |
|                  | sync_method=fsyncBeforeClose                                                                   |
|                  | test_size=400M                                                                                 |
+------------------+------------------------------------------------------------------------------------------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags:
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202410301321.d8aebe67-oliver.sang@intel.com


Details are below:
-------------------------------------------------------------------------------------------------->


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20241030/202410301321.d8aebe67-oliver.sang@intel.com

=========================================================================================
compiler/cpufreq_governor/disk/filesize/fs2/fs/iterations/kconfig/nr_directories/nr_files_per_directory/nr_threads/rootfs/sync_method/tbox_group/test_size/testcase:
  gcc-12/performance/1SSD/8K/nfsv4/btrfs/1x/x86_64-rhel-8.3/16d/256fpd/32t/debian-12-x86_64-20240206.cgz/fsyncBeforeClose/lkp-ivb-2ep2/400M/fsmark

commit: 
  4e9c43765c ("sunrpc: remove all connection limit configuration")
  d7f6562ade ("sunrpc: introduce possibility that requested number of threads is different from actual")

4e9c43765c3fd361 d7f6562adeebe62458eb11437b2 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
      0.09 ± 78%    +173.9%       0.24 ± 29%  perf-stat.i.major-faults
      0.07 ± 66%     -61.1%       0.03 ± 23%  perf-sched.sch_delay.avg.ms.do_wait.kernel_wait4.__do_sys_wait4.do_syscall_64
      0.02 ± 88%   +1806.2%       0.46 ±196%  perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi.[unknown]
    -17.50           -31.4%     -12.00        sched_debug.cpu.nr_uninterruptible.min
      5.32 ± 10%     -11.9%       4.69 ±  8%  sched_debug.cpu.nr_uninterruptible.stddev
  76584407 ±  5%     +40.6%  1.077e+08 ±  4%  fsmark.app_overhead
      3016 ±  2%     -10.2%       2707        fsmark.files_per_sec
     36.83            -7.7%      34.00        fsmark.time.percent_of_cpu_this_job_got
    285398 ±  4%     +10.5%     315320        meminfo.Active
    282671 ±  4%     +10.6%     312596        meminfo.Active(file)
     15709 ± 11%     -28.8%      11189 ±  2%  meminfo.Dirty
     70764 ±  4%     +10.6%      78241        proc-vmstat.nr_active_file
    468998            +5.4%     494463        proc-vmstat.nr_dirtied
      3931 ± 11%     -28.8%       2797 ±  2%  proc-vmstat.nr_dirty
    462247            +6.7%     493402        proc-vmstat.nr_written
     70764 ±  4%     +10.6%      78241        proc-vmstat.nr_zone_active_file
      3620 ± 10%     -30.7%       2507 ±  5%  proc-vmstat.nr_zone_write_pending
   1135721            +2.6%    1165013        proc-vmstat.numa_hit
   1086049            +2.7%    1115353        proc-vmstat.numa_local
     56063 ±  2%     +18.0%      66177        proc-vmstat.pgactivate
   1521044            +2.3%    1555275        proc-vmstat.pgalloc_normal
   2522626           +10.2%    2778923        proc-vmstat.pgpgout
      3.75 ± 26%      -1.9        1.80 ±  7%  perf-profile.calltrace.cycles-pp.__x64_sys_exit_group.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe
      3.75 ± 26%      -1.9        1.80 ±  7%  perf-profile.calltrace.cycles-pp.x64_sys_call.do_syscall_64.entry_SYSCALL_64_after_hwframe
      2.54 ± 39%      -1.7        0.89 ±100%  perf-profile.calltrace.cycles-pp.event_function_call.perf_event_release_kernel.perf_release.__fput.task_work_run
      2.54 ± 39%      -1.7        0.89 ±100%  perf-profile.calltrace.cycles-pp.smp_call_function_single.event_function_call.perf_event_release_kernel.perf_release.__fput
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.write.writen.record__pushfn
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.write.writen.record__pushfn.perf_mmap__push
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.calltrace.cycles-pp.generic_perform_write.shmem_file_write_iter.vfs_write.ksys_write.do_syscall_64
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.calltrace.cycles-pp.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write.writen
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.calltrace.cycles-pp.shmem_file_write_iter.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.calltrace.cycles-pp.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
      0.51 ±141%      +1.6        2.09 ± 29%  perf-profile.calltrace.cycles-pp.evlist_cpu_iterator__next.__evlist__disable.__cmd_record.cmd_record.run_builtin
      0.81 ±100%      +2.2        3.01 ± 44%  perf-profile.calltrace.cycles-pp.__evlist__disable.__cmd_record.cmd_record.run_builtin.handle_internal_command
      3.11 ± 27%      +2.6        5.74 ± 24%  perf-profile.calltrace.cycles-pp.__cmd_record.cmd_record.run_builtin.handle_internal_command.main
      3.11 ± 27%      +2.6        5.74 ± 24%  perf-profile.calltrace.cycles-pp.cmd_record.run_builtin.handle_internal_command.main
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.children.cycles-pp.generic_perform_write
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.children.cycles-pp.ksys_write
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.children.cycles-pp.shmem_file_write_iter
      0.61 ±141%      +1.5        2.08 ± 25%  perf-profile.children.cycles-pp.vfs_write
      0.81 ±100%      +2.2        3.01 ± 44%  perf-profile.children.cycles-pp.__evlist__disable


***************************************************************************************************
lkp-ivb-2ep2: 48 threads 2 sockets Intel(R) Xeon(R) CPU E5-2697 v2 @ 2.70GHz (Ivy Bridge-EP) with 64G memory
=========================================================================================
compiler/cpufreq_governor/disk/filesize/fs2/fs/iterations/kconfig/nr_directories/nr_files_per_directory/nr_threads/rootfs/sync_method/tbox_group/test_size/testcase:
  gcc-12/performance/1SSD/9B/nfsv4/btrfs/1x/x86_64-rhel-8.3/16d/256fpd/32t/debian-12-x86_64-20240206.cgz/fsyncBeforeClose/lkp-ivb-2ep2/400M/fsmark

commit: 
  4e9c43765c ("sunrpc: remove all connection limit configuration")
  d7f6562ade ("sunrpc: introduce possibility that requested number of threads is different from actual")

4e9c43765c3fd361 d7f6562adeebe62458eb11437b2 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
      2.35            -2.0%       2.30        iostat.cpu.user
    137839 ± 27%     +41.1%     194529 ± 17%  numa-meminfo.node0.Active
    136284 ± 27%     +41.7%     193101 ± 17%  numa-meminfo.node0.Active(file)
      2.80 ± 32%      -1.6        1.16 ± 46%  perf-profile.children.cycles-pp.entry_SYSRETQ_unsafe_stack
      2.80 ± 32%      -1.6        1.16 ± 46%  perf-profile.self.cycles-pp.entry_SYSRETQ_unsafe_stack
 1.397e+08 ±  2%     +14.5%  1.599e+08        fsmark.app_overhead
      4135           -10.2%       3712        fsmark.files_per_sec
     48.67            -8.2%      44.67        fsmark.time.percent_of_cpu_this_job_got
     34164 ± 27%     +41.6%      48381 ± 17%  numa-vmstat.node0.nr_active_file
    276516 ± 17%     +26.7%     350296 ± 10%  numa-vmstat.node0.nr_dirtied
    276215 ± 17%     +26.5%     349400 ± 10%  numa-vmstat.node0.nr_written
     34164 ± 27%     +41.6%      48381 ± 17%  numa-vmstat.node0.nr_zone_active_file
     80.71 ± 30%     +38.5%     111.75 ± 20%  sched_debug.cfs_rq:/.removed.load_avg.avg
    270.60 ± 12%     +16.0%     313.94 ±  9%  sched_debug.cfs_rq:/.removed.load_avg.stddev
     23.59 ± 44%     +49.9%      35.35 ± 24%  sched_debug.cfs_rq:/.removed.runnable_avg.avg
     23.58 ± 44%     +49.8%      35.33 ± 24%  sched_debug.cfs_rq:/.removed.util_avg.avg
      3281 ± 11%   +1047.0%      37635 ±193%  sched_debug.cpu.avg_idle.min
    598666            +7.0%     640842        proc-vmstat.nr_dirtied
   1074454            +1.1%    1086656        proc-vmstat.nr_file_pages
    197321            +3.7%     204605        proc-vmstat.nr_inactive_file
    598082            +6.8%     638786        proc-vmstat.nr_written
    197321            +3.7%     204605        proc-vmstat.nr_zone_inactive_file
   1716397            +2.3%    1755672        proc-vmstat.numa_hit
   1666723            +2.4%    1705951        proc-vmstat.numa_local
    145474            +7.4%     156311        proc-vmstat.pgactivate
   2183965            +2.0%    2226921        proc-vmstat.pgalloc_normal
   3250818           +10.5%    3591588        proc-vmstat.pgpgout
      0.00 ±223%    +633.3%       0.02 ± 53%  perf-sched.sch_delay.avg.ms.do_nanosleep.hrtimer_nanosleep.common_nsleep.__x64_sys_clock_nanosleep
      0.01 ±223%    +729.3%       0.06 ± 29%  perf-sched.sch_delay.avg.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm
      0.01 ±223%    +466.7%       0.03 ± 22%  perf-sched.sch_delay.max.ms.do_nanosleep.hrtimer_nanosleep.__x64_sys_nanosleep.do_syscall_64
      0.00 ±223%    +973.3%       0.03 ± 61%  perf-sched.sch_delay.max.ms.do_nanosleep.hrtimer_nanosleep.common_nsleep.__x64_sys_clock_nanosleep
      0.02 ±223%    +480.6%       0.09 ± 25%  perf-sched.sch_delay.max.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm
      0.67 ±223%    +506.5%       4.04        perf-sched.wait_and_delay.avg.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm
      3.74 ± 52%    +190.1%      10.85 ± 36%  perf-sched.wait_and_delay.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
      0.68 ±223%    +526.0%       4.25 ±  8%  perf-sched.wait_and_delay.max.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm
     18.67 ±173%    +375.2%      88.72 ± 23%  perf-sched.wait_and_delay.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
      0.01 ±143%    +212.5%       0.03 ± 47%  perf-sched.wait_time.avg.ms.irqentry_exit_to_user_mode.asm_sysvec_apic_timer_interrupt.[unknown]
      0.66 ±223%    +504.3%       3.99        perf-sched.wait_time.avg.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm
      3.69 ± 53%    +192.6%      10.79 ± 36%  perf-sched.wait_time.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
      0.67 ±223%    +525.6%       4.17 ±  8%  perf-sched.wait_time.max.ms.rcu_gp_kthread.kthread.ret_from_fork.ret_from_fork_asm
     18.65 ±173%    +375.4%      88.66 ± 23%  perf-sched.wait_time.max.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread





Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.

Patch

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 9d288a673705..3f2c90061b4a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -36,7 +36,8 @@ 
 struct svc_pool {
 	unsigned int		sp_id;		/* pool id; also node id on NUMA */
 	struct lwq		sp_xprts;	/* pending transports */
-	unsigned int		sp_nrthreads;	/* # of threads in pool */
+	unsigned int		sp_nrthreads;	/* # of threads requested for pool */
+	unsigned int		sp_nractual;	/* # of threads running */
 	struct list_head	sp_all_threads;	/* all server threads */
 	struct llist_head	sp_idle_threads; /* idle server threads */
 
@@ -71,7 +72,8 @@ struct svc_serv {
 	struct svc_stat *	sv_stats;	/* RPC statistics */
 	spinlock_t		sv_lock;
 	unsigned int		sv_nprogs;	/* Number of sv_programs */
-	unsigned int		sv_nrthreads;	/* # of server threads */
+	unsigned int		sv_nrthreads;	/* # of server threads requested */
+	unsigned int		sv_nractual;	/* # of running threads */
 	unsigned int		sv_max_payload;	/* datagram payload size */
 	unsigned int		sv_max_mesg;	/* max_payload + 1 page for overheads */
 	unsigned int		sv_xdrsize;	/* XDR buffer size */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bd4f02b34f44..d332f9d3d875 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -781,8 +781,12 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
 	}
 
 	if (pool && pool->sp_nrthreads) {
-		set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
-		set_bit(SP_NEED_VICTIM, &pool->sp_flags);
+		if (pool->sp_nrthreads <= pool->sp_nractual) {
+			set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+			set_bit(SP_NEED_VICTIM, &pool->sp_flags);
+			pool->sp_nractual -= 1;
+			serv->sv_nractual -= 1;
+		}
 		return pool;
 	}
 	return NULL;
@@ -803,6 +807,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 		chosen_pool = svc_pool_next(serv, pool, &state);
 		node = svc_pool_map_get_node(chosen_pool->sp_id);
 
+		serv->sv_nrthreads += 1;
+		chosen_pool->sp_nrthreads += 1;
+
+		if (chosen_pool->sp_nrthreads <= chosen_pool->sp_nractual)
+			continue;
+
 		rqstp = svc_prepare_thread(serv, chosen_pool, node);
 		if (!rqstp)
 			return -ENOMEM;
@@ -812,8 +822,8 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 			svc_exit_thread(rqstp);
 			return PTR_ERR(task);
 		}
-		serv->sv_nrthreads += 1;
-		chosen_pool->sp_nrthreads += 1;
+		serv->sv_nractual += 1;
+		chosen_pool->sp_nractual += 1;
 
 		rqstp->rq_task = task;
 		if (serv->sv_nrpools > 1)
@@ -850,6 +860,7 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 			    TASK_IDLE);
 		nrservs++;
 	} while (nrservs < 0);
+	svc_sock_update_bufs(serv);
 	return 0;
 }
 
@@ -955,13 +966,10 @@ void svc_rqst_release_pages(struct svc_rqst *rqstp)
 void
 svc_exit_thread(struct svc_rqst *rqstp)
 {
-	struct svc_serv	*serv = rqstp->rq_server;
 	struct svc_pool	*pool = rqstp->rq_pool;
 
 	list_del_rcu(&rqstp->rq_all);
 
-	svc_sock_update_bufs(serv);
-
 	svc_rqst_free(rqstp);
 
 	clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 825ec5357691..191dbc648bd0 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -588,7 +588,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 	     * provides an upper bound on the number of threads
 	     * which will access the socket.
 	     */
-	    svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
+	    svc_sock_setbufsize(svsk, serv->sv_nractual + 3);
 
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 	err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,