Message ID: 20240715074657.18174-6-neilb@suse.de (mailing list archive)
State: New
Series: support automatic changes to nfsd thread count
On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote: > sp_nrthreads is only ever accessed under the service mutex > nlmsvc_mutex nfs_callback_mutex nfsd_mutex > so these is no need for it to be an atomic_t. > > The fact that all code using it is single-threaded means that we can > simplify svc_pool_victim and remove the temporary elevation of > sp_nrthreads. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > fs/nfsd/nfsctl.c | 2 +- > fs/nfsd/nfssvc.c | 2 +- > include/linux/sunrpc/svc.h | 4 ++-- > net/sunrpc/svc.c | 31 +++++++++++-------------------- > 4 files changed, 15 insertions(+), 24 deletions(-) > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index 5b0f2e0d7ccf..d85b6d1fa31f 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) > struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; > > err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, > - atomic_read(&sp->sp_nrthreads)); > + sp->sp_nrthreads); > if (err) > goto err_unlock; > } > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > index 4438cdcd4873..7377422a34df 100644 > --- a/fs/nfsd/nfssvc.c > +++ b/fs/nfsd/nfssvc.c > @@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) > > if (serv) > for (i = 0; i < serv->sv_nrpools && i < n; i++) > - nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads); > + nthreads[i] = serv->sv_pools[i].sp_nrthreads; > return 0; > } > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > index e4fa25fafa97..99e9345d829e 100644 > --- a/include/linux/sunrpc/svc.h > +++ b/include/linux/sunrpc/svc.h > @@ -33,9 +33,9 @@ > * node traffic on multi-node NUMA NFS servers. > */ > struct svc_pool { > - unsigned int sp_id; /* pool id; also node id on NUMA */ > + unsigned int sp_id; /* pool id; also node id on NUMA */ > struct lwq sp_xprts; /* pending transports */ > - atomic_t sp_nrthreads; /* # of threads in pool */ > + unsigned int sp_nrthreads; /* # of threads in pool */ > struct list_head sp_all_threads; /* all server threads */ > struct llist_head sp_idle_threads; /* idle server threads */ > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > index 072ad115ae3d..0d8588bc693c 100644 > --- a/net/sunrpc/svc.c > +++ b/net/sunrpc/svc.c > @@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) > serv->sv_nrthreads += 1; > spin_unlock_bh(&serv->sv_lock); > > - atomic_inc(&pool->sp_nrthreads); > + pool->sp_nrthreads += 1; > > /* Protected by whatever lock the service uses when calling > * svc_set_num_threads() > @@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool, > struct svc_pool *pool; > unsigned int i; > > -retry: > pool = target_pool; > > - if (pool != NULL) { > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > - goto found_pool; > - return NULL; > - } else { > + if (!pool) { > for (i = 0; i < serv->sv_nrpools; i++) { > pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > - goto found_pool; > + if (pool->sp_nrthreads) > + break; > } > - return NULL; > } > > -found_pool: > - set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > - set_bit(SP_NEED_VICTIM, &pool->sp_flags); > - if (!atomic_dec_and_test(&pool->sp_nrthreads)) > + if (pool && pool->sp_nrthreads) { > + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > + set_bit(SP_NEED_VICTIM, &pool->sp_flags); > return pool; > - /* Nothing left in this pool any more */ > - clear_bit(SP_NEED_VICTIM, 
&pool->sp_flags); > - clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > - goto retry; > + } > + return NULL; > } > > static int > @@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) > if (!pool) > nrservs -= serv->sv_nrthreads; > else > - nrservs -= atomic_read(&pool->sp_nrthreads); > + nrservs -= pool->sp_nrthreads; > > if (nrservs > 0) > return svc_start_kthreads(serv, pool, nrservs); > @@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp) > > list_del_rcu(&rqstp->rq_all); > > - atomic_dec(&pool->sp_nrthreads); > + pool->sp_nrthreads -= 1; > > spin_lock_bh(&serv->sv_lock); > serv->sv_nrthreads -= 1; I don't think svc_exit_thread is called with the nfsd_mutex held, so if several threads were exiting at the same time, they could race here.
On Mon, 2024-07-15 at 10:12 -0400, Jeff Layton wrote: > On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote: > > sp_nrthreads is only ever accessed under the service mutex > > nlmsvc_mutex nfs_callback_mutex nfsd_mutex > > so these is no need for it to be an atomic_t. > > > > The fact that all code using it is single-threaded means that we > > can > > simplify svc_pool_victim and remove the temporary elevation of > > sp_nrthreads. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > fs/nfsd/nfsctl.c | 2 +- > > fs/nfsd/nfssvc.c | 2 +- > > include/linux/sunrpc/svc.h | 4 ++-- > > net/sunrpc/svc.c | 31 +++++++++++-------------------- > > 4 files changed, 15 insertions(+), 24 deletions(-) > > > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > > index 5b0f2e0d7ccf..d85b6d1fa31f 100644 > > --- a/fs/nfsd/nfsctl.c > > +++ b/fs/nfsd/nfsctl.c > > @@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff > > *skb, struct genl_info *info) > > struct svc_pool *sp = &nn->nfsd_serv- > > >sv_pools[i]; > > > > err = nla_put_u32(skb, > > NFSD_A_SERVER_THREADS, > > - atomic_read(&sp- > > >sp_nrthreads)); > > + sp->sp_nrthreads); > > if (err) > > goto err_unlock; > > } > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > > index 4438cdcd4873..7377422a34df 100644 > > --- a/fs/nfsd/nfssvc.c > > +++ b/fs/nfsd/nfssvc.c > > @@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, > > struct net *net) > > > > if (serv) > > for (i = 0; i < serv->sv_nrpools && i < n; i++) > > - nthreads[i] = atomic_read(&serv- > > >sv_pools[i].sp_nrthreads); > > + nthreads[i] = serv- > > >sv_pools[i].sp_nrthreads; > > return 0; > > } > > > > diff --git a/include/linux/sunrpc/svc.h > > b/include/linux/sunrpc/svc.h > > index e4fa25fafa97..99e9345d829e 100644 > > --- a/include/linux/sunrpc/svc.h > > +++ b/include/linux/sunrpc/svc.h > > @@ -33,9 +33,9 @@ > > * node traffic on multi-node NUMA NFS servers. 
> > */ > > struct svc_pool { > > - unsigned int sp_id; /* pool id; also > > node id on NUMA */ > > + unsigned int sp_id; /* pool id; also > > node id on NUMA */ > > struct lwq sp_xprts; /* pending > > transports */ > > - atomic_t sp_nrthreads; /* # of threads in > > pool */ > > + unsigned int sp_nrthreads; /* # of threads in > > pool */ > > struct list_head sp_all_threads; /* all > > server threads */ > > struct llist_head sp_idle_threads; /* idle server > > threads */ > > > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > > index 072ad115ae3d..0d8588bc693c 100644 > > --- a/net/sunrpc/svc.c > > +++ b/net/sunrpc/svc.c > > @@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, > > struct svc_pool *pool, int node) > > serv->sv_nrthreads += 1; > > spin_unlock_bh(&serv->sv_lock); > > > > - atomic_inc(&pool->sp_nrthreads); > > + pool->sp_nrthreads += 1; > > > > /* Protected by whatever lock the service uses when > > calling > > * svc_set_num_threads() > > @@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct > > svc_pool *target_pool, > > struct svc_pool *pool; > > unsigned int i; > > > > -retry: > > pool = target_pool; > > > > - if (pool != NULL) { > > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > > - goto found_pool; > > - return NULL; > > - } else { > > + if (!pool) { > > for (i = 0; i < serv->sv_nrpools; i++) { > > pool = &serv->sv_pools[--(*state) % serv- > > >sv_nrpools]; > > - if (atomic_inc_not_zero(&pool- > > >sp_nrthreads)) > > - goto found_pool; > > + if (pool->sp_nrthreads) > > + break; > > } > > - return NULL; > > } > > > > -found_pool: > > - set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > - set_bit(SP_NEED_VICTIM, &pool->sp_flags); > > - if (!atomic_dec_and_test(&pool->sp_nrthreads)) > > + if (pool && pool->sp_nrthreads) { > > + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > + set_bit(SP_NEED_VICTIM, &pool->sp_flags); > > return pool; > > - /* Nothing left in this pool any more */ > > - clear_bit(SP_NEED_VICTIM, &pool->sp_flags); > > - clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > - goto retry; > > + } > > + return NULL; > > } > > > > static int > > @@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, > > struct svc_pool *pool, int nrservs) > > if (!pool) > > nrservs -= serv->sv_nrthreads; > > else > > - nrservs -= atomic_read(&pool->sp_nrthreads); > > + nrservs -= pool->sp_nrthreads; > > > > if (nrservs > 0) > > return svc_start_kthreads(serv, pool, nrservs); > > @@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp) > > > > list_del_rcu(&rqstp->rq_all); > > > > - atomic_dec(&pool->sp_nrthreads); > > + pool->sp_nrthreads -= 1; > > > > spin_lock_bh(&serv->sv_lock); > > serv->sv_nrthreads -= 1; > > I don't think svc_exit_thread is called with the nfsd_mutex held, so > if > several threads were exiting at the same time, they could race here. > Ok, the changelog on #7 might point out why I'm wrong here. nfsd() calls svc_exit_thread when exiting, but I missed that that would imply that svc_stop_kthreads() is running in another task (and that the nfsd_mutex would actually be held). It also looks like they do have to be torn down serially as well, so there should be no race there after all. Either way, could I trouble you to add a comment about this above svc_exit_thread? That's a really subtle interaction and it would be good to document it. Thanks,
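For readers following the thread, the two-task handshake Jeff describes above can be sketched roughly as follows. The function and flag names are the ones used in this series; the sequence is a simplified illustration, not code taken from the patches.

/*
 * Task A: controller (holds nfsd_mutex)    Task B: an exiting nfsd thread
 * --------------------------------------   ------------------------------
 * svc_set_num_threads()
 *   svc_stop_kthreads()
 *     svc_pool_victim()
 *       set SP_VICTIM_REMAINS
 *       set SP_NEED_VICTIM
 *     wake a thread, then wait for
 *     SP_VICTIM_REMAINS to clear           svc_thread_should_stop() -> true
 *                                          svc_exit_thread()
 *                                            pool->sp_nrthreads -= 1;
 *                                            (Task A still holds the mutex,
 *                                             so there is no other writer)
 *                                            clear SP_VICTIM_REMAINS (last step)
 *     ... pick the next victim ...
 */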
On Tue, 16 Jul 2024, Jeff Layton wrote: > On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote: > > sp_nrthreads is only ever accessed under the service mutex > > nlmsvc_mutex nfs_callback_mutex nfsd_mutex > > so these is no need for it to be an atomic_t. > > > > The fact that all code using it is single-threaded means that we can > > simplify svc_pool_victim and remove the temporary elevation of > > sp_nrthreads. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > fs/nfsd/nfsctl.c | 2 +- > > fs/nfsd/nfssvc.c | 2 +- > > include/linux/sunrpc/svc.h | 4 ++-- > > net/sunrpc/svc.c | 31 +++++++++++-------------------- > > 4 files changed, 15 insertions(+), 24 deletions(-) > > > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > > index 5b0f2e0d7ccf..d85b6d1fa31f 100644 > > --- a/fs/nfsd/nfsctl.c > > +++ b/fs/nfsd/nfsctl.c > > @@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) > > struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; > > > > err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, > > - atomic_read(&sp->sp_nrthreads)); > > + sp->sp_nrthreads); > > if (err) > > goto err_unlock; > > } > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > > index 4438cdcd4873..7377422a34df 100644 > > --- a/fs/nfsd/nfssvc.c > > +++ b/fs/nfsd/nfssvc.c > > @@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) > > > > if (serv) > > for (i = 0; i < serv->sv_nrpools && i < n; i++) > > - nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads); > > + nthreads[i] = serv->sv_pools[i].sp_nrthreads; > > return 0; > > } > > > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > > index e4fa25fafa97..99e9345d829e 100644 > > --- a/include/linux/sunrpc/svc.h > > +++ b/include/linux/sunrpc/svc.h > > @@ -33,9 +33,9 @@ > > * node traffic on multi-node NUMA NFS servers. 
> > */ > > struct svc_pool { > > - unsigned int sp_id; /* pool id; also node id on NUMA */ > > + unsigned int sp_id; /* pool id; also node id on NUMA */ > > struct lwq sp_xprts; /* pending transports */ > > - atomic_t sp_nrthreads; /* # of threads in pool */ > > + unsigned int sp_nrthreads; /* # of threads in pool */ > > struct list_head sp_all_threads; /* all server threads */ > > struct llist_head sp_idle_threads; /* idle server threads */ > > > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > > index 072ad115ae3d..0d8588bc693c 100644 > > --- a/net/sunrpc/svc.c > > +++ b/net/sunrpc/svc.c > > @@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) > > serv->sv_nrthreads += 1; > > spin_unlock_bh(&serv->sv_lock); > > > > - atomic_inc(&pool->sp_nrthreads); > > + pool->sp_nrthreads += 1; > > > > /* Protected by whatever lock the service uses when calling > > * svc_set_num_threads() > > @@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool, > > struct svc_pool *pool; > > unsigned int i; > > > > -retry: > > pool = target_pool; > > > > - if (pool != NULL) { > > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > > - goto found_pool; > > - return NULL; > > - } else { > > + if (!pool) { > > for (i = 0; i < serv->sv_nrpools; i++) { > > pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; > > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > > - goto found_pool; > > + if (pool->sp_nrthreads) > > + break; > > } > > - return NULL; > > } > > > > -found_pool: > > - set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > - set_bit(SP_NEED_VICTIM, &pool->sp_flags); > > - if (!atomic_dec_and_test(&pool->sp_nrthreads)) > > + if (pool && pool->sp_nrthreads) { > > + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > + set_bit(SP_NEED_VICTIM, &pool->sp_flags); > > return pool; > > - /* Nothing left in this pool any more */ > > - clear_bit(SP_NEED_VICTIM, &pool->sp_flags); > > - clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > - goto retry; > > + } > > + return NULL; > > } > > > > static int > > @@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) > > if (!pool) > > nrservs -= serv->sv_nrthreads; > > else > > - nrservs -= atomic_read(&pool->sp_nrthreads); > > + nrservs -= pool->sp_nrthreads; > > > > if (nrservs > 0) > > return svc_start_kthreads(serv, pool, nrservs); > > @@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp) > > > > list_del_rcu(&rqstp->rq_all); > > > > - atomic_dec(&pool->sp_nrthreads); > > + pool->sp_nrthreads -= 1; > > > > spin_lock_bh(&serv->sv_lock); > > serv->sv_nrthreads -= 1; > > I don't think svc_exit_thread is called with the nfsd_mutex held, so if > > several threads were exiting at the same time, they could race here. This is subtle and deserves explanation in the commit. svc_exit_thread() is called in a thread *after* svc_thread_should_stop() has returned true. That means RQ_VICTIM is set and most likely SP_NEED_VICTIM was set. SP_NEED_VICTIM is set in svc_pool_victim(), which is called from svc_stop_kthreads(), which requires that the mutex is held. svc_stop_kthreads() waits for SP_VICTIM_REMAINS to be cleared, which is the last thing that svc_exit_thread() does.
The case where svc_exit_thread() is called but SP_NEED_VICTIM wasn't set (only RQ_VICTIM) is in the ETIMEDOUT case of nfsd(), in which case nfsd() ensures that the mutex is held. This was why [PATCH 07/14] "Change unshare_fs_struct() to never fail" was needed. If that fails in the current code, svc_exit_thread() can be called without the mutex - which is already a theoretical problem for the list_del_rcu(). Thanks, NeilBrown
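Distilling the explanation above into the kind of comment Jeff asked for, something along these lines could sit above svc_exit_thread(). The wording is only an illustrative sketch based on this thread, not the text that was eventually merged.

/*
 * Locking: the service mutex is not taken here, but it is effectively held
 * for us.  svc_exit_thread() only runs after svc_thread_should_stop() has
 * returned true, which means that either nfsd() took the mutex itself (the
 * ETIMEDOUT case) or another task set SP_NEED_VICTIM under the mutex in
 * svc_pool_victim() and is now waiting in svc_stop_kthreads() for
 * SP_VICTIM_REMAINS to be cleared, the last thing this function does.
 * That waiting task serializes exiting threads, so the plain sp_nrthreads
 * decrement and the list_del_rcu() here need no further locking.
 */
void svc_exit_thread(struct svc_rqst *rqstp);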
On Tue, Jul 16, 2024 at 11:33:40AM +1000, NeilBrown wrote: > On Tue, 16 Jul 2024, Jeff Layton wrote: > > On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote: > > > sp_nrthreads is only ever accessed under the service mutex > > > nlmsvc_mutex nfs_callback_mutex nfsd_mutex > > > so these is no need for it to be an atomic_t. > > > > > > The fact that all code using it is single-threaded means that we can > > > simplify svc_pool_victim and remove the temporary elevation of > > > sp_nrthreads. > > > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > > --- > > > fs/nfsd/nfsctl.c | 2 +- > > > fs/nfsd/nfssvc.c | 2 +- > > > include/linux/sunrpc/svc.h | 4 ++-- > > > net/sunrpc/svc.c | 31 +++++++++++-------------------- > > > 4 files changed, 15 insertions(+), 24 deletions(-) > > > > > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > > > index 5b0f2e0d7ccf..d85b6d1fa31f 100644 > > > --- a/fs/nfsd/nfsctl.c > > > +++ b/fs/nfsd/nfsctl.c > > > @@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) > > > struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; > > > > > > err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, > > > - atomic_read(&sp->sp_nrthreads)); > > > + sp->sp_nrthreads); > > > if (err) > > > goto err_unlock; > > > } > > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > > > index 4438cdcd4873..7377422a34df 100644 > > > --- a/fs/nfsd/nfssvc.c > > > +++ b/fs/nfsd/nfssvc.c > > > @@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) > > > > > > if (serv) > > > for (i = 0; i < serv->sv_nrpools && i < n; i++) > > > - nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads); > > > + nthreads[i] = serv->sv_pools[i].sp_nrthreads; > > > return 0; > > > } > > > > > > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > > > index e4fa25fafa97..99e9345d829e 100644 > > > --- a/include/linux/sunrpc/svc.h > > > +++ b/include/linux/sunrpc/svc.h > > > @@ -33,9 +33,9 @@ > > > * node traffic on multi-node NUMA NFS servers. 
> > > */ > > > struct svc_pool { > > > - unsigned int sp_id; /* pool id; also node id on NUMA */ > > > + unsigned int sp_id; /* pool id; also node id on NUMA */ > > > struct lwq sp_xprts; /* pending transports */ > > > - atomic_t sp_nrthreads; /* # of threads in pool */ > > > + unsigned int sp_nrthreads; /* # of threads in pool */ > > > struct list_head sp_all_threads; /* all server threads */ > > > struct llist_head sp_idle_threads; /* idle server threads */ > > > > > > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > > > index 072ad115ae3d..0d8588bc693c 100644 > > > --- a/net/sunrpc/svc.c > > > +++ b/net/sunrpc/svc.c > > > @@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) > > > serv->sv_nrthreads += 1; > > > spin_unlock_bh(&serv->sv_lock); > > > > > > - atomic_inc(&pool->sp_nrthreads); > > > + pool->sp_nrthreads += 1; > > > > > > /* Protected by whatever lock the service uses when calling > > > * svc_set_num_threads() > > > @@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool, > > > struct svc_pool *pool; > > > unsigned int i; > > > > > > -retry: > > > pool = target_pool; > > > > > > - if (pool != NULL) { > > > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > > > - goto found_pool; > > > - return NULL; > > > - } else { > > > + if (!pool) { > > > for (i = 0; i < serv->sv_nrpools; i++) { > > > pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; > > > - if (atomic_inc_not_zero(&pool->sp_nrthreads)) > > > - goto found_pool; > > > + if (pool->sp_nrthreads) > > > + break; > > > } > > > - return NULL; > > > } > > > > > > -found_pool: > > > - set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > > - set_bit(SP_NEED_VICTIM, &pool->sp_flags); > > > - if (!atomic_dec_and_test(&pool->sp_nrthreads)) > > > + if (pool && pool->sp_nrthreads) { > > > + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > > + set_bit(SP_NEED_VICTIM, &pool->sp_flags); > > > return pool; > > > - /* Nothing left in this pool any more */ > > > - clear_bit(SP_NEED_VICTIM, &pool->sp_flags); > > > - clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags); > > > - goto retry; > > > + } > > > + return NULL; > > > } > > > > > > static int > > > @@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) > > > if (!pool) > > > nrservs -= serv->sv_nrthreads; > > > else > > > - nrservs -= atomic_read(&pool->sp_nrthreads); > > > + nrservs -= pool->sp_nrthreads; > > > > > > if (nrservs > 0) > > > return svc_start_kthreads(serv, pool, nrservs); > > > @@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp) > > > > > > list_del_rcu(&rqstp->rq_all); > > > > > > - atomic_dec(&pool->sp_nrthreads); > > > + pool->sp_nrthreads -= 1; > > > > > > spin_lock_bh(&serv->sv_lock); > > > serv->sv_nrthreads -= 1; > > > > I don't think svc_exit_thread is called with the nfsd_mutex held, so if > > several threads were exiting at the same time, they could race here. > > This is subtle and deserves explanation in the commit. Hi Neil, assuming you mean "commit message" here, are you planning to resend 5/14 with this update? > svc_exit_thread() is called in a thread *after* svc_thread_should_stop() > has returned true. That means RQ_VICTIM is set and most likely > SP_NEED_VICTIM was set > > SP_NEED_VICTIM is set in svc_pool_victim() which is called from > svc_stop_kthreads() which requires that the mutex is held. > svc_stop_kthreads() waits for SP_VICTIM_REMAINS to be cleared which is > the last thing that svc_exit_thread() does. 
> So when svc_exit_thread() is called, the mutex is held by some other > thread that is calling svc_set_num_threads(). > > This is also why the list_del_rcu() in svc_exit_thread() is safe. > > The case there svc_exit_thread() is called but SP_NEED_VICTIM wasn't set > (only RQ_VICTIM) is in the ETIMEDOUT case of nfsd(), in which case > nfsd() ensures that the mutex is held. > > This was why > [PATCH 07/14] Change unshare_fs_struct() to never fail. > was needed. If that fails in the current code, svc_exit_thread() can be > called without the mutex - which is already a theoretical problem for > the list_del_rcu(). > > Thanks, > NeilBrown
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5b0f2e0d7ccf..d85b6d1fa31f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1769,7 +1769,7 @@ int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
 		struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
 
 		err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
-				  atomic_read(&sp->sp_nrthreads));
+				  sp->sp_nrthreads);
 		if (err)
 			goto err_unlock;
 	}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 4438cdcd4873..7377422a34df 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -641,7 +641,7 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
 
 	if (serv)
 		for (i = 0; i < serv->sv_nrpools && i < n; i++)
-			nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads);
+			nthreads[i] = serv->sv_pools[i].sp_nrthreads;
 	return 0;
 }
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index e4fa25fafa97..99e9345d829e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -33,9 +33,9 @@
  * node traffic on multi-node NUMA NFS servers.
  */
 struct svc_pool {
-	unsigned int		sp_id;	    	/* pool id; also node id on NUMA */
+	unsigned int		sp_id;		/* pool id; also node id on NUMA */
 	struct lwq		sp_xprts;	/* pending transports */
-	atomic_t		sp_nrthreads;	/* # of threads in pool */
+	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
 	struct llist_head	sp_idle_threads; /* idle server threads */
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 072ad115ae3d..0d8588bc693c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -725,7 +725,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	serv->sv_nrthreads += 1;
 	spin_unlock_bh(&serv->sv_lock);
 
-	atomic_inc(&pool->sp_nrthreads);
+	pool->sp_nrthreads += 1;
 
 	/* Protected by whatever lock the service uses when calling
 	 * svc_set_num_threads()
@@ -780,31 +780,22 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
 	struct svc_pool *pool;
 	unsigned int i;
 
-retry:
 	pool = target_pool;
 
-	if (pool != NULL) {
-		if (atomic_inc_not_zero(&pool->sp_nrthreads))
-			goto found_pool;
-		return NULL;
-	} else {
+	if (!pool) {
 		for (i = 0; i < serv->sv_nrpools; i++) {
 			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
-			if (atomic_inc_not_zero(&pool->sp_nrthreads))
-				goto found_pool;
+			if (pool->sp_nrthreads)
+				break;
 		}
-		return NULL;
 	}
 
-found_pool:
-	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
-	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
-	if (!atomic_dec_and_test(&pool->sp_nrthreads))
+	if (pool && pool->sp_nrthreads) {
+		set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+		set_bit(SP_NEED_VICTIM, &pool->sp_flags);
 		return pool;
-	/* Nothing left in this pool any more */
-	clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
-	clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
-	goto retry;
+	}
+	return NULL;
 }
 
 static int
@@ -883,7 +874,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 	if (!pool)
 		nrservs -= serv->sv_nrthreads;
 	else
-		nrservs -= atomic_read(&pool->sp_nrthreads);
+		nrservs -= pool->sp_nrthreads;
 
 	if (nrservs > 0)
 		return svc_start_kthreads(serv, pool, nrservs);
@@ -953,7 +944,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
 
 	list_del_rcu(&rqstp->rq_all);
 
-	atomic_dec(&pool->sp_nrthreads);
+	pool->sp_nrthreads -= 1;
 
 	spin_lock_bh(&serv->sv_lock);
 	serv->sv_nrthreads -= 1;
sp_nrthreads is only ever accessed under the service mutex
  nlmsvc_mutex  nfs_callback_mutex  nfsd_mutex
so there is no need for it to be an atomic_t.

The fact that all code using it is single-threaded means that we can
simplify svc_pool_victim and remove the temporary elevation of
sp_nrthreads.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfsctl.c           |  2 +-
 fs/nfsd/nfssvc.c           |  2 +-
 include/linux/sunrpc/svc.h |  4 ++--
 net/sunrpc/svc.c           | 31 +++++++++++--------------------
 4 files changed, 15 insertions(+), 24 deletions(-)
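As background for the locking claim in the first paragraph, the callers that change the thread count all follow the pattern below. This is a simplified, hypothetical sketch (example_set_nfsd_threads is not a real function, and error handling is omitted); it only illustrates why a plain unsigned int is sufficient for sp_nrthreads.

/*
 * Hypothetical illustration: every writer of sp_nrthreads runs under the
 * service's mutex (nfsd_mutex in the nfsd case), so there are never
 * concurrent writers and no atomic operations are needed.
 */
static int example_set_nfsd_threads(struct svc_serv *serv, int nrservs)
{
	int error;

	mutex_lock(&nfsd_mutex);
	error = svc_set_num_threads(serv, NULL, nrservs); /* adjusts sp_nrthreads */
	mutex_unlock(&nfsd_mutex);
	return error;
}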