diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -931,9 +931,11 @@ static int
nfsd(void *vrqstp)
{
struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
+ struct svc_pool *pool = rqstp->rq_pool;
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool have_mutex = false;
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
@@ -954,7 +956,33 @@ nfsd(void *vrqstp)
/* Update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nn->max_connections;
- svc_recv(rqstp);
+ switch (svc_recv(rqstp)) {
+ case -ETIMEDOUT: /* Nothing to do */
+ if (mutex_trylock(&nfsd_mutex)) {
+ if (pool->sp_nractual > pool->sp_nrthreads) {
+ set_bit(RQ_VICTIM, &rqstp->rq_flags);
+ pool->sp_nractual -= 1;
+ printk("Kill a victim\n");
+ have_mutex = true;
+ } else
+ mutex_unlock(&nfsd_mutex);
+ } else printk("trylock failed\n");
+ break;
+ case -EBUSY: /* Too much to do */
+ if (pool->sp_nractual < nfsd_max_pool_threads(pool, nn) &&
+ mutex_trylock(&nfsd_mutex)) {
+				/* check no idle threads? */
+				if (pool->sp_nractual < nfsd_max_pool_threads(pool, nn)) {
+ printk("start new thread\n");
+ svc_new_thread(rqstp->rq_server, pool);
+ }
+ mutex_unlock(&nfsd_mutex);
+ }
+ clear_bit(SP_TASK_STARTING, &pool->sp_flags);
+ break;
+ default:
+ break;
+ }
nfsd_file_net_dispose(nn);
}
@@ -963,6 +991,8 @@ nfsd(void *vrqstp)
/* Release the thread */
svc_exit_thread(rqstp);
+ if (have_mutex)
+ mutex_unlock(&nfsd_mutex);
return 0;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1203,6 +1203,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
commit_reset_write_verifier(nn, rqstp, host_err);
goto out_nfserr;
}
+ msleep(40);
*cnt = host_err;
nfsd_stats_io_write_add(nn, exp, *cnt);
fsnotify_modify(file);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -53,6 +53,7 @@ enum {
SP_TASK_PENDING, /* still work to do even if no xprt is queued */
SP_NEED_VICTIM, /* One thread needs to agree to exit */
SP_VICTIM_REMAINS, /* One thread needs to actually exit */
+ SP_TASK_STARTING, /* Task has started but not added to idle yet */
};
@@ -410,6 +411,7 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int,
bool svc_rqst_replace_page(struct svc_rqst *rqstp,
struct page *page);
void svc_rqst_release_pages(struct svc_rqst *rqstp);
+int svc_new_thread(struct svc_serv *serv, struct svc_pool *pool);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *prog,
struct svc_stat *stats,
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -56,7 +56,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
/*
* Function prototypes.
*/
-void svc_recv(struct svc_rqst *rqstp);
+int svc_recv(struct svc_rqst *rqstp);
void svc_send(struct svc_rqst *rqstp);
int svc_addsock(struct svc_serv *serv, struct net *net,
const int fd, char *name_return, const size_t len,
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -796,19 +796,46 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
return NULL;
}
-static int
-svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+int svc_new_thread(struct svc_serv *serv, struct svc_pool *pool)
{
struct svc_rqst *rqstp;
struct task_struct *task;
- struct svc_pool *chosen_pool;
- unsigned int state = serv->sv_nrthreads-1;
int node;
- do {
- nrservs--;
- chosen_pool = svc_pool_next(serv, pool, &state);
- node = svc_pool_map_get_node(chosen_pool->sp_id);
+ node = svc_pool_map_get_node(pool->sp_id);
+
+ rqstp = svc_prepare_thread(serv, pool, node);
+ if (IS_ERR(rqstp))
+ return PTR_ERR(rqstp);
+ set_bit(SP_TASK_STARTING, &pool->sp_flags);
+ task = kthread_create_on_node(serv->sv_threadfn, rqstp,
+ node, "%s", serv->sv_name);
+ if (IS_ERR(task)) {
+ clear_bit(SP_TASK_STARTING, &pool->sp_flags);
+ svc_exit_thread(rqstp);
+ return PTR_ERR(task);
+ }
+ serv->sv_nractual += 1;
+ pool->sp_nractual += 1;
+
+ rqstp->rq_task = task;
+ if (serv->sv_nrpools > 1)
+ svc_pool_map_set_cpumask(task, pool->sp_id);
+
+ svc_sock_update_bufs(serv);
+ wake_up_process(task);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(svc_new_thread);
+
+static int
+svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ unsigned int state = serv->sv_nrthreads-1;
+ int err = 0;
+
+ while (!err && nrservs--) {
+ struct svc_pool *chosen_pool = svc_pool_next(serv, pool, &state);
serv->sv_nrthreads += 1;
chosen_pool->sp_nrthreads += 1;
@@ -816,27 +843,10 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
if (chosen_pool->sp_nrthreads <= chosen_pool->sp_nractual)
continue;
- rqstp = svc_prepare_thread(serv, chosen_pool, node);
- if (IS_ERR(rqstp))
- return PTR_ERR(rqstp);
- task = kthread_create_on_node(serv->sv_threadfn, rqstp,
- node, "%s", serv->sv_name);
- if (IS_ERR(task)) {
- svc_exit_thread(rqstp);
- return PTR_ERR(task);
- }
- serv->sv_nractual += 1;
- chosen_pool->sp_nractual += 1;
-
- rqstp->rq_task = task;
- if (serv->sv_nrpools > 1)
- svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
-
- svc_sock_update_bufs(serv);
- wake_up_process(task);
- } while (nrservs > 0);
+		err = svc_new_thread(serv, chosen_pool);
+ }
- return 0;
+ return err;
}
static int
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -729,15 +729,19 @@ svc_thread_should_sleep(struct svc_rqst *rqstp)
return true;
}
-static void svc_thread_wait_for_work(struct svc_rqst *rqstp)
+static bool svc_thread_wait_for_work(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
+ bool did_wait = false;
if (svc_thread_should_sleep(rqstp)) {
set_current_state(TASK_IDLE | TASK_FREEZABLE);
llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
- if (likely(svc_thread_should_sleep(rqstp)))
- schedule();
+ clear_bit(SP_TASK_STARTING, &pool->sp_flags);
+ if (likely(svc_thread_should_sleep(rqstp))) {
+ schedule_timeout(5*HZ);
+ did_wait = true;
+ }
while (!llist_del_first_this(&pool->sp_idle_threads,
&rqstp->rq_idle)) {
@@ -749,7 +753,12 @@ static void svc_thread_wait_for_work(struct svc_rqst *rqstp)
* for this new work. This thread can safely sleep
* until woken again.
*/
- schedule();
+ if (did_wait) {
+ schedule_timeout(HZ);
+ } else {
+ schedule_timeout(5*HZ);
+ did_wait = true;
+ }
set_current_state(TASK_IDLE | TASK_FREEZABLE);
}
__set_current_state(TASK_RUNNING);
@@ -757,6 +766,7 @@ static void svc_thread_wait_for_work(struct svc_rqst *rqstp)
cond_resched();
}
try_to_freeze();
+ return did_wait;
}
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -840,6 +850,8 @@ static void svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
static void svc_thread_wake_next(struct svc_rqst *rqstp)
{
+ clear_bit(SP_TASK_STARTING, &rqstp->rq_pool->sp_flags);
+
if (!svc_thread_should_sleep(rqstp))
/* More work pending after I dequeued some,
* wake another worker
@@ -854,21 +866,31 @@ static void svc_thread_wake_next(struct svc_rqst *rqstp)
* This code is carefully organised not to touch any cachelines in
* the shared svc_serv structure, only cachelines in the local
* svc_pool.
+ *
+ * Returns -ETIMEDOUT if idle for an extended period
+ *         -EBUSY if there is more work to do than available threads
+ * 0 otherwise.
*/
-void svc_recv(struct svc_rqst *rqstp)
+int svc_recv(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
+ bool did_wait;
+ int ret = 0;
if (!svc_alloc_arg(rqstp))
- return;
+ return ret;
- svc_thread_wait_for_work(rqstp);
+ did_wait = svc_thread_wait_for_work(rqstp);
+
+ if (did_wait && svc_thread_should_sleep(rqstp) &&
+ pool->sp_nractual > pool->sp_nrthreads)
+ ret = -ETIMEDOUT;
clear_bit(SP_TASK_PENDING, &pool->sp_flags);
if (svc_thread_should_stop(rqstp)) {
svc_thread_wake_next(rqstp);
- return;
+ return ret;
}
rqstp->rq_xprt = svc_xprt_dequeue(pool);
@@ -882,8 +904,13 @@ void svc_recv(struct svc_rqst *rqstp)
*/
if (pool->sp_idle_threads.first)
rqstp->rq_chandle.thread_wait = 5 * HZ;
- else
+ else {
rqstp->rq_chandle.thread_wait = 1 * HZ;
+ if (!did_wait &&
+ !test_and_set_bit(SP_TASK_STARTING,
+ &pool->sp_flags))
+ ret = -EBUSY;
+ }
trace_svc_xprt_dequeue(rqstp);
svc_handle_xprt(rqstp, xprt);
@@ -902,6 +929,7 @@ void svc_recv(struct svc_rqst *rqstp)
}
}
#endif
+ return ret;
}
EXPORT_SYMBOL_GPL(svc_recv);

svc_recv() is changed to return a status.  This can be:

 -ETIMEDOUT - waited for 5 seconds and found nothing to do.  This is
     boring.  Also there are more actual threads than really needed.
 -EBUSY - I did something, but there is more stuff to do and no one
     idle who I can wake up to do it.  BTW I successfully set a flag:
     SP_TASK_STARTING.  You better clear it.
 0 - just minding my own business, nothing to see here.

nfsd() is changed to pay attention to this status.  In the case of
-ETIMEDOUT, if the service mutex can be taken (trylock), the thread
becomes an RQ_VICTIM so that it will exit.  In the case of -EBUSY, if
the actual number of threads is below the calculated maximum, a new
thread is started.  SP_TASK_STARTING is cleared.

To support the above, some code is split out of svc_start_kthreads()
into svc_new_thread().

I think we want memory pressure to be able to push a thread into
returning -ETIMEDOUT.  That can come later.

There are printk's in here.  They can be discarded or turned into
trace points once we are sure about what we want.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfssvc.c               | 32 ++++++++++++++++-
 fs/nfsd/vfs.c                  |  1 +
 include/linux/sunrpc/svc.h     |  2 ++
 include/linux/sunrpc/svcsock.h |  2 +-
 net/sunrpc/svc.c               | 66 +++++++++++++++++++---------------
 net/sunrpc/svc_xprt.c          | 46 +++++++++++++++++++-----
 6 files changed, 110 insertions(+), 39 deletions(-)
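For anyone who wants to see the grow/shrink policy in isolation from the
sunrpc plumbing, below is a small standalone model of the decisions
described above.  It is only a sketch: the struct, its field names, the
limits and the canned status trace are invented for illustration and are
not the real svc_pool fields or the real nfsd_max_pool_threads()
calculation.

/*
 * Illustrative userspace model only -- not kernel code.  The fields
 * stand in for sp_nrthreads/sp_nractual and for whatever upper bound
 * nfsd_max_pool_threads() would compute; svc_recv() is replaced by a
 * canned trace of status codes.
 */
#include <stdio.h>
#include <errno.h>

struct model_pool {
	int nr_configured;	/* admin-requested thread count */
	int nr_actual;		/* threads actually running */
	int nr_max;		/* hard upper bound */
};

/*
 * Mirror of the switch in nfsd(): -EBUSY grows the pool up to nr_max,
 * -ETIMEDOUT lets a surplus thread exit, 0 changes nothing.
 */
static void adjust_pool(struct model_pool *p, int status)
{
	switch (status) {
	case -ETIMEDOUT:
		if (p->nr_actual > p->nr_configured) {
			p->nr_actual--;
			printf("idle too long: shrink to %d\n", p->nr_actual);
		}
		break;
	case -EBUSY:
		if (p->nr_actual < p->nr_max) {
			p->nr_actual++;
			printf("backlog: grow to %d\n", p->nr_actual);
		}
		break;
	default:
		break;
	}
}

int main(void)
{
	struct model_pool pool = { .nr_configured = 4, .nr_actual = 4, .nr_max = 8 };
	int trace[] = { -EBUSY, -EBUSY, 0, -EBUSY, -ETIMEDOUT, -ETIMEDOUT };
	unsigned int i;

	for (i = 0; i < sizeof(trace) / sizeof(trace[0]); i++)
		adjust_pool(&pool, trace[i]);
	return 0;
}

The real patch additionally takes nfsd_mutex with a trylock and uses the
RQ_VICTIM and SP_TASK_STARTING bits to make these transitions safe
against concurrent configuration changes; the model only captures the
counting policy.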
svc_recv() is changed to return a status. This can be: -ETIMEDOUT - waited for 5 seconds and found nothing to do. This is boring. Also there are more actual threads than really needed. -EBUSY - I did something, but there is more stuff to do and no one idle who I can wake up to do it. BTW I successful set a flag: SP_TASK_STARTING. You better clear it. 0 - just minding my own business, nothing to see here. nfsd() is changed to pay attention to this status. In the case of -ETIMEDOUT, if the service mutex can be taken (trylock), the thread becomes and RQ_VICTIM so that it will exit. In the case of -EBUSY, if the actual number of threads is below the calculated maximum, a new thread is started. SP_TASK_STARTING is cleared. To support the above, some code is split out of svc_start_kthreads() into svc_new_thread(). I think we want memory pressure to be able to push a thread into returning -ETIMEDOUT. That can come later. There are printk's in here. They can be discarded or turned into trace points once we are sure about what we want. Signed-off-by: NeilBrown <neilb@suse.de> --- fs/nfsd/nfssvc.c | 32 ++++++++++++++++- fs/nfsd/vfs.c | 1 + include/linux/sunrpc/svc.h | 2 ++ include/linux/sunrpc/svcsock.h | 2 +- net/sunrpc/svc.c | 66 +++++++++++++++++++--------------- net/sunrpc/svc_xprt.c | 46 +++++++++++++++++++----- 6 files changed, 110 insertions(+), 39 deletions(-)