Message ID | 1312238117.23392.19.camel@lade.trondhjem.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Aug 1, 2011, at 6:35 PM, Trond Myklebust wrote: > On Mon, 2011-08-01 at 17:10 -0400, Trond Myklebust wrote: >> Looking at the callback code, I see that if tbl->highest_used_slotid != >> 0, then we BUG() while holding the backchannel's tbl->slot_tbl_lock >> spinlock. That seems a likely candidate for the above hang. >> >> Andy, how we are guaranteed that tbl->highest_used_slotid won't take >> values other than 0, and why do we commit suicide when it does? As far >> as I can see, there is no guarantee that we call nfs4_cb_take_slot() in >> nfs4_callback_compound(), however we appear to unconditionally call >> nfs4_cb_free_slot() provided there is a session. >> >> The other strangeness would be the fact that there is nothing enforcing >> the NFS4_SESSION_DRAINING flag. If the session is draining, then the >> back-channel simply ignores that and goes ahead with processing the >> callback. Is this to avoid deadlocks with the server returning >> NFS4ERR_BACK_CHAN_BUSY when the client does a DESTROY_SESSION? When NFS4_SESSION_DRAINING is set, the backchannel is drained first - and waits for current processing to complete signaled by the highest_used_slotid == -1. Any other backchannel requests that occur under the NFS4_SESSION_DRAINING flag are processed - but just the CB_SEQUENCE operation which returns NFS4ERR_DELAY. It does indeed prevent the BACK_CHAN_BUSY deadlock. > > How about something like the following? Looks good. Nice catch. 
One change below and a comment inline -->Andy > > 8<------------------------------------------------------------------------------- > From c0c499b0ca9d0af8cbdc29c40effba38475461d9 Mon Sep 17 00:00:00 2001 > From: Trond Myklebust <Trond.Myklebust@netapp.com> > Date: Mon, 1 Aug 2011 18:31:12 -0400 > Subject: [PATCH] NFSv4.1: Fix the callback 'highest_used_slotid' behaviour > > Currently, there is no guarantee that we will call nfs4_cb_take_slot() even > though nfs4_callback_compound() will consistently call > nfs4_cb_free_slot() provided the cb_process_state has set the 'clp' field. > The result is that we can trigger the BUG_ON() upon the next call to > nfs4_cb_take_slot(). > This patch fixes the above problem by using the slot id that was taken in > the CB_SEQUENCE operation as a flag for whether or not we need to call > nfs4_cb_free_slot(). > It also fixes an atomicity problem: we need to set tbl->highest_used_slotid > atomically with the check for NFS4_SESSION_DRAINING, otherwise we end up > racing with the various tests in nfs4_begin_drain_session(). But the code below doesn't do this - it locks the backchannel slot table to check the flag, but does not set the highest_used_slot atomically with this check. 
> > Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> > --- > fs/nfs/callback.h | 2 +- > fs/nfs/callback_proc.c | 18 +++++++++++++----- > fs/nfs/callback_xdr.c | 24 +++++++----------------- > 3 files changed, 21 insertions(+), 23 deletions(-) > > diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h > index b257383..07df5f1 100644 > --- a/fs/nfs/callback.h > +++ b/fs/nfs/callback.h > @@ -38,6 +38,7 @@ enum nfs4_callback_opnum { > struct cb_process_state { > __be32 drc_status; > struct nfs_client *clp; > + int slotid; > }; > > struct cb_compound_hdr_arg { > @@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( > void *dummy, struct cb_process_state *cps); > > extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); > -extern void nfs4_cb_take_slot(struct nfs_client *clp); > > struct cb_devicenotifyitem { > uint32_t cbd_notify_type; > diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c > index 74780f9..1bd2c81 100644 > --- a/fs/nfs/callback_proc.c > +++ b/fs/nfs/callback_proc.c > @@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) > /* Normal */ > if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { > slot->seq_nr++; > - return htonl(NFS4_OK); > + goto out_ok; > } > > /* Replay */ > @@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) > /* Wraparound */ > if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { > slot->seq_nr = 1; > - return htonl(NFS4_OK); > + goto out_ok; > } > > /* Misordered request */ > return htonl(NFS4ERR_SEQ_MISORDERED); > +out_ok: > + tbl->highest_used_slotid = args->csa_slotid; > + return htonl(NFS4_OK); > } > > /* > @@ -433,26 +436,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, > struct cb_sequenceres *res, > struct cb_process_state *cps) > { > + struct nfs4_slot_table *tbl; > struct nfs_client *clp; > int i; > __be32 status = htonl(NFS4ERR_BADSESSION); > > - cps->clp = NULL; > - > clp = 
nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); > if (clp == NULL) > goto out; > > + tbl = &clp->cl_session->bc_slot_table; > + > + spin_lock(&tbl->slot_tbl_lock); > /* state manager is resetting the session */ > if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { > status = NFS4ERR_DELAY; status = htonl(NFS4ERR_DELAY); > + spin_unlock(&tbl->slot_tbl_lock); > goto out; > } > > status = validate_seqid(&clp->cl_session->bc_slot_table, args); > + spin_unlock(&tbl->slot_tbl_lock); > if (status) > goto out; > > + cps->slotid = args->csa_slotid; > + > /* > * Check for pending referring calls. If a match is found, a > * related callback was received before the response to the original > @@ -469,7 +478,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, > res->csr_slotid = args->csa_slotid; > res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; > res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; > - nfs4_cb_take_slot(clp); > > out: > cps->clp = clp; /* put in nfs4_callback_compound */ > diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c > index c6c86a7..918ad64 100644 > --- a/fs/nfs/callback_xdr.c > +++ b/fs/nfs/callback_xdr.c > @@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) > * Let the state manager know callback processing done. 
> * A single slot, so highest used slotid is either 0 or -1 > */ > - tbl->highest_used_slotid--; > + tbl->highest_used_slotid = -1; > nfs4_check_drain_bc_complete(session); > spin_unlock(&tbl->slot_tbl_lock); > } > > -static void nfs4_cb_free_slot(struct nfs_client *clp) > +static void nfs4_cb_free_slot(struct cb_process_state *cps) > { > - if (clp && clp->cl_session) > - nfs4_callback_free_slot(clp->cl_session); > -} > - > -/* A single slot, so highest used slotid is either 0 or -1 */ > -void nfs4_cb_take_slot(struct nfs_client *clp) > -{ > - struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; > - > - spin_lock(&tbl->slot_tbl_lock); > - tbl->highest_used_slotid++; > - BUG_ON(tbl->highest_used_slotid != 0); > - spin_unlock(&tbl->slot_tbl_lock); > + if (cps->slotid != -1) > + nfs4_callback_free_slot(cps->clp->cl_session); > } > > #else /* CONFIG_NFS_V4_1 */ > @@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) > return htonl(NFS4ERR_MINOR_VERS_MISMATCH); > } > > -static void nfs4_cb_free_slot(struct nfs_client *clp) > +static void nfs4_cb_free_slot(struct cb_process_state *cps) > { > } > #endif /* CONFIG_NFS_V4_1 */ > @@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r > struct cb_process_state cps = { > .drc_status = 0, > .clp = NULL, > + .slotid = -1, > }; > unsigned int nops = 0; > > @@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r > > *hdr_res.status = status; > *hdr_res.nops = htonl(nops); > - nfs4_cb_free_slot(cps.clp); > + nfs4_cb_free_slot(&cps); > nfs_put_client(cps.clp); > dprintk("%s: done, status = %u\n", __func__, ntohl(status)); > return rpc_success; > -- > 1.7.6 > > > -- > Trond Myklebust > Linux NFS client maintainer > > NetApp > Trond.Myklebust@netapp.com > www.netapp.com > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org 
More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2011-08-01 at 18:57 -0400, Andy Adamson wrote: > On Aug 1, 2011, at 6:35 PM, Trond Myklebust wrote: > > > On Mon, 2011-08-01 at 17:10 -0400, Trond Myklebust wrote: > >> Looking at the callback code, I see that if tbl->highest_used_slotid != > >> 0, then we BUG() while holding the backchannel's tbl->slot_tbl_lock > >> spinlock. That seems a likely candidate for the above hang. > >> > >> Andy, how we are guaranteed that tbl->highest_used_slotid won't take > >> values other than 0, and why do we commit suicide when it does? As far > >> as I can see, there is no guarantee that we call nfs4_cb_take_slot() in > >> nfs4_callback_compound(), however we appear to unconditionally call > >> nfs4_cb_free_slot() provided there is a session. > >> > >> The other strangeness would be the fact that there is nothing enforcing > >> the NFS4_SESSION_DRAINING flag. If the session is draining, then the > >> back-channel simply ignores that and goes ahead with processing the > >> callback. Is this to avoid deadlocks with the server returning > >> NFS4ERR_BACK_CHAN_BUSY when the client does a DESTROY_SESSION? > > > When NFS4_SESSION_DRAINING is set, the backchannel is drained first - and waits for current processing to complete signaled by the highest_used_slotid == -1. Any other backchannel requests that occur under the NFS4_SESSION_DRAINING flag are processed - but just the CB_SEQUENCE operation which returns NFS4ERR_DELAY. It does indeed prevent the BACK_CHAN_BUSY deadlock. > > > > > How about something like the following? > > Looks good. Nice catch. 
One change below and a comment inline > > -->Andy > > > > > 8<------------------------------------------------------------------------------- > > From c0c499b0ca9d0af8cbdc29c40effba38475461d9 Mon Sep 17 00:00:00 2001 > > From: Trond Myklebust <Trond.Myklebust@netapp.com> > > Date: Mon, 1 Aug 2011 18:31:12 -0400 > > Subject: [PATCH] NFSv4.1: Fix the callback 'highest_used_slotid' behaviour > > > > Currently, there is no guarantee that we will call nfs4_cb_take_slot() even > > though nfs4_callback_compound() will consistently call > > nfs4_cb_free_slot() provided the cb_process_state has set the 'clp' field. > > The result is that we can trigger the BUG_ON() upon the next call to > > nfs4_cb_take_slot(). > > This patch fixes the above problem by using the slot id that was taken in > > the CB_SEQUENCE operation as a flag for whether or not we need to call > > nfs4_cb_free_slot(). > > It also fixes an atomicity problem: we need to set tbl->highest_used_slotid > > atomically with the check for NFS4_SESSION_DRAINING, otherwise we end up > > racing with the various tests in nfs4_begin_drain_session(). > > But the code below doesn't do this - it locks the backchannel slot table to check the flag, but does not set the highest_used_slot atomically with this check. It should. The patch ensures that we hold the backchannel slot table lock across both the NFS4_SESSION_DRAINING test and the validate_seqid(), and also ensures that the latter function sets tbl->highest_used_slotid on success. 
> > > > Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> > > --- > > fs/nfs/callback.h | 2 +- > > fs/nfs/callback_proc.c | 18 +++++++++++++----- > > fs/nfs/callback_xdr.c | 24 +++++++----------------- > > 3 files changed, 21 insertions(+), 23 deletions(-) > > > > diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h > > index b257383..07df5f1 100644 > > --- a/fs/nfs/callback.h > > +++ b/fs/nfs/callback.h > > @@ -38,6 +38,7 @@ enum nfs4_callback_opnum { > > struct cb_process_state { > > __be32 drc_status; > > struct nfs_client *clp; > > + int slotid; > > }; > > > > struct cb_compound_hdr_arg { > > @@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( > > void *dummy, struct cb_process_state *cps); > > > > extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); > > -extern void nfs4_cb_take_slot(struct nfs_client *clp); > > > > struct cb_devicenotifyitem { > > uint32_t cbd_notify_type; > > diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c > > index 74780f9..1bd2c81 100644 > > --- a/fs/nfs/callback_proc.c > > +++ b/fs/nfs/callback_proc.c > > @@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) > > /* Normal */ > > if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { > > slot->seq_nr++; > > - return htonl(NFS4_OK); > > + goto out_ok; > > } > > > > /* Replay */ > > @@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) > > /* Wraparound */ > > if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { > > slot->seq_nr = 1; > > - return htonl(NFS4_OK); > > + goto out_ok; > > } > > > > /* Misordered request */ > > return htonl(NFS4ERR_SEQ_MISORDERED); > > +out_ok: > > + tbl->highest_used_slotid = args->csa_slotid; > > + return htonl(NFS4_OK); > > } > > > > /* > > @@ -433,26 +436,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, > > struct cb_sequenceres *res, > > struct cb_process_state *cps) > > { > > + struct nfs4_slot_table 
*tbl; > > struct nfs_client *clp; > > int i; > > __be32 status = htonl(NFS4ERR_BADSESSION); > > > > - cps->clp = NULL; > > - > > clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); > > if (clp == NULL) > > goto out; > > > > + tbl = &clp->cl_session->bc_slot_table; > > + > > + spin_lock(&tbl->slot_tbl_lock); > > /* state manager is resetting the session */ > > if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { > > status = NFS4ERR_DELAY; > > status = htonl(NFS4ERR_DELAY); Yep. I'll fix that too... > > + spin_unlock(&tbl->slot_tbl_lock); > > goto out; > > } > > > > status = validate_seqid(&clp->cl_session->bc_slot_table, args); > > + spin_unlock(&tbl->slot_tbl_lock); This is what guarantees the atomicity. Even if nfs4_begin_drain_session() sets NFS4_SESSION_DRAINING, we know that it can't test the value of tbl->highest_used_slotid without first taking the tbl->slot_tbl_lock, and we're holding that... > > if (status) > > goto out; > > > > + cps->slotid = args->csa_slotid; >
Trond Myklebust wrote: On Mon, 2011-08-01 at 17:10 -0400, Trond Myklebust wrote: > Looking at the callback code, I see that if tbl->highest_used_slotid != > 0, then we BUG() while holding the backchannel's tbl->slot_tbl_lock > spinlock. That seems a likely candidate for the above hang. > > Andy, how we are guaranteed that tbl->highest_used_slotid won't take > values other than 0, and why do we commit suicide when it does? As far > as I can see, there is no guarantee that we call nfs4_cb_take_slot() in > nfs4_callback_compound(), however we appear to unconditionally call > nfs4_cb_free_slot() provided there is a session. > > The other strangeness would be the fact that there is nothing enforcing > the NFS4_SESSION_DRAINING flag. If the session is draining, then the > back-channel simply ignores that and goes ahead with processing the > callback. Is this to avoid deadlocks with the server returning > NFS4ERR_BACK_CHAN_BUSY when the client does a DESTROY_SESSION? How about something like the following? I applied this patch, along with Andy's htonl correction. It now fails in a different way, with a deadlock. The test runs several processes in parallel. INFO: task t_mtab:1767 blocked for more than 10 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. t_mtab D 0000000000000000 0 1767 1634 0x00000080 ffff8800376afd48 0000000000000086 ffff8800376afcd8 ffffffff00000000 ffff8800376ae010 ffff880037ef4500 0000000000012c80 ffff8800376affd8 ffff8800376affd8 0000000000012c80 ffffffff81a0c020 ffff880037ef4500 Call Trace: [<ffffffff8145411a>] __mutex_lock_common+0x110/0x171 [<ffffffff81454191>] __mutex_lock_slowpath+0x16/0x18 [<ffffffff81454257>] mutex_lock+0x1e/0x32 [<ffffffff811169a2>] kern_path_create+0x75/0x11e [<ffffffff810fe836>] ? kmem_cache_alloc+0x5f/0xf1 [<ffffffff812127d9>] ? strncpy_from_user+0x43/0x72 [<ffffffff81114077>] ? 
getname_flags+0x158/0x1d2 [<ffffffff81116a86>] user_path_create+0x3b/0x52 [<ffffffff81117466>] sys_linkat+0x9a/0x120 [<ffffffff8109932e>] ? audit_syscall_entry+0x119/0x145 [<ffffffff81117505>] sys_link+0x19/0x1c [<ffffffff8145b612>] system_call_fastpath+0x16/0x1b INFO: task t_mtab:1768 blocked for more than 10 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. t_mtab D 0000000000000000 0 1768 1634 0x00000080 ffff880037ccbc18 0000000000000082 ffff880037ccbbe8 ffffffff00000000 ffff880037cca010 ffff880037ef2e00 0000000000012c80 ffff880037ccbfd8 ffff880037ccbfd8 0000000000012c80 ffffffff81a0c020 ffff880037ef2e00 Call Trace: [<ffffffff8145411a>] __mutex_lock_common+0x110/0x171 [<ffffffff81454191>] __mutex_lock_slowpath+0x16/0x18 [<ffffffff81454257>] mutex_lock+0x1e/0x32 [<ffffffff8111565d>] ? walk_component+0x362/0x38f [<ffffffff811e7b9a>] ima_file_check+0x53/0x111 [<ffffffff81115ae0>] do_last+0x456/0x566 [<ffffffff81114467>] ? path_init+0x179/0x2b8 [<ffffffff81116148>] path_openat+0xca/0x30e [<ffffffff8111647b>] do_filp_open+0x38/0x84 [<ffffffff812127d9>] ? strncpy_from_user+0x43/0x72 [<ffffffff81120014>] ? alloc_fd+0x76/0x11f [<ffffffff81109696>] do_sys_open+0x6e/0x100 [<ffffffff81109751>] sys_open+0x1b/0x1d [<ffffffff8145b612>] system_call_fastpath+0x16/0x1b INFO: task t_mtab:1767 blocked for more than 10 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. t_mtab D 0000000000000000 0 1767 1634 0x00000080 ffff8800376afc18 0000000000000086 ffff8800376afbe8 ffffffff00000000 ffff8800376ae010 ffff880037ef4500 0000000000012c80 ffff8800376affd8 ffff8800376affd8 0000000000012c80 ffffffff81a0c020 ffff880037ef4500 Call Trace: [<ffffffff8145411a>] __mutex_lock_common+0x110/0x171 [<ffffffff81454191>] __mutex_lock_slowpath+0x16/0x18 [<ffffffff81454257>] mutex_lock+0x1e/0x32 [<ffffffff8111565d>] ? 
walk_component+0x362/0x38f [<ffffffff811e7b9a>] ima_file_check+0x53/0x111 [<ffffffff81115ae0>] do_last+0x456/0x566 [<ffffffff81114467>] ? path_init+0x179/0x2b8 [<ffffffff81116148>] path_openat+0xca/0x30e [<ffffffff8111647b>] do_filp_open+0x38/0x84 [<ffffffff812127d9>] ? strncpy_from_user+0x43/0x72 [<ffffffff81120014>] ? alloc_fd+0x76/0x11f [<ffffffff81109696>] do_sys_open+0x6e/0x100 [<ffffffff81109751>] sys_open+0x1b/0x1d [<ffffffff8145b612>] system_call_fastpath+0x16/0x1b INFO: task t_mtab:1768 blocked for more than 10 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. t_mtab D 0000000000000000 0 1768 1634 0x00000080 ffff880037ccbc68 0000000000000082 0000000000000000 0000000000000000 ffff880037cca010 ffff880037ef2e00 0000000000012c80 ffff880037ccbfd8 ffff880037ccbfd8 0000000000012c80 ffffffff81a0c020 ffff880037ef2e00 Call Trace: [<ffffffff8145411a>] __mutex_lock_common+0x110/0x171 [<ffffffffa0267ca5>] ? nfs_permission+0xd7/0x168 [nfs] [<ffffffff81454191>] __mutex_lock_slowpath+0x16/0x18 [<ffffffff81454257>] mutex_lock+0x1e/0x32 [<ffffffff8111581e>] do_last+0x194/0x566 [<ffffffff81114467>] ? path_init+0x179/0x2b8 [<ffffffff81116148>] path_openat+0xca/0x30e [<ffffffffa028d8fd>] ? __nfs4_close+0xf4/0x101 [nfs] [<ffffffff8111647b>] do_filp_open+0x38/0x84 [<ffffffff812127d9>] ? strncpy_from_user+0x43/0x72 [<ffffffff81120014>] ? alloc_fd+0x76/0x11f [<ffffffff81109696>] do_sys_open+0x6e/0x100 [<ffffffff81109751>] sys_open+0x1b/0x1d [<ffffffff8145b612>] system_call_fastpath+0x16/0x1b INFO: task t_mtab:1767 blocked for more than 10 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
t_mtab D 0000000000000000 0 1767 1634 0x00000080 ffff8800376afd48 0000000000000086 ffff8800376afcd8 ffffffff00000000 ffff8800376ae010 ffff880037ef4500 0000000000012c80 ffff8800376affd8 ffff8800376affd8 0000000000012c80 ffffffff81a0c020 ffff880037ef4500 Call Trace: [<ffffffff8145411a>] __mutex_lock_common+0x110/0x171 [<ffffffff81454191>] __mutex_lock_slowpath+0x16/0x18 [<ffffffff81454257>] mutex_lock+0x1e/0x32 [<ffffffff811169a2>] kern_path_create+0x75/0x11e [<ffffffff810fe836>] ? kmem_cache_alloc+0x5f/0xf1 [<ffffffff812127d9>] ? strncpy_from_user+0x43/0x72 [<ffffffff81114077>] ? getname_flags+0x158/0x1d2 [<ffffffff81116a86>] user_path_create+0x3b/0x52 [<ffffffff81117466>] sys_linkat+0x9a/0x120 [<ffffffff8109932e>] ? audit_syscall_entry+0x119/0x145 [<ffffffff81117505>] sys_link+0x19/0x1c [<ffffffff8145b612>] system_call_fastpath+0x16/0x1b INFO: task t_mtab:1769 blocked for more than 10 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. t_mtab D 0000000000000000 0 1769 1634 0x00000080 ffff88006c2d1c18 0000000000000082 ffff88006c2d1be8 ffffffff00000000 ffff88006c2d0010 ffff880037ef0000 0000000000012c80 ffff88006c2d1fd8 ffff88006c2d1fd8 0000000000012c80 ffffffff81a0c020 ffff880037ef0000 Call Trace: [<ffffffff8145411a>] __mutex_lock_common+0x110/0x171 [<ffffffff81454191>] __mutex_lock_slowpath+0x16/0x18 [<ffffffff81454257>] mutex_lock+0x1e/0x32 [<ffffffff8111565d>] ? walk_component+0x362/0x38f [<ffffffff811e7b9a>] ima_file_check+0x53/0x111 [<ffffffff81115ae0>] do_last+0x456/0x566 [<ffffffff81114467>] ? path_init+0x179/0x2b8 [<ffffffff81116148>] path_openat+0xca/0x30e [<ffffffff8111647b>] do_filp_open+0x38/0x84 [<ffffffff812127d9>] ? strncpy_from_user+0x43/0x72 [<ffffffff81120014>] ? 
alloc_fd+0x76/0x11f [<ffffffff81109696>] do_sys_open+0x6e/0x100 [<ffffffff81109751>] sys_open+0x1b/0x1d [<ffffffff8145b612>] system_call_fastpath+0x16/0x1b -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Jim Rees [mailto:rees@umich.edu] > Sent: Monday, August 01, 2011 10:22 PM > To: Myklebust, Trond > Cc: Peng Tao; Adamson, Andy; Christoph Hellwig; linux- > nfs@vger.kernel.org; peter honeyman > Subject: Re: [PATCH v4 00/27] add block layout driver to pnfs client > > Trond Myklebust wrote: > > On Mon, 2011-08-01 at 17:10 -0400, Trond Myklebust wrote: > > Looking at the callback code, I see that if tbl- > >highest_used_slotid != > > 0, then we BUG() while holding the backchannel's tbl->slot_tbl_lock > > spinlock. That seems a likely candidate for the above hang. > > > > Andy, how we are guaranteed that tbl->highest_used_slotid won't > take > > values other than 0, and why do we commit suicide when it does? As > far > > as I can see, there is no guarantee that we call > nfs4_cb_take_slot() in > > nfs4_callback_compound(), however we appear to unconditionally call > > nfs4_cb_free_slot() provided there is a session. > > > > The other strangeness would be the fact that there is nothing > enforcing > > the NFS4_SESSION_DRAINING flag. If the session is draining, then > the > > back-channel simply ignores that and goes ahead with processing the > > callback. Is this to avoid deadlocks with the server returning > > NFS4ERR_BACK_CHAN_BUSY when the client does a DESTROY_SESSION? > > How about something like the following? > > I applied this patch, along with Andy's htonl correction. It now fails > in a > different way, with a deadlock. The test runs several processes in > parallel. > > INFO: task t_mtab:1767 blocked for more than 10 seconds. > "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this > message. 
> t_mtab D 0000000000000000 0 1767 1634 0x00000080 > ffff8800376afd48 0000000000000086 ffff8800376afcd8 ffffffff00000000 > ffff8800376ae010 ffff880037ef4500 0000000000012c80 ffff8800376affd8 > ffff8800376affd8 0000000000012c80 ffffffff81a0c020 ffff880037ef4500 > Call Trace: > [<ffffffff8145411a>] __mutex_lock_common+0x110/0x171 > [<ffffffff81454191>] __mutex_lock_slowpath+0x16/0x18 > [<ffffffff81454257>] mutex_lock+0x1e/0x32 > [<ffffffff811169a2>] kern_path_create+0x75/0x11e > [<ffffffff810fe836>] ? kmem_cache_alloc+0x5f/0xf1 > [<ffffffff812127d9>] ? strncpy_from_user+0x43/0x72 > [<ffffffff81114077>] ? getname_flags+0x158/0x1d2 > [<ffffffff81116a86>] user_path_create+0x3b/0x52 > [<ffffffff81117466>] sys_linkat+0x9a/0x120 > [<ffffffff8109932e>] ? audit_syscall_entry+0x119/0x145 > [<ffffffff81117505>] sys_link+0x19/0x1c > [<ffffffff8145b612>] system_call_fastpath+0x16/0x1b That's a different issue. If you do an 'echo t >/proc/sysrq-trigger', then do you see any other process that is stuck in the nfs layer and that might be holding the inode->i_mutex? -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Myklebust, Trond wrote: That's a different issue. If you do an 'echo t >/proc/sysrq-trigger', then do you see any other process that is stuck in the nfs layer and that might be holding the inode->i_mutex? Hard to tell. There are a couple of possibilities. I've put the console output here, and will investigate more in the morning: http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/console.txt -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2011-08-01 at 23:23 -0400, Jim Rees wrote: > Myklebust, Trond wrote: > > That's a different issue. If you do an 'echo t >/proc/sysrq-trigger', > then do you see any other process that is stuck in the nfs layer and > that might be holding the inode->i_mutex? > > Hard to tell. There are a couple of possibilities. I've put the console > output here, and will investigate more in the morning: > > http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/console.txt Hmm... You don't seem to have all the mutex debugging options enabled, but I strongly suspect that this thread is the culprit. > t_mtab D 0000000000000000 0 1833 1698 0x00000080 > ffff880072f73ac8 0000000000000086 ffff880072f73a98 ffffffff00000000 > ffff880072f72010 ffff8800698fdc00 0000000000012c80 ffff880072f73fd8 > ffff880072f73fd8 0000000000012c80 ffffffff81a0c020 ffff8800698fdc00 > Call Trace: > [<ffffffffa01f5a52>] ? rpc_queue_empty+0x29/0x29 [sunrpc] > [<ffffffffa01f5a81>] rpc_wait_bit_killable+0x2f/0x33 [sunrpc] > [<ffffffff81453eec>] __wait_on_bit+0x43/0x76 > [<ffffffff81453f88>] out_of_line_wait_on_bit+0x69/0x74 > [<ffffffffa01f5a52>] ? rpc_queue_empty+0x29/0x29 [sunrpc] > [<ffffffff81069213>] ? autoremove_wake_function+0x38/0x38 > [<ffffffffa01f5f0a>] __rpc_execute+0xed/0x249 [sunrpc] > [<ffffffffa01f60a3>] rpc_execute+0x3d/0x42 [sunrpc] > [<ffffffffa01efbca>] rpc_run_task+0x79/0x81 [sunrpc] > [<ffffffffa028c3b7>] nfs4_call_sync_sequence+0x60/0x81 [nfs] > [<ffffffff810fe4af>] ? kmem_cache_alloc_trace+0xab/0xbd > [<ffffffffa028c55e>] _nfs4_call_sync_session+0x14/0x16 [nfs] > [<ffffffffa0279b7d>] ? nfs_alloc_fattr+0x24/0x57 [nfs] > [<ffffffffa028ecd1>] _nfs4_proc_remove+0xcd/0x110 [nfs] > [<ffffffffa028ed43>] nfs4_proc_remove+0x2f/0x55 [nfs] > [<ffffffffa027524f>] nfs_unlink+0xf5/0x1a3 [nfs] > [<ffffffff81114c66>] vfs_unlink+0x70/0xd1 > [<ffffffff8111714d>] do_unlinkat+0xd5/0x161 > [<ffffffff8112247e>] ? mntput+0x21/0x23 > [<ffffffff8111381d>] ? path_put+0x1d/0x21 > [<ffffffff8109932e>] ? 
audit_syscall_entry+0x119/0x145 > [<ffffffff811171ea>] sys_unlink+0x11/0x13 > [<ffffffff8145b612>] system_call_fastpath+0x16/0x1b Any idea which file that thread is deleting, and why that might be hanging?
Trond Myklebust wrote: Any idea which file that thread is deleting, and why that might be hanging? The test is a copy of the part of the mount command that writes /etc/mtab. So it's doing some sequence of link, write, flock. It does this thousands of times, much of it in parallel. I'll get a trace of a single instance of this so we can see what the actual syscalls are. I'll also see about turning on the mutex debug. Give me a couple of hours. If you want to look at the source code for the test, clone this: git://oss.sgi.com/xfs/cmds/xfstests and it's in src/t_mtab.c. It's driven by the "089" script. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2011-08-01 at 19:11 -0400, Trond Myklebust wrote: > On Mon, 2011-08-01 at 18:57 -0400, Andy Adamson wrote: > > On Aug 1, 2011, at 6:35 PM, Trond Myklebust wrote: > > > /* > > > @@ -433,26 +436,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, > > > struct cb_sequenceres *res, > > > struct cb_process_state *cps) > > > { > > > + struct nfs4_slot_table *tbl; > > > struct nfs_client *clp; > > > int i; > > > __be32 status = htonl(NFS4ERR_BADSESSION); > > > > > > - cps->clp = NULL; > > > - > > > clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); > > > if (clp == NULL) > > > goto out; > > > > > > + tbl = &clp->cl_session->bc_slot_table; > > > + > > > + spin_lock(&tbl->slot_tbl_lock); > > > /* state manager is resetting the session */ > > > if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { > > > status = NFS4ERR_DELAY; > > > > status = htonl(NFS4ERR_DELAY); > > Yep. I'll fix that too... Looking at this again, I'm not sure that the above is safe. If you return NFS4ERR_DELAY on the SEQUENCE operation, then that has a very specific meaning of "I'm executing your command, but it will take a while to complete.". It therefore seems to me that if we return NFS4ERR_DELAY, then we are basically setting ourselves up for a deadlock: * On the one hand, we're trying to destroy the session * On the other hand, we're telling the server that 'your callback is being processed'. In that situation, it seems to me that it should be perfectly valid for the server to reject our DESTROY_SESSION call with a NFS4ERR_BACK_CHAN_BUSY until we process the callback. Comments? Cheers Trond
Here's what the test is doing. It does multiple parallel instances of this, each one doing thousands of the following in a loop. Console output with mutex and lock debug is in http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/console.txt getpid() = 2431 open("t_mtab~2431", O_WRONLY|O_CREAT, 0) = 3 close(3) = 0 link("t_mtab~2431", "t_mtab~") = 0 unlink("t_mtab~2431") = 0 open("t_mtab~", O_WRONLY) = 3 fcntl(3, F_SETLK, {type=F_WRLCK, whence=SEEK_SET, start=0, len=0}) = 0 close(3) = 0 brk(0) = 0x19ab000 brk(0x19cc000) = 0x19cc000 brk(0) = 0x19cc000 open("t_mtab", O_RDONLY) = 3 open("t_mtab.tmp", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 4 read(3, "/proc on /proc type proc (rw,rel"..., 4096) = 2598 write(4, "/proc on /proc type proc (rw,rel"..., 2598) = 2598 read(3, "", 4096) = 0 close(3) = 0 fchmod(4, 0644) = 0 close(4) = 0 stat("t_mtab", {st_mode=S_IFREG|0644, st_size=2598, ...}) = 0 chown("t_mtab.tmp", 99, 0) = -1 EINVAL (Invalid argument) rename("t_mtab.tmp", "t_mtab") = 0 unlink("t_mtab~") = 0 fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fd95cd76000 write(1, "completed 1 iterations\n", 23) = 23 exit_group(0) = ? -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Jim Rees [mailto:rees@umich.edu] > Sent: Tuesday, August 02, 2011 9:48 PM > To: Myklebust, Trond > Cc: Peng Tao; Adamson, Andy; Christoph Hellwig; linux-nfs@vger.kernel.org; peter honeyman > Subject: Re: [PATCH v4 00/27] add block layout driver to pnfs client > > Here's what the test is doing. It does multiple parallel instances of > this, > each one doing thousands of the following in a loop. Console output > with > mutex and lock debug is in > http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/console.txt Hmm... That trace appears to show that the contention is between processes trying to grab the same inode->i_mutex (in ima_file_check() and do_unlinkat()). The question is why is the unlink process hanging for such a long time? I suspect another callback issue is causing the unlink() to stall: either our client failing to handle a server callback correctly, or possibly the server failing to respond correctly to our reply. Can you try to turn on the callback debugging ('echo 256 > /proc/sys/sunrpc/nfs_debug')? A wireshark trace of what is going on during the hang itself might also help. Cheers Trond -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Myklebust, Trond wrote: Can you try to turn on the callback debugging ('echo 256 > /proc/sys/sunrpc/nfs_debug')? A wireshark trace of what is going on during the hang itself might also help. Will do, tomorrow morning. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Myklebust, Trond wrote: Can you try to turn on the callback debugging ('echo 256 > /proc/sys/sunrpc/nfs_debug')? A wireshark trace of what is going on during the hang itself might also help. It doesn't fail with callback debugging turned on. I suspect the output to my slow serial console disrupts the timing. The trace is in http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/089.pcap -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Aug 3, 2011 at 10:38 AM, Jim Rees <rees@umich.edu> wrote: > Myklebust, Trond wrote: > > Can you try to turn on the callback debugging ('echo 256 > > /proc/sys/sunrpc/nfs_debug')? A wireshark trace of what is going on > during the hang itself might also help. > > It doesn't fail with callback debugging turned on. I suspect the output to > my slow serial console disrupts the timing. > > The trace is in > http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/089.pcap In frame 8785, client returned NFS4ERR_DELAY in response to CB_SEQUENCE and the next RPC was 12s later. So it should be the case that Trond mentioned above.
Peng Tao wrote: On Wed, Aug 3, 2011 at 10:38 AM, Jim Rees <rees@umich.edu> wrote: > Myklebust, Trond wrote: > > Can you try to turn on the callback debugging ('echo 256 > > /proc/sys/sunrpc/nfs_debug')? A wireshark trace of what is going on > during the hang itself might also help. > > It doesn't fail with callback debugging turned on. I suspect the output to > my slow serial console disrupts the timing. > > The trace is in > http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/089.pcap In frame 8785, client returned NFS4ERR_DELAY in response to CB_SEQUENCE and the next RPC was 12s later. So it should be the case that Trond mentioned above. Yes, and there's some funny stuff earlier too. In frame 6381 the client returns a delegation, then in 6387 the server tries to recall the same delegation. Which I suppose is legal but I wonder why. This is followed by another 12 sec delay. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Peng Tao wrote: On Wed, Aug 3, 2011 at 10:38 AM, Jim Rees <rees@umich.edu> wrote: > Myklebust, Trond wrote: > > Can you try to turn on the callback debugging ('echo 256 > > /proc/sys/sunrpc/nfs_debug')? A wireshark trace of what is going on > during the hang itself might also help. > > It doesn't fail with callback debugging turned on. I suspect the output to > my slow serial console disrupts the timing. > > The trace is in > http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/089.pcap In frame 8785, client returned NFS4ERR_DELAY in response to CB_SEQUENCE and the next RPC was 12s later. So it should be the case that Trond mentioned above. That trace was taken with Trond's two patches applied. So they did not fix the problem for me. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Aug 3, 2011 at 7:53 PM, Jim Rees <rees@umich.edu> wrote: > Peng Tao wrote: > > On Wed, Aug 3, 2011 at 10:38 AM, Jim Rees <rees@umich.edu> wrote: > > Myklebust, Trond wrote: > > > > Can you try to turn on the callback debugging ('echo 256 > > > /proc/sys/sunrpc/nfs_debug')? A wireshark trace of what is going on > > during the hang itself might also help. > > > > It doesn't fail with callback debugging turned on. I suspect the output to > > my slow serial console disrupts the timing. > > > > The trace is in > > http://www.citi.umich.edu/projects/nfsv4/pnfs/block/download/089.pcap > In frame 8785, client returned NFS4ERR_DELAY in response to > CB_SEQUENCE and the next RPC was 12s later. So it should be the case > that Trond mentioned above. > > That trace was taken with Trond's two patches applied. So they did not fix > the problem for me. > How do you reproduce it? I was reproducing it by running ./check -nfs and let it run to case 089 and kill it after case 089 run for seconds.
Peng Tao wrote: How do you reproduce it? I was reproducing it by running ./check -nfs and let it run to case 089 and kill it after case 089 run for seconds. ./check -nfs 089 After less than a minute, I start getting hung tasks (I have hung task timeout set to 10 sec). -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b257383..07df5f1 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -38,6 +38,7 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; + int slotid; }; struct cb_compound_hdr_arg { @@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( void *dummy, struct cb_process_state *cps); extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); -extern void nfs4_cb_take_slot(struct nfs_client *clp); struct cb_devicenotifyitem { uint32_t cbd_notify_type; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 74780f9..1bd2c81 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Normal */ if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { slot->seq_nr++; - return htonl(NFS4_OK); + goto out_ok; } /* Replay */ @@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Wraparound */ if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { slot->seq_nr = 1; - return htonl(NFS4_OK); + goto out_ok; } /* Misordered request */ return htonl(NFS4ERR_SEQ_MISORDERED); +out_ok: + tbl->highest_used_slotid = args->csa_slotid; + return htonl(NFS4_OK); } /* @@ -433,26 +436,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res, struct cb_process_state *cps) { + struct nfs4_slot_table *tbl; struct nfs_client *clp; int i; __be32 status = htonl(NFS4ERR_BADSESSION); - cps->clp = NULL; - clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; + tbl = &clp->cl_session->bc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { status = htonl(NFS4ERR_DELAY); + spin_unlock(&tbl->slot_tbl_lock); goto out; } status = 
validate_seqid(&clp->cl_session->bc_slot_table, args); + spin_unlock(&tbl->slot_tbl_lock); if (status) goto out; + cps->slotid = args->csa_slotid; + /* * Check for pending referring calls. If a match is found, a * related callback was received before the response to the original @@ -469,7 +478,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_slotid = args->csa_slotid; res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - nfs4_cb_take_slot(clp); out: cps->clp = clp; /* put in nfs4_callback_compound */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c6c86a7..918ad64 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * Let the state manager know callback processing done. * A single slot, so highest used slotid is either 0 or -1 */ - tbl->highest_used_slotid--; + tbl->highest_used_slotid = -1; nfs4_check_drain_bc_complete(session); spin_unlock(&tbl->slot_tbl_lock); } -static void nfs4_cb_free_slot(struct nfs_client *clp) +static void nfs4_cb_free_slot(struct cb_process_state *cps) { - if (clp && clp->cl_session) - nfs4_callback_free_slot(clp->cl_session); -} - -/* A single slot, so highest used slotid is either 0 or -1 */ -void nfs4_cb_take_slot(struct nfs_client *clp) -{ - struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; - - spin_lock(&tbl->slot_tbl_lock); - tbl->highest_used_slotid++; - BUG_ON(tbl->highest_used_slotid != 0); - spin_unlock(&tbl->slot_tbl_lock); + if (cps->slotid != -1) + nfs4_callback_free_slot(cps->clp->cl_session); } #else /* CONFIG_NFS_V4_1 */ @@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } -static void nfs4_cb_free_slot(struct nfs_client *clp) +static void nfs4_cb_free_slot(struct cb_process_state *cps) { } #endif /* CONFIG_NFS_V4_1 */ @@ -866,6 
+855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_process_state cps = { .drc_status = 0, .clp = NULL, + .slotid = -1, }; unsigned int nops = 0; @@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r *hdr_res.status = status; *hdr_res.nops = htonl(nops); - nfs4_cb_free_slot(cps.clp); + nfs4_cb_free_slot(&cps); nfs_put_client(cps.clp); dprintk("%s: done, status = %u\n", __func__, ntohl(status)); return rpc_success;