Message ID | 1369073630-7193-1-git-send-email-andros@netapp.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, 2013-05-20 at 14:13 -0400, andros@netapp.com wrote: > From: Andy Adamson <andros@netapp.com> > > On a CB_RECALL the callback service thread flushes the inode using > filemap_flush prior to scheduling the state manager thread to return the > delegation. When pNFS is used and I/O has not yet gone to the data server > servicing the inode, a LAYOUTGET can preceed the I/O. Unlike the async > filemap_flush call, the LAYOUTGET must proceed to completion. > > If the state manager starts to recover data while the inode flush is sending > the LAYOUTGET, a deadlock occurs as the callback service thread holds the > single callback session slot until the flushing is done which blocks the state > manager thread, and the state manager thread has set the session draining bit > which puts the inode flush LAYOUTGET RPC to sleep on the forechannel slot > table waitq. > > Separate the draining of the back channel from the draining of the fore channel > by moving the NFS4_SESSION_DRAINING bit from session scope into the fore > and back slot tables. Drain the back channel first allowing the LAYOUTGET > call to proceed (and fail) so the callback service thread frees the callback > slot. Then proceed with draining the forechannel. > > Signed-off-by: Andy Adamson <andros@netapp.com> > --- > fs/nfs/callback_proc.c | 2 +- > fs/nfs/callback_xdr.c | 2 +- > fs/nfs/nfs4proc.c | 2 +- > fs/nfs/nfs4session.c | 6 ++++-- > fs/nfs/nfs4session.h | 13 ++++++++----- > fs/nfs/nfs4state.c | 15 +++++++-------- > 6 files changed, 22 insertions(+), 18 deletions(-) > > @@ -395,12 +395,14 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses) > /* Fore channel */ > tbl = &ses->fc_slot_table; > tbl->session = ses; > + tbl->slot_tbl_state = 0; > status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); > if (status) /* -ENOMEM */ > return status; > /* Back channel */ > tbl = &ses->bc_slot_table; > tbl->session = ses; > + tbl->slot_tbl_state = 0; > status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); > if (status && tbl->slots == NULL) > /* Fore and back channel share a connection so get Hi Andy, The above hunk is buggy. nfs4_setup_session_slot_tables() is called from nfs4_proc_create_session(), so it must not reset the NFS4_SLOT_TBL_DRAINING flag. I'm therefore removing the above 2 lines. I've applied the rest of the patch. Thanks! Trond
On May 20, 2013, at 2:29 PM, "Myklebust, Trond" <Trond.Myklebust@netapp.com> wrote: > On Mon, 2013-05-20 at 14:13 -0400, andros@netapp.com wrote: >> From: Andy Adamson <andros@netapp.com> >> >> On a CB_RECALL the callback service thread flushes the inode using >> filemap_flush prior to scheduling the state manager thread to return the >> delegation. When pNFS is used and I/O has not yet gone to the data server >> servicing the inode, a LAYOUTGET can preceed the I/O. Unlike the async >> filemap_flush call, the LAYOUTGET must proceed to completion. >> >> If the state manager starts to recover data while the inode flush is sending >> the LAYOUTGET, a deadlock occurs as the callback service thread holds the >> single callback session slot until the flushing is done which blocks the state >> manager thread, and the state manager thread has set the session draining bit >> which puts the inode flush LAYOUTGET RPC to sleep on the forechannel slot >> table waitq. >> >> Separate the draining of the back channel from the draining of the fore channel >> by moving the NFS4_SESSION_DRAINING bit from session scope into the fore >> and back slot tables. Drain the back channel first allowing the LAYOUTGET >> call to proceed (and fail) so the callback service thread frees the callback >> slot. Then proceed with draining the forechannel. >> >> Signed-off-by: Andy Adamson <andros@netapp.com> >> --- >> fs/nfs/callback_proc.c | 2 +- >> fs/nfs/callback_xdr.c | 2 +- >> fs/nfs/nfs4proc.c | 2 +- >> fs/nfs/nfs4session.c | 6 ++++-- >> fs/nfs/nfs4session.h | 13 ++++++++----- >> fs/nfs/nfs4state.c | 15 +++++++-------- >> 6 files changed, 22 insertions(+), 18 deletions(-) >> > >> @@ -395,12 +395,14 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses) >> /* Fore channel */ >> tbl = &ses->fc_slot_table; >> tbl->session = ses; >> + tbl->slot_tbl_state = 0; >> status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); >> if (status) /* -ENOMEM */ >> return status; >> /* Back channel */ >> tbl = &ses->bc_slot_table; >> tbl->session = ses; >> + tbl->slot_tbl_state = 0; >> status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); >> if (status && tbl->slots == NULL) >> /* Fore and back channel share a connection so get > > Hi Andy, > Hi Trond > The above hunk is buggy. nfs4_setup_session_slot_tables() is called from > nfs4_proc_create_session(), so it must not reset the > NFS4_SLOT_TBL_DRAINING flag. Yes - I see. Good catch > > I'm therefore removing the above 2 lines. I've applied the rest of the > patch. > Thanks! -->Andy > Thanks! > Trond > > -- > Trond Myklebust > Linux NFS client maintainer > > NetApp > Trond.Myklebust@netapp.com > www.netapp.com -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index a13d26e..0bc2768 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -414,7 +414,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ - if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { + if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { spin_unlock(&tbl->slot_tbl_lock); status = htonl(NFS4ERR_DELAY); /* Return NFS4ERR_BADSESSION if we're draining the session diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 59461c9..a35582c 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -763,7 +763,7 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * A single slot, so highest used slotid is either 0 or -1 */ tbl->highest_used_slotid = NFS4_NO_SLOT; - nfs4_session_drain_complete(session, tbl); + nfs4_slot_tbl_drain_complete(tbl); spin_unlock(&tbl->slot_tbl_lock); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8fbc100..4e2fe71 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -572,7 +572,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, task->tk_timeout = 0; spin_lock(&tbl->slot_tbl_lock); - if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && + if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) && !args->sa_privileged) { /* The state manager will wait until the slot table is empty */ dprintk("%s session is draining\n", __func__); diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index ebda5f4..7ecfb75 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -73,7 +73,7 @@ void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) tbl->highest_used_slotid = new_max; else { tbl->highest_used_slotid = NFS4_NO_SLOT; - nfs4_session_drain_complete(tbl->session, tbl); + nfs4_slot_tbl_drain_complete(tbl); } } dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, @@ -226,7 +226,7 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) struct nfs4_slot *slot = pslot; struct nfs4_slot_table *tbl = slot->table; - if (nfs4_session_draining(tbl->session) && !args->sa_privileged) + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) return false; slot->generation = tbl->generation; args->sa_slot = slot; @@ -395,12 +395,14 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses) /* Fore channel */ tbl = &ses->fc_slot_table; tbl->session = ses; + tbl->slot_tbl_state = 0; status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); if (status) /* -ENOMEM */ return status; /* Back channel */ tbl = &ses->bc_slot_table; tbl->session = ses; + tbl->slot_tbl_state = 0; status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); if (status && tbl->slots == NULL) /* Fore and back channel share a connection so get diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 6f3cb39..ff7d9f0 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -25,6 +25,10 @@ struct nfs4_slot { }; /* Sessions */ +enum nfs4_slot_tbl_state { + NFS4_SLOT_TBL_DRAINING, +}; + #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) struct nfs4_slot_table { struct nfs4_session *session; /* Parent session */ @@ -43,6 +47,7 @@ struct nfs4_slot_table { unsigned long generation; /* Generation counter for target_highest_slotid */ struct completion complete; + unsigned long slot_tbl_state; }; /* @@ -68,7 +73,6 @@ struct nfs4_session { enum nfs4_session_state { NFS4_SESSION_INITING, - NFS4_SESSION_DRAINING, }; #if defined(CONFIG_NFS_V4_1) @@ -88,12 +92,11 @@ extern void nfs4_destroy_session(struct nfs4_session *session); extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); -extern void nfs4_session_drain_complete(struct nfs4_session *session, - struct nfs4_slot_table *tbl); +extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); -static inline bool nfs4_session_draining(struct nfs4_session *session) +static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl) { - return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); + return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); } bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 300d17d..1fab140 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -241,7 +241,7 @@ static void nfs4_end_drain_session(struct nfs_client *clp) if (ses == NULL) return; tbl = &ses->fc_slot_table; - if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { + if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { spin_lock(&tbl->slot_tbl_lock); nfs41_wake_slot_table(tbl); spin_unlock(&tbl->slot_tbl_lock); @@ -251,15 +251,15 @@ static void nfs4_end_drain_session(struct nfs_client *clp) /* * Signal state manager thread if session fore channel is drained */ -void nfs4_session_drain_complete(struct nfs4_session *session, - struct nfs4_slot_table *tbl) +void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl) { - if (nfs4_session_draining(session)) + if (nfs4_slot_tbl_draining(tbl)) complete(&tbl->complete); } -static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) +static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) { + set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); spin_lock(&tbl->slot_tbl_lock); if (tbl->highest_used_slotid != NFS4_NO_SLOT) { INIT_COMPLETION(tbl->complete); @@ -275,13 +275,12 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) struct nfs4_session *ses = clp->cl_session; int ret = 0; - set_bit(NFS4_SESSION_DRAINING, &ses->session_state); /* back channel */ - ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table); + ret = nfs4_drain_slot_tbl(&ses->bc_slot_table); if (ret) return ret; /* fore channel */ - return nfs4_wait_on_slot_tbl(&ses->fc_slot_table); + return nfs4_drain_slot_tbl(&ses->fc_slot_table); } static void nfs41_finish_session_reset(struct nfs_client *clp)