Message ID | 20171120214118.4240-1-smayhew@redhat.com (mailing list archive)
---|---
State | New, archived
Hi Scott,

On 11/20/2017 04:41 PM, Scott Mayhew wrote:
> The following deadlock can occur between a process waiting for a client
> to initialize while walking the client list and another process
> waiting for the nfs_clid_init_mutex so it can initialize that client:
> 
> Process 1                               Process 2
> ---------                               ---------
> spin_lock(&nn->nfs_client_lock);
> list_add_tail(&CLIENTA->cl_share_link,
>         &nn->nfs_client_list);
> spin_unlock(&nn->nfs_client_lock);
>                                         spin_lock(&nn->nfs_client_lock);
>                                         list_add_tail(&CLIENTB->cl_share_link,
>                                                 &nn->nfs_client_list);
>                                         spin_unlock(&nn->nfs_client_lock);
>                                         mutex_lock(&nfs_clid_init_mutex);
>                                         nfs41_walk_client_list(clp, result, cred);
>                                         nfs_wait_client_init_complete(CLIENTA);
> (waiting for nfs_clid_init_mutex)
> 
> Add and initialize the client with the nfs_clid_init_mutex held in
> order to prevent that deadlock.
> 
> Signed-off-by: Scott Mayhew <smayhew@redhat.com>
> ---
>  fs/nfs/client.c    | 21 +++++++++++++++++++--
>  fs/nfs/nfs4state.c |  4 ----
>  2 files changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 0ac2fb1..db38c47 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -60,6 +60,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
>  static DEFINE_SPINLOCK(nfs_version_lock);
>  static DEFINE_MUTEX(nfs_version_mutex);
>  static LIST_HEAD(nfs_versions);
> +static DEFINE_MUTEX(nfs_clid_init_mutex);
>  
>  /*
>   * RPC cruft for NFS
> @@ -386,7 +387,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
>   */
>  struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
>  {
> -	struct nfs_client *clp, *new = NULL;
> +	struct nfs_client *clp, *new = NULL, *result = NULL;
>  	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
>  	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
>  
> @@ -407,11 +408,27 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
>  			return nfs_found_client(cl_init, clp);
>  		}
>  		if (new) {
> +			/* add and initialize the client with the
> +			 * nfs_clid_init_mutex held to prevent a deadlock
> +			 * with the server trunking detection
> +			 */
> +			spin_unlock(&nn->nfs_client_lock);
> +			mutex_lock(&nfs_clid_init_mutex);
> +			spin_lock(&nn->nfs_client_lock);
> +			clp = nfs_match_client(cl_init);
> +			if (clp) {
> +				spin_unlock(&nn->nfs_client_lock);
> +				mutex_unlock(&nfs_clid_init_mutex);
> +				new->rpc_ops->free_client(new);
> +				return nfs_found_client(cl_init, clp);
> +			}

Is there any reason you can't just grab the nfs_clid_init_mutex at the very
beginning of the do/while loop, right before getting the nn->nfs_client_lock?
Then you wouldn't need to repeat the nfs_match_client() check, which might
help clean the code up a bit.

Thanks,
Anna

>  			list_add_tail(&new->cl_share_link,
>  					&nn->nfs_client_list);
>  			spin_unlock(&nn->nfs_client_lock);
>  			new->cl_flags = cl_init->init_flags;
> -			return rpc_ops->init_client(new, cl_init);
> +			result = rpc_ops->init_client(new, cl_init);
> +			mutex_unlock(&nfs_clid_init_mutex);
> +			return result;
>  		}
>  
>  		spin_unlock(&nn->nfs_client_lock);
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 54fd56d..668164e 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -77,8 +77,6 @@ const nfs4_stateid invalid_stateid = {
>  	.type = NFS4_INVALID_STATEID_TYPE,
>  };
>  
> -static DEFINE_MUTEX(nfs_clid_init_mutex);
> -
>  int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
>  {
>  	struct nfs4_setclientid_res clid = {
> @@ -2164,7 +2162,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
>  	clnt = clp->cl_rpcclient;
>  	i = 0;
>  
> -	mutex_lock(&nfs_clid_init_mutex);
>  again:
>  	status = -ENOENT;
>  	cred = nfs4_get_clid_cred(clp);
> @@ -2232,7 +2229,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
>  	}
>  
>  out_unlock:
> -	mutex_unlock(&nfs_clid_init_mutex);
>  	dprintk("NFS: %s: status = %d\n", __func__, status);
>  	return status;
>  }
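[Editor's note: a rough sketch of the restructuring Anna is suggesting — take nfs_clid_init_mutex once, before entering the lookup loop, so the mid-loop nfs_match_client() re-check disappears. This is an illustration against the nfs_get_client() of that era, not code either party posted; the hostname sanity check and some details are elided.]

---8<---
struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
{
	struct nfs_client *clp, *new = NULL, *result;
	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;

	/* Hold the mutex across the whole lookup-or-create loop, so a
	 * client can't be added by a racing trunking walker between the
	 * nfs_match_client() check and list insertion. */
	mutex_lock(&nfs_clid_init_mutex);
	do {
		spin_lock(&nn->nfs_client_lock);
		clp = nfs_match_client(cl_init);
		if (clp) {
			spin_unlock(&nn->nfs_client_lock);
			mutex_unlock(&nfs_clid_init_mutex);
			if (new)
				new->rpc_ops->free_client(new);
			return nfs_found_client(cl_init, clp);
		}
		if (new) {
			list_add_tail(&new->cl_share_link,
					&nn->nfs_client_list);
			spin_unlock(&nn->nfs_client_lock);
			new->cl_flags = cl_init->init_flags;
			result = rpc_ops->init_client(new, cl_init);
			mutex_unlock(&nfs_clid_init_mutex);
			return result;
		}
		spin_unlock(&nn->nfs_client_lock);
		new = rpc_ops->alloc_client(cl_init);
	} while (!IS_ERR(new));
	mutex_unlock(&nfs_clid_init_mutex);
	return new;	/* ERR_PTR from alloc_client() */
}
---8<---

The trade-off is that the mutex is then taken even on the common path that finds an already-ready client, whereas the posted patch only takes it when a new client must be created.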
On Mon, 2017-11-20 at 16:41 -0500, Scott Mayhew wrote:
> The following deadlock can occur between a process waiting for a client
> to initialize while walking the client list and another process
> waiting for the nfs_clid_init_mutex so it can initialize that client:
> 
> Process 1                               Process 2
> ---------                               ---------
> spin_lock(&nn->nfs_client_lock);
> list_add_tail(&CLIENTA->cl_share_link,
>         &nn->nfs_client_list);
> spin_unlock(&nn->nfs_client_lock);
>                                         spin_lock(&nn->nfs_client_lock);
>                                         list_add_tail(&CLIENTB->cl_share_link,
>                                                 &nn->nfs_client_list);
>                                         spin_unlock(&nn->nfs_client_lock);
>                                         mutex_lock(&nfs_clid_init_mutex);
>                                         nfs41_walk_client_list(clp, result, cred);
>                                         nfs_wait_client_init_complete(CLIENTA);
> (waiting for nfs_clid_init_mutex)
> 
> Add and initialize the client with the nfs_clid_init_mutex held in
> order to prevent that deadlock.
> 
> Signed-off-by: Scott Mayhew <smayhew@redhat.com>
> ---
>  fs/nfs/client.c    | 21 +++++++++++++++++++--
>  fs/nfs/nfs4state.c |  4 ----
>  2 files changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 0ac2fb1..db38c47 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -60,6 +60,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
>  static DEFINE_SPINLOCK(nfs_version_lock);
>  static DEFINE_MUTEX(nfs_version_mutex);
>  static LIST_HEAD(nfs_versions);
> +static DEFINE_MUTEX(nfs_clid_init_mutex);
>  
>  /*
>   * RPC cruft for NFS
> @@ -386,7 +387,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
>   */
>  struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
>  {
> -	struct nfs_client *clp, *new = NULL;
> +	struct nfs_client *clp, *new = NULL, *result = NULL;
>  	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
>  	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
>  
> @@ -407,11 +408,27 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
>  			return nfs_found_client(cl_init, clp);
>  		}
>  		if (new) {
> +			/* add and initialize the client with the
> +			 * nfs_clid_init_mutex held to prevent a deadlock
> +			 * with the server trunking detection
> +			 */
> +			spin_unlock(&nn->nfs_client_lock);
> +			mutex_lock(&nfs_clid_init_mutex);
> +			spin_lock(&nn->nfs_client_lock);
> +			clp = nfs_match_client(cl_init);
> +			if (clp) {
> +				spin_unlock(&nn->nfs_client_lock);
> +				mutex_unlock(&nfs_clid_init_mutex);
> +				new->rpc_ops->free_client(new);
> +				return nfs_found_client(cl_init, clp);
> +			}
>  			list_add_tail(&new->cl_share_link,
>  					&nn->nfs_client_list);
>  			spin_unlock(&nn->nfs_client_lock);
>  			new->cl_flags = cl_init->init_flags;
> -			return rpc_ops->init_client(new, cl_init);
> +			result = rpc_ops->init_client(new, cl_init);
> +			mutex_unlock(&nfs_clid_init_mutex);
> +			return result;
>  		}
>  
>  		spin_unlock(&nn->nfs_client_lock);
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 54fd56d..668164e 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -77,8 +77,6 @@ const nfs4_stateid invalid_stateid = {
>  	.type = NFS4_INVALID_STATEID_TYPE,
>  };
>  
> -static DEFINE_MUTEX(nfs_clid_init_mutex);
> -
>  int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
>  {
>  	struct nfs4_setclientid_res clid = {
> @@ -2164,7 +2162,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
>  	clnt = clp->cl_rpcclient;
>  	i = 0;
>  
> -	mutex_lock(&nfs_clid_init_mutex);
>  again:
>  	status = -ENOENT;
>  	cred = nfs4_get_clid_cred(clp);
> @@ -2232,7 +2229,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
>  	}
>  
>  out_unlock:
> -	mutex_unlock(&nfs_clid_init_mutex);
>  	dprintk("NFS: %s: status = %d\n", __func__, status);
>  	return status;
>  }

Your initial fix was fine. It just needed 2 changes:

1) Remove the test for 'clp->cl_minorversion > 0'.
2) Refcount the struct nfs_client while you are waiting for it to be
initialised.

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com
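[Editor's note: change (2) presumably means pinning each not-yet-ready client before sleeping on it, the way nfs40_walk_client_list() already did. A rough sketch of that pattern follows — the surrounding loop and the "prev" bookkeeping are abbreviated, and cl_count was still an atomic_t in kernels of that era, so this is an illustration rather than the exact fix:]

---8<---
	/* inside the list_for_each_entry() walk of nn->nfs_client_list */
	if (pos->cl_cons_state > NFS_CS_READY) {
		atomic_inc(&pos->cl_count);	/* pin pos while we sleep */
		spin_unlock(&nn->nfs_client_lock);

		nfs_put_client(prev);		/* drop the previous pin */
		prev = pos;

		status = nfs_wait_client_init_complete(pos);
		spin_lock(&nn->nfs_client_lock);
		if (status < 0)
			continue;
	}
	if (pos->cl_cons_state != NFS_CS_READY)
		continue;	/* initialization failed; skip this client */
---8<---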
On Wed, 29 Nov 2017, Trond Myklebust wrote:

> On Mon, 2017-11-20 at 16:41 -0500, Scott Mayhew wrote:
> > The following deadlock can occur between a process waiting for a client
> > to initialize while walking the client list and another process
> > waiting for the nfs_clid_init_mutex so it can initialize that client:
> > 
> > Process 1                               Process 2
> > ---------                               ---------
> > spin_lock(&nn->nfs_client_lock);
> > list_add_tail(&CLIENTA->cl_share_link,
> >         &nn->nfs_client_list);
> > spin_unlock(&nn->nfs_client_lock);
> >                                         spin_lock(&nn->nfs_client_lock);
> >                                         list_add_tail(&CLIENTB->cl_share_link,
> >                                                 &nn->nfs_client_list);
> >                                         spin_unlock(&nn->nfs_client_lock);
> >                                         mutex_lock(&nfs_clid_init_mutex);
> >                                         nfs41_walk_client_list(clp, result, cred);
> >                                         nfs_wait_client_init_complete(CLIENTA);
> > (waiting for nfs_clid_init_mutex)
> > 
> > Add and initialize the client with the nfs_clid_init_mutex held in
> > order to prevent that deadlock.
> > 
> > Signed-off-by: Scott Mayhew <smayhew@redhat.com>
> > ---
> >  fs/nfs/client.c    | 21 +++++++++++++++++++--
> >  fs/nfs/nfs4state.c |  4 ----
> >  2 files changed, 19 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 0ac2fb1..db38c47 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -60,6 +60,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
> >  static DEFINE_SPINLOCK(nfs_version_lock);
> >  static DEFINE_MUTEX(nfs_version_mutex);
> >  static LIST_HEAD(nfs_versions);
> > +static DEFINE_MUTEX(nfs_clid_init_mutex);
> >  
> >  /*
> >   * RPC cruft for NFS
> > @@ -386,7 +387,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
> >   */
> >  struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
> >  {
> > -	struct nfs_client *clp, *new = NULL;
> > +	struct nfs_client *clp, *new = NULL, *result = NULL;
> >  	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
> >  	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
> >  
> > @@ -407,11 +408,27 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
> >  			return nfs_found_client(cl_init, clp);
> >  		}
> >  		if (new) {
> > +			/* add and initialize the client with the
> > +			 * nfs_clid_init_mutex held to prevent a deadlock
> > +			 * with the server trunking detection
> > +			 */
> > +			spin_unlock(&nn->nfs_client_lock);
> > +			mutex_lock(&nfs_clid_init_mutex);
> > +			spin_lock(&nn->nfs_client_lock);
> > +			clp = nfs_match_client(cl_init);
> > +			if (clp) {
> > +				spin_unlock(&nn->nfs_client_lock);
> > +				mutex_unlock(&nfs_clid_init_mutex);
> > +				new->rpc_ops->free_client(new);
> > +				return nfs_found_client(cl_init, clp);
> > +			}
> >  			list_add_tail(&new->cl_share_link,
> >  					&nn->nfs_client_list);
> >  			spin_unlock(&nn->nfs_client_lock);
> >  			new->cl_flags = cl_init->init_flags;
> > -			return rpc_ops->init_client(new, cl_init);
> > +			result = rpc_ops->init_client(new, cl_init);
> > +			mutex_unlock(&nfs_clid_init_mutex);
> > +			return result;
> >  		}
> >  
> >  		spin_unlock(&nn->nfs_client_lock);
> > diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> > index 54fd56d..668164e 100644
> > --- a/fs/nfs/nfs4state.c
> > +++ b/fs/nfs/nfs4state.c
> > @@ -77,8 +77,6 @@ const nfs4_stateid invalid_stateid = {
> >  	.type = NFS4_INVALID_STATEID_TYPE,
> >  };
> >  
> > -static DEFINE_MUTEX(nfs_clid_init_mutex);
> > -
> >  int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
> >  {
> >  	struct nfs4_setclientid_res clid = {
> > @@ -2164,7 +2162,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
> >  	clnt = clp->cl_rpcclient;
> >  	i = 0;
> >  
> > -	mutex_lock(&nfs_clid_init_mutex);
> >  again:
> >  	status = -ENOENT;
> >  	cred = nfs4_get_clid_cred(clp);
> > @@ -2232,7 +2229,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
> >  	}
> >  
> >  out_unlock:
> > -	mutex_unlock(&nfs_clid_init_mutex);
> >  	dprintk("NFS: %s: status = %d\n", __func__, status);
> >  	return status;
> >  }
> 
> Your initial fix was fine. It just needed 2 changes:
> 
> 1) Remove the test for 'clp->cl_minorversion > 0'.
> 2) Refcount the struct nfs_client while you are waiting for it to be
> initialised.

I guess I didn't explain it well enough in the email, but I actually did
try that already and I had problems with a multi-homed NFS server.  Once
the first process finds and uses an existing client, who's going to mark
the old client ready so that the other processes finish waiting?

Maybe what I need is to mark the old client ready with an error like
-EPERM or something, i.e.

---8<---
	error = nfs4_discover_server_trunking(clp, &old);
	if (error < 0)
		goto error;

	if (clp != old) {
		clp->cl_preserve_clid = true;
		nfs_mark_client_ready(clp, -EPERM);
	}
	nfs_put_client(clp);
	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
	return old;
---8<---

I'll test that to see if that makes any difference.

-Scott

> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> trond.myklebust@primarydata.com
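[Editor's note: for context on why marking the abandoned client ready with an error would wake the stuck processes — nfs_mark_client_ready() stores the new state and wakes the shared wait queue, and waiters only test whether initialization has *finished*, not whether it succeeded. Roughly, paraphrasing the fs/nfs/client.c helpers of that era:]

---8<---
void nfs_mark_client_ready(struct nfs_client *clp, int state)
{
	smp_wmb();
	clp->cl_cons_state = state;	/* NFS_CS_READY, or a negative errno */
	wake_up_all(&nfs_client_active_wq);
}

int nfs_wait_client_init_complete(const struct nfs_client *clp)
{
	/* returns once cl_cons_state <= NFS_CS_READY, i.e. ready or failed */
	return wait_event_killable(nfs_client_active_wq,
			nfs_client_init_is_complete(clp));
}
---8<---

[A negative cl_cons_state such as -EPERM then makes nfs_found_client() return that error for the abandoned client instead of leaving other processes blocked on a client that will never finish initializing.]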
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0ac2fb1..db38c47 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -60,6 +60,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 static DEFINE_SPINLOCK(nfs_version_lock);
 static DEFINE_MUTEX(nfs_version_mutex);
 static LIST_HEAD(nfs_versions);
+static DEFINE_MUTEX(nfs_clid_init_mutex);
 
 /*
  * RPC cruft for NFS
@@ -386,7 +387,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
  */
 struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 {
-	struct nfs_client *clp, *new = NULL;
+	struct nfs_client *clp, *new = NULL, *result = NULL;
 	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
 	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
 
@@ -407,11 +408,27 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 			return nfs_found_client(cl_init, clp);
 		}
 		if (new) {
+			/* add and initialize the client with the
+			 * nfs_clid_init_mutex held to prevent a deadlock
+			 * with the server trunking detection
+			 */
+			spin_unlock(&nn->nfs_client_lock);
+			mutex_lock(&nfs_clid_init_mutex);
+			spin_lock(&nn->nfs_client_lock);
+			clp = nfs_match_client(cl_init);
+			if (clp) {
+				spin_unlock(&nn->nfs_client_lock);
+				mutex_unlock(&nfs_clid_init_mutex);
+				new->rpc_ops->free_client(new);
+				return nfs_found_client(cl_init, clp);
+			}
 			list_add_tail(&new->cl_share_link,
 					&nn->nfs_client_list);
 			spin_unlock(&nn->nfs_client_lock);
 			new->cl_flags = cl_init->init_flags;
-			return rpc_ops->init_client(new, cl_init);
+			result = rpc_ops->init_client(new, cl_init);
+			mutex_unlock(&nfs_clid_init_mutex);
+			return result;
 		}
 
 		spin_unlock(&nn->nfs_client_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 54fd56d..668164e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -77,8 +77,6 @@ const nfs4_stateid invalid_stateid = {
 	.type = NFS4_INVALID_STATEID_TYPE,
 };
 
-static DEFINE_MUTEX(nfs_clid_init_mutex);
-
 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_setclientid_res clid = {
@@ -2164,7 +2162,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
 	clnt = clp->cl_rpcclient;
 	i = 0;
 
-	mutex_lock(&nfs_clid_init_mutex);
 again:
 	status = -ENOENT;
 	cred = nfs4_get_clid_cred(clp);
@@ -2232,7 +2229,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
 	}
 
 out_unlock:
-	mutex_unlock(&nfs_clid_init_mutex);
 	dprintk("NFS: %s: status = %d\n", __func__, status);
 	return status;
 }
The following deadlock can occur between a process waiting for a client
to initialize while walking the client list and another process waiting
for the nfs_clid_init_mutex so it can initialize that client:

Process 1                               Process 2
---------                               ---------
spin_lock(&nn->nfs_client_lock);
list_add_tail(&CLIENTA->cl_share_link,
        &nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
                                        spin_lock(&nn->nfs_client_lock);
                                        list_add_tail(&CLIENTB->cl_share_link,
                                                &nn->nfs_client_list);
                                        spin_unlock(&nn->nfs_client_lock);
                                        mutex_lock(&nfs_clid_init_mutex);
                                        nfs41_walk_client_list(clp, result, cred);
                                        nfs_wait_client_init_complete(CLIENTA);
(waiting for nfs_clid_init_mutex)

Add and initialize the client with the nfs_clid_init_mutex held in
order to prevent that deadlock.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
---
 fs/nfs/client.c    | 21 +++++++++++++++++++--
 fs/nfs/nfs4state.c |  4 ----
 2 files changed, 19 insertions(+), 6 deletions(-)