Message ID | 20171107142927.9468-1-smayhew@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
T24gVHVlLCAyMDE3LTExLTA3IGF0IDA5OjI5IC0wNTAwLCBTY290dCBNYXloZXcgd3JvdGU6DQo+ IFRoZSBmb2xsb3dpbmcgZGVhZGxvY2sgY2FuIG9jY3VyIGJldHdlZW4gYSBwcm9jZXNzIHdhaXRp bmcgZm9yIGENCj4gY2xpZW50DQo+IHRvIGluaXRpYWxpemUgaW4gd2hpbGUgd2Fsa2luZyB0aGUg Y2xpZW50IGxpc3QgYW5kIGFub3RoZXIgcHJvY2Vzcw0KPiB3YWl0aW5nIGZvciB0aGUgbmZzX2Ns aWRfaW5pdF9tdXRleCBzbyBpdCBjYW4gaW5pdGlhbGl6ZSB0aGF0IGNsaWVudDoNCj4gDQo+IFBy b2Nlc3MgMSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBQcm9jZXNzIDINCj4gLS0tLS0t LS0tICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIC0tLS0tLS0tLQ0KPiBzcGluX2xvY2so Jm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiBsaXN0X2FkZF90YWlsKCZDTElFTlRBLT5jbF9zaGFy ZV9saW5rLA0KPiAgICAgICAgICZubi0+bmZzX2NsaWVudF9saXN0KTsNCj4gc3Bpbl91bmxvY2so Jm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg ICAgICAgICAgc3Bpbl9sb2NrKCZubi0NCj4gPm5mc19jbGllbnRfbG9jayk7DQo+ICAgICAgICAg ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsaXN0X2FkZF90YWlsKCZDTElFTlRCLQ0K PiA+Y2xfc2hhcmVfbGluaywNCj4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg ICAgICAgICAgICAgJm5uLQ0KPiA+bmZzX2NsaWVudF9saXN0KTsNCj4gICAgICAgICAgICAgICAg ICAgICAgICAgICAgICAgICAgICAgICAgIHNwaW5fdW5sb2NrKCZubi0NCj4gPm5mc19jbGllbnRf bG9jayk7DQo+ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBtdXRleF9s b2NrKCZuZnNfY2xpZF9pbml0X211dA0KPiBleCk7DQo+ICAgICAgICAgICAgICAgICAgICAgICAg ICAgICAgICAgICAgICAgICBuZnM0MV93YWxrX2NsaWVudF9saXN0KGNscCwNCj4gcmVzdWx0LCBj cmVkKTsNCj4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5mc193YWl0 X2NsaWVudF9pbml0X2NvbXBsZXRlDQo+IChDTElFTlRBKTsNCj4gKHdhaXRpbmcgZm9yIG5mc19j bGlkX2luaXRfbXV0ZXgpDQo+IA0KPiBNYWtlIHN1cmUgbmZzX21hdGNoX2NsaWVudCgpIG9ubHkg ZXZhbHVhdGVzIGNsaWVudHMgdGhhdCBoYXZlDQo+IGNvbXBsZXRlZA0KPiBpbml0aWFsaXphdGlv biBpbiBvcmRlciB0byBwcmV2ZW50IHRoYXQgZGVhZGxvY2suDQo+IA0KPiBTaWduZWQtb2ZmLWJ5 OiBTY290dCBNYXloZXcgPHNtYXloZXdAcmVkaGF0LmNvbT4NCj4gLS0tDQo+ICBmcy9uZnMvY2xp ZW50LmMgfCA5ICsrKysrKysrKw0KPiAgMSBmaWxlIGNoYW5nZWQsIDkgaW5zZXJ0aW9ucygrKQ0K PiANCj4gZGlmZiAtLWdpdCBhL2ZzL25mcy9jbGllbnQuYyBiL2ZzL25mcy9jbGllbnQuYw0KPiBp bmRleCAyMjg4MGVmLi44YjA5Mzk5NCAxMDA2NDQNCj4gLS0tIGEvZnMvbmZzL2NsaWVudC5jDQo+ ICsrKyBiL2ZzL25mcy9jbGllbnQuYw0KPiBAQCAtMjkxLDEyICsyOTEsMjEgQEAgc3RhdGljIHN0 cnVjdCBuZnNfY2xpZW50DQo+ICpuZnNfbWF0Y2hfY2xpZW50KGNvbnN0IHN0cnVjdCBuZnNfY2xp ZW50X2luaXRkYXRhICpkYXQNCj4gIAljb25zdCBzdHJ1Y3Qgc29ja2FkZHIgKnNhcCA9IGRhdGEt PmFkZHI7DQo+ICAJc3RydWN0IG5mc19uZXQgKm5uID0gbmV0X2dlbmVyaWMoZGF0YS0+bmV0LCBu ZnNfbmV0X2lkKTsNCj4gIA0KPiArYWdhaW46DQo+ICAJbGlzdF9mb3JfZWFjaF9lbnRyeShjbHAs ICZubi0+bmZzX2NsaWVudF9saXN0LA0KPiBjbF9zaGFyZV9saW5rKSB7DQo+ICAJICAgICAgICBj b25zdCBzdHJ1Y3Qgc29ja2FkZHIgKmNsYXAgPSAoc3RydWN0IHNvY2thZGRyDQo+ICopJmNscC0+ Y2xfYWRkcjsNCj4gIAkJLyogRG9uJ3QgbWF0Y2ggY2xpZW50cyB0aGF0IGZhaWxlZCB0byBpbml0 aWFsaXNlDQo+IHByb3Blcmx5ICovDQo+ICAJCWlmIChjbHAtPmNsX2NvbnNfc3RhdGUgPCAwKQ0K PiAgCQkJY29udGludWU7DQo+ICANCj4gKwkJaWYgKGNscC0+Y2xfbWlub3J2ZXJzaW9uID4gMCAm Jg0KPiArCQkJCWNscC0+Y2xfY29uc19zdGF0ZSA+IE5GU19DU19SRUFEWSkgew0KPiArCQkJc3Bp bl91bmxvY2soJm5uLT5uZnNfY2xpZW50X2xvY2spOw0KPiArCQkJbmZzX3dhaXRfY2xpZW50X2lu aXRfY29tcGxldGUoY2xwKTsNCj4gKwkJCXNwaW5fbG9jaygmbm4tPm5mc19jbGllbnRfbG9jayk7 DQo+ICsJCQlnb3RvIGFnYWluOw0KPiArCQl9DQo+ICsNCj4gIAkJLyogRGlmZmVyZW50IE5GUyB2 ZXJzaW9ucyBjYW5ub3Qgc2hhcmUgdGhlIHNhbWUNCj4gbmZzX2NsaWVudCAqLw0KPiAgCQlpZiAo Y2xwLT5ycGNfb3BzICE9IGRhdGEtPm5mc19tb2QtPnJwY19vcHMpDQo+ICAJCQljb250aW51ZTsN Cg0KV2h5IHRoZSB0ZXN0IGZvciBjbHAtPmNsX21pbm9ydmVyc2lvbj8gV2hhdCdzIHNvIG1pbm9y IHZlcnNpb24gc3BlY2lmaWMNCmFib3V0IGFueSBvZiB0aGlzPw0KDQotLSANClRyb25kIE15a2xl YnVzdA0KTGludXggTkZTIGNsaWVudCBtYWludGFpbmVyLCBQcmltYXJ5RGF0YQ0KdHJvbmQubXlr bGVidXN0QHByaW1hcnlkYXRhLmNvbQ0K -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 07 Nov 2017, Trond Myklebust wrote: > On Tue, 2017-11-07 at 09:29 -0500, Scott Mayhew wrote: > > The following deadlock can occur between a process waiting for a > > client > > to initialize in while walking the client list and another process > > waiting for the nfs_clid_init_mutex so it can initialize that client: > > > > Process 1 Process 2 > > --------- --------- > > spin_lock(&nn->nfs_client_lock); > > list_add_tail(&CLIENTA->cl_share_link, > > &nn->nfs_client_list); > > spin_unlock(&nn->nfs_client_lock); > > spin_lock(&nn- > > >nfs_client_lock); > > list_add_tail(&CLIENTB- > > >cl_share_link, > > &nn- > > >nfs_client_list); > > spin_unlock(&nn- > > >nfs_client_lock); > > mutex_lock(&nfs_clid_init_mut > > ex); > > nfs41_walk_client_list(clp, > > result, cred); > > nfs_wait_client_init_complete > > (CLIENTA); > > (waiting for nfs_clid_init_mutex) > > > > Make sure nfs_match_client() only evaluates clients that have > > completed > > initialization in order to prevent that deadlock. > > > > Signed-off-by: Scott Mayhew <smayhew@redhat.com> > > --- > > fs/nfs/client.c | 9 +++++++++ > > 1 file changed, 9 insertions(+) > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > > index 22880ef..8b093994 100644 > > --- a/fs/nfs/client.c > > +++ b/fs/nfs/client.c > > @@ -291,12 +291,21 @@ static struct nfs_client > > *nfs_match_client(const struct nfs_client_initdata *dat > > const struct sockaddr *sap = data->addr; > > struct nfs_net *nn = net_generic(data->net, nfs_net_id); > > > > +again: > > list_for_each_entry(clp, &nn->nfs_client_list, > > cl_share_link) { > > const struct sockaddr *clap = (struct sockaddr > > *)&clp->cl_addr; > > /* Don't match clients that failed to initialise > > properly */ > > if (clp->cl_cons_state < 0) > > continue; > > > > + if (clp->cl_minorversion > 0 && > > + clp->cl_cons_state > NFS_CS_READY) { > > + spin_unlock(&nn->nfs_client_lock); > > + nfs_wait_client_init_complete(clp); > > + spin_lock(&nn->nfs_client_lock); > > + goto again; > > + } > > + > > /* Different NFS versions cannot share the same > > nfs_client */ > > if (clp->rpc_ops != data->nfs_mod->rpc_ops) > > continue; > > Why the test for clp->cl_minorversion? What's so minor version specific > about any of this? The deadlock doesn't occur with v4.0 clients because those are being marked NFS_CS_READY in nfs4_client_client(), before the trunking detection if (!nfs4_has_session(clp)) nfs_mark_client_ready(clp, NFS_CS_READY); error = nfs4_discover_server_trunking(clp, &old); Now that I think about it, that seems wrong because nfs4_match_client() could be comparing cl_clientid and cl_owner_id values that haven't been established yet... in fact when I run my reproducer against two ip addresses on the same server I wind up with multiple clients with the same cl_clientid and cl_owner_id crash> list -H 0xffff9fc2b6327118 -o nfs_client.cl_share_link -s nfs_client.cl_clientid,cl_owner_id ffff9fc2b352c800 cl_clientid = 0xb81ff359309120bb cl_owner_id = 0xffff9fc2ae9644c0 "Linux NFSv4.0 fedora26.smayhew.test" ffff9fc2aded7400 cl_clientid = 0xb81ff359309120bb cl_owner_id = 0xffff9fc2b0584f80 "Linux NFSv4.0 fedora26.smayhew.test" I'll poke around a bit more. -Scott > > -- > Trond Myklebust > Linux NFS client maintainer, PrimaryData > trond.myklebust@primarydata.com -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2017-11-07 at 13:26 -0500, Scott Mayhew wrote: > On Tue, 07 Nov 2017, Trond Myklebust wrote: > > > On Tue, 2017-11-07 at 09:29 -0500, Scott Mayhew wrote: > > > The following deadlock can occur between a process waiting for a > > > client > > > to initialize in while walking the client list and another > > > process > > > waiting for the nfs_clid_init_mutex so it can initialize that > > > client: > > > > > > Process 1 Process 2 > > > --------- --------- > > > spin_lock(&nn->nfs_client_lock); > > > list_add_tail(&CLIENTA->cl_share_link, > > > &nn->nfs_client_list); > > > spin_unlock(&nn->nfs_client_lock); > > > spin_lock(&nn- > > > > nfs_client_lock); > > > > > > list_add_tail(&CLIENTB- > > > > cl_share_link, > > > > > > &nn- > > > > nfs_client_list); > > > > > > spin_unlock(&nn- > > > > nfs_client_lock); > > > > > > mutex_lock(&nfs_clid_init > > > _mut > > > ex); > > > nfs41_walk_client_list(cl > > > p, > > > result, cred); > > > nfs_wait_client_init_comp > > > lete > > > (CLIENTA); > > > (waiting for nfs_clid_init_mutex) > > > > > > Make sure nfs_match_client() only evaluates clients that have > > > completed > > > initialization in order to prevent that deadlock. > > > > > > Signed-off-by: Scott Mayhew <smayhew@redhat.com> > > > --- > > > fs/nfs/client.c | 9 +++++++++ > > > 1 file changed, 9 insertions(+) > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > > > index 22880ef..8b093994 100644 > > > --- a/fs/nfs/client.c > > > +++ b/fs/nfs/client.c > > > @@ -291,12 +291,21 @@ static struct nfs_client > > > *nfs_match_client(const struct nfs_client_initdata *dat > > > const struct sockaddr *sap = data->addr; > > > struct nfs_net *nn = net_generic(data->net, nfs_net_id); > > > > > > +again: > > > list_for_each_entry(clp, &nn->nfs_client_list, > > > cl_share_link) { > > > const struct sockaddr *clap = (struct sockaddr > > > *)&clp->cl_addr; > > > /* Don't match clients that failed to initialise > > > properly */ > > > if (clp->cl_cons_state < 0) > > > continue; > > > > > > + if (clp->cl_minorversion > 0 && > > > + clp->cl_cons_state > > > > NFS_CS_READY) { > > > + spin_unlock(&nn->nfs_client_lock); > > > + nfs_wait_client_init_complete(clp); > > > + spin_lock(&nn->nfs_client_lock); > > > + goto again; > > > + } > > > + > > > /* Different NFS versions cannot share the same > > > nfs_client */ > > > if (clp->rpc_ops != data->nfs_mod->rpc_ops) > > > continue; > > > > Why the test for clp->cl_minorversion? What's so minor version > > specific > > about any of this? > > The deadlock doesn't occur with v4.0 clients because those are being > marked NFS_CS_READY in nfs4_client_client(), before the trunking > detection Sure, but the root cause you are asserting is that the nfs_client has not finished initialising. What is minorversion-specific (or even NFSv4-specific) about that? -- Trond Myklebust Linux NFS client maintainer, PrimaryData trond.myklebust@primarydata.com
On Tue, 07 Nov 2017, Trond Myklebust wrote: > On Tue, 2017-11-07 at 13:26 -0500, Scott Mayhew wrote: > > On Tue, 07 Nov 2017, Trond Myklebust wrote: > > > > > On Tue, 2017-11-07 at 09:29 -0500, Scott Mayhew wrote: > > > > The following deadlock can occur between a process waiting for a > > > > client > > > > to initialize in while walking the client list and another > > > > process > > > > waiting for the nfs_clid_init_mutex so it can initialize that > > > > client: > > > > > > > > Process 1 Process 2 > > > > --------- --------- > > > > spin_lock(&nn->nfs_client_lock); > > > > list_add_tail(&CLIENTA->cl_share_link, > > > > &nn->nfs_client_list); > > > > spin_unlock(&nn->nfs_client_lock); > > > > spin_lock(&nn- > > > > > nfs_client_lock); > > > > > > > > list_add_tail(&CLIENTB- > > > > > cl_share_link, > > > > > > > > &nn- > > > > > nfs_client_list); > > > > > > > > spin_unlock(&nn- > > > > > nfs_client_lock); > > > > > > > > mutex_lock(&nfs_clid_init > > > > _mut > > > > ex); > > > > nfs41_walk_client_list(cl > > > > p, > > > > result, cred); > > > > nfs_wait_client_init_comp > > > > lete > > > > (CLIENTA); > > > > (waiting for nfs_clid_init_mutex) > > > > > > > > Make sure nfs_match_client() only evaluates clients that have > > > > completed > > > > initialization in order to prevent that deadlock. > > > > > > > > Signed-off-by: Scott Mayhew <smayhew@redhat.com> > > > > --- > > > > fs/nfs/client.c | 9 +++++++++ > > > > 1 file changed, 9 insertions(+) > > > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > > > > index 22880ef..8b093994 100644 > > > > --- a/fs/nfs/client.c > > > > +++ b/fs/nfs/client.c > > > > @@ -291,12 +291,21 @@ static struct nfs_client > > > > *nfs_match_client(const struct nfs_client_initdata *dat > > > > const struct sockaddr *sap = data->addr; > > > > struct nfs_net *nn = net_generic(data->net, nfs_net_id); > > > > > > > > +again: > > > > list_for_each_entry(clp, &nn->nfs_client_list, > > > > cl_share_link) { > > > > const struct sockaddr *clap = (struct sockaddr > > > > *)&clp->cl_addr; > > > > /* Don't match clients that failed to initialise > > > > properly */ > > > > if (clp->cl_cons_state < 0) > > > > continue; > > > > > > > > + if (clp->cl_minorversion > 0 && > > > > + clp->cl_cons_state > > > > > NFS_CS_READY) { > > > > + spin_unlock(&nn->nfs_client_lock); > > > > + nfs_wait_client_init_complete(clp); > > > > + spin_lock(&nn->nfs_client_lock); > > > > + goto again; > > > > + } > > > > + > > > > /* Different NFS versions cannot share the same > > > > nfs_client */ > > > > if (clp->rpc_ops != data->nfs_mod->rpc_ops) > > > > continue; > > > > > > Why the test for clp->cl_minorversion? What's so minor version > > > specific > > > about any of this? > > > > The deadlock doesn't occur with v4.0 clients because those are being > > marked NFS_CS_READY in nfs4_client_client(), before the trunking > > detection > > Sure, but the root cause you are asserting is that the nfs_client has > not finished initialising. What is minorversion-specific (or even > NFSv4-specific) about that? It's NFSv4-specific because one of the processes involved in the deadlock (the one waiting on the nfs_client_active_wq while holding the nfs_clid_init_mutex) is performing trunking detection, which is NFSv4-specific. Anyways, this patch is no good because it breaks when issuing mounts against multiple IPs of a multi-homed NFS server. I don't have a reference on the the client I'm waiting on, so if the process doing trunking detection with that client finds and uses an existing client, then the client I'm waiting on goes away (and even if I had a reference on it, if the process doing trunking detection finds and uses an existing client, there's nothing that would mark the client that I'm waiting for ready). I have another approach that fixes the issue. It's kinda ugly but I'm going to post it anyway because I don't really have any other ideas. Also I forgot to include the reproducer info, so here it is: 1 nfs client, 2 nfs servers on nfs servers: # for i in $(seq 1 25) ; do mkdir /exports/dir$i ; done # echo "/exports *(rw,no_root_squash)" >> /etc/exports # exportfs -ar on nfs client: # for i in $(seq 1 25) ; do mkdir -p /mnt/test$i ; done # hosts=(server1 server2) # for i in $(seq 1 25) ; do hostnum=$(($i % 2)) mount.nfs ${hosts[$hostnum]}:/exports/dir$i /mnt/test$i -o vers=4,minorversion=1 & done -Scott > > -- > Trond Myklebust > Linux NFS client maintainer, PrimaryData > trond.myklebust@primarydata.com -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 22880ef..8b093994 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -291,12 +291,21 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat const struct sockaddr *sap = data->addr; struct nfs_net *nn = net_generic(data->net, nfs_net_id); +again: list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ if (clp->cl_cons_state < 0) continue; + if (clp->cl_minorversion > 0 && + clp->cl_cons_state > NFS_CS_READY) { + spin_unlock(&nn->nfs_client_lock); + nfs_wait_client_init_complete(clp); + spin_lock(&nn->nfs_client_lock); + goto again; + } + /* Different NFS versions cannot share the same nfs_client */ if (clp->rpc_ops != data->nfs_mod->rpc_ops) continue;
The following deadlock can occur between a process waiting for a client to initialize in while walking the client list and another process waiting for the nfs_clid_init_mutex so it can initialize that client: Process 1 Process 2 --------- --------- spin_lock(&nn->nfs_client_lock); list_add_tail(&CLIENTA->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); spin_lock(&nn->nfs_client_lock); list_add_tail(&CLIENTB->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); mutex_lock(&nfs_clid_init_mutex); nfs41_walk_client_list(clp, result, cred); nfs_wait_client_init_complete(CLIENTA); (waiting for nfs_clid_init_mutex) Make sure nfs_match_client() only evaluates clients that have completed initialization in order to prevent that deadlock. Signed-off-by: Scott Mayhew <smayhew@redhat.com> --- fs/nfs/client.c | 9 +++++++++ 1 file changed, 9 insertions(+)