
[v2,2/3] NFSv4 introduce max_connect mount options

Message ID: 20210609215319.5518-3-olga.kornievskaia@gmail.com
State: New, archived
Series: don't collapse transports for the trunkable

Commit Message

Olga Kornievskaia June 9, 2021, 9:53 p.m. UTC
From: Olga Kornievskaia <kolga@netapp.com>

This option controls the maximum number of transports (xprts) the
client can establish to the server. This patch parses the value and
sets up the structures that keep track of max_connect.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfs/client.c           |  1 +
 fs/nfs/fs_context.c       |  8 ++++++++
 fs/nfs/internal.h         |  2 ++
 fs/nfs/nfs4client.c       | 12 ++++++++++--
 fs/nfs/super.c            |  2 ++
 include/linux/nfs_fs_sb.h |  1 +
 6 files changed, 24 insertions(+), 2 deletions(-)
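
Usage sketch (the values here are hypothetical): with this series, a
client might mount with "-o vers=4.1,nconnect=2,max_connect=8", where
nconnect caps the connections opened to a single server address and
max_connect caps the total number of transports the client may add as
it discovers additional (trunked) server addresses.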

Comments

Wang Yugui June 10, 2021, 1:49 a.m. UTC | #1
Hi,

Could we extend the 'nconnect' option rather than adding a new
'max_connect' option?

The max of nconnect is 16 with trunking over a single IP (kernel 5.3-5.?).
The max of nconnect could be 64 with trunking over multiple IPs (kernel 5.?-).

Best Regards
Wang Yugui (wangyugui@e16-tech.com)
2021/06/10

> From: Olga Kornievskaia <kolga@netapp.com>
> 
> This option will control up to how many xprts can the client
> establish to the server. This patch parses the value and sets
> up structures that keep track of max_connect.
> 
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfs/client.c           |  1 +
>  fs/nfs/fs_context.c       |  8 ++++++++
>  fs/nfs/internal.h         |  2 ++
>  fs/nfs/nfs4client.c       | 12 ++++++++++--
>  fs/nfs/super.c            |  2 ++
>  include/linux/nfs_fs_sb.h |  1 +
>  6 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 330f65727c45..486dec59972b 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
>  
>  	clp->cl_proto = cl_init->proto;
>  	clp->cl_nconnect = cl_init->nconnect;
> +	clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
>  	clp->cl_net = get_net(cl_init->net);
>  
>  	clp->cl_principal = "*";
> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> index d95c9a39bc70..cfbff7098f8e 100644
> --- a/fs/nfs/fs_context.c
> +++ b/fs/nfs/fs_context.c
> @@ -29,6 +29,7 @@
>  #endif
>  
>  #define NFS_MAX_CONNECTIONS 16
> +#define NFS_MAX_TRANSPORTS 128
>  
>  enum nfs_param {
>  	Opt_ac,
> @@ -60,6 +61,7 @@ enum nfs_param {
>  	Opt_mountvers,
>  	Opt_namelen,
>  	Opt_nconnect,
> +	Opt_max_connect,
>  	Opt_port,
>  	Opt_posix,
>  	Opt_proto,
> @@ -158,6 +160,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
>  	fsparam_u32   ("mountvers",	Opt_mountvers),
>  	fsparam_u32   ("namlen",	Opt_namelen),
>  	fsparam_u32   ("nconnect",	Opt_nconnect),
> +	fsparam_u32   ("max_connect",	Opt_max_connect),
>  	fsparam_string("nfsvers",	Opt_vers),
>  	fsparam_u32   ("port",		Opt_port),
>  	fsparam_flag_no("posix",	Opt_posix),
> @@ -770,6 +773,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
>  			goto out_of_bounds;
>  		ctx->nfs_server.nconnect = result.uint_32;
>  		break;
> +	case Opt_max_connect:
> +		if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS)
> +			goto out_of_bounds;
> +		ctx->nfs_server.max_connect = result.uint_32;
> +		break;
>  	case Opt_lookupcache:
>  		switch (result.uint_32) {
>  		case Opt_lookupcache_all:
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index a36af04188c2..66fc936834f2 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -67,6 +67,7 @@ struct nfs_client_initdata {
>  	int proto;
>  	u32 minorversion;
>  	unsigned int nconnect;
> +	unsigned int max_connect;
>  	struct net *net;
>  	const struct rpc_timeout *timeparms;
>  	const struct cred *cred;
> @@ -121,6 +122,7 @@ struct nfs_fs_context {
>  		int			port;
>  		unsigned short		protocol;
>  		unsigned short		nconnect;
> +		unsigned short		max_connect;
>  		unsigned short		export_path_len;
>  	} nfs_server;
>  
> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> index 42719384e25f..640c8235d817 100644
> --- a/fs/nfs/nfs4client.c
> +++ b/fs/nfs/nfs4client.c
> @@ -863,6 +863,7 @@ static int nfs4_set_client(struct nfs_server *server,
>  		const char *ip_addr,
>  		int proto, const struct rpc_timeout *timeparms,
>  		u32 minorversion, unsigned int nconnect,
> +		unsigned int max_connect,
>  		struct net *net)
>  {
>  	struct nfs_client_initdata cl_init = {
> @@ -881,6 +882,8 @@ static int nfs4_set_client(struct nfs_server *server,
>  
>  	if (minorversion == 0)
>  		__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
> +	else
> +		cl_init.max_connect = max_connect;
>  	if (proto == XPRT_TRANSPORT_TCP)
>  		cl_init.nconnect = nconnect;
>  
> @@ -950,8 +953,10 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
>  		return ERR_PTR(-EINVAL);
>  	cl_init.hostname = buf;
>  
> -	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
> +	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
>  		cl_init.nconnect = mds_clp->cl_nconnect;
> +		cl_init.max_connect = mds_clp->cl_max_connect;
> +	}
>  
>  	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
>  		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
> @@ -1120,6 +1125,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
>  				&timeparms,
>  				ctx->minorversion,
>  				ctx->nfs_server.nconnect,
> +				ctx->nfs_server.max_connect,
>  				fc->net_ns);
>  	if (error < 0)
>  		return error;
> @@ -1209,6 +1215,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
>  				parent_server->client->cl_timeout,
>  				parent_client->cl_mvops->minor_version,
>  				parent_client->cl_nconnect,
> +				parent_client->cl_max_connect,
>  				parent_client->cl_net);
>  	if (!error)
>  		goto init_server;
> @@ -1224,6 +1231,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
>  				parent_server->client->cl_timeout,
>  				parent_client->cl_mvops->minor_version,
>  				parent_client->cl_nconnect,
> +				parent_client->cl_max_connect,
>  				parent_client->cl_net);
>  	if (error < 0)
>  		goto error;
> @@ -1321,7 +1329,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
>  	error = nfs4_set_client(server, hostname, sap, salen, buf,
>  				clp->cl_proto, clnt->cl_timeout,
>  				clp->cl_minorversion,
> -				clp->cl_nconnect, net);
> +				clp->cl_nconnect, clp->cl_max_connect, net);
>  	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
>  	if (error != 0) {
>  		nfs_server_insert_lists(server);
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index fe58525cfed4..e65c83494c05 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -480,6 +480,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
>  	if (clp->cl_nconnect > 0)
>  		seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
>  	if (version == 4) {
> +		if (clp->cl_max_connect > 1)
> +			seq_printf(m, ",max_connect=%u", clp->cl_max_connect);
>  		if (nfss->port != NFS_PORT)
>  			seq_printf(m, ",port=%u", nfss->port);
>  	} else
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index d71a0e90faeb..2a9acbfe00f0 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -62,6 +62,7 @@ struct nfs_client {
>  
>  	u32			cl_minorversion;/* NFSv4 minorversion */
>  	unsigned int		cl_nconnect;	/* Number of connections */
> +	unsigned int		cl_max_connect; /* max number of xprts allowed */
>  	const char *		cl_principal;  /* used for machine cred */
>  
>  #if IS_ENABLED(CONFIG_NFS_V4)
> -- 
> 2.27.0
Wang Yugui June 10, 2021, 2:22 a.m. UTC | #2
Hi,

> We could extend 'nconnect' option rather than a new option 'max_connect'?
> 
> max of nconnect is 16 when trunk support of single ip (kernel 5.3-5.?).
> max of nconnect is 64 when trunk support of multiple ip (kernel 5.?-).

I'm sorry, that was a bad idea.

We still need 'nconnect' for a single IP.

'nconnect' has a default value of 1, but 'max_connect' may have a
default value of 256 or 128, and it may even be hard-coded in the NFS
server.

Best Regards
Wang Yugui (wangyugui@e16-tech.com)
2021/06/10


> Best Regards
> Wang Yugui (wangyugui@e16-tech.com)
> 2021/06/10
> 
> > From: Olga Kornievskaia <kolga@netapp.com>
> > 
> > This option will control up to how many xprts can the client
> > establish to the server. This patch parses the value and sets
> > up structures that keep track of max_connect.
> > 
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> >  fs/nfs/client.c           |  1 +
> >  fs/nfs/fs_context.c       |  8 ++++++++
> >  fs/nfs/internal.h         |  2 ++
> >  fs/nfs/nfs4client.c       | 12 ++++++++++--
> >  fs/nfs/super.c            |  2 ++
> >  include/linux/nfs_fs_sb.h |  1 +
> >  6 files changed, 24 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 330f65727c45..486dec59972b 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
> >  
> >  	clp->cl_proto = cl_init->proto;
> >  	clp->cl_nconnect = cl_init->nconnect;
> > +	clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
> >  	clp->cl_net = get_net(cl_init->net);
> >  
> >  	clp->cl_principal = "*";
> > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > index d95c9a39bc70..cfbff7098f8e 100644
> > --- a/fs/nfs/fs_context.c
> > +++ b/fs/nfs/fs_context.c
> > @@ -29,6 +29,7 @@
> >  #endif
> >  
> >  #define NFS_MAX_CONNECTIONS 16
> > +#define NFS_MAX_TRANSPORTS 128
> >  
> >  enum nfs_param {
> >  	Opt_ac,
> > @@ -60,6 +61,7 @@ enum nfs_param {
> >  	Opt_mountvers,
> >  	Opt_namelen,
> >  	Opt_nconnect,
> > +	Opt_max_connect,
> >  	Opt_port,
> >  	Opt_posix,
> >  	Opt_proto,
> > @@ -158,6 +160,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
> >  	fsparam_u32   ("mountvers",	Opt_mountvers),
> >  	fsparam_u32   ("namlen",	Opt_namelen),
> >  	fsparam_u32   ("nconnect",	Opt_nconnect),
> > +	fsparam_u32   ("max_connect",	Opt_max_connect),
> >  	fsparam_string("nfsvers",	Opt_vers),
> >  	fsparam_u32   ("port",		Opt_port),
> >  	fsparam_flag_no("posix",	Opt_posix),
> > @@ -770,6 +773,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
> >  			goto out_of_bounds;
> >  		ctx->nfs_server.nconnect = result.uint_32;
> >  		break;
> > +	case Opt_max_connect:
> > +		if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS)
> > +			goto out_of_bounds;
> > +		ctx->nfs_server.max_connect = result.uint_32;
> > +		break;
> >  	case Opt_lookupcache:
> >  		switch (result.uint_32) {
> >  		case Opt_lookupcache_all:
> > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> > index a36af04188c2..66fc936834f2 100644
> > --- a/fs/nfs/internal.h
> > +++ b/fs/nfs/internal.h
> > @@ -67,6 +67,7 @@ struct nfs_client_initdata {
> >  	int proto;
> >  	u32 minorversion;
> >  	unsigned int nconnect;
> > +	unsigned int max_connect;
> >  	struct net *net;
> >  	const struct rpc_timeout *timeparms;
> >  	const struct cred *cred;
> > @@ -121,6 +122,7 @@ struct nfs_fs_context {
> >  		int			port;
> >  		unsigned short		protocol;
> >  		unsigned short		nconnect;
> > +		unsigned short		max_connect;
> >  		unsigned short		export_path_len;
> >  	} nfs_server;
> >  
> > diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> > index 42719384e25f..640c8235d817 100644
> > --- a/fs/nfs/nfs4client.c
> > +++ b/fs/nfs/nfs4client.c
> > @@ -863,6 +863,7 @@ static int nfs4_set_client(struct nfs_server *server,
> >  		const char *ip_addr,
> >  		int proto, const struct rpc_timeout *timeparms,
> >  		u32 minorversion, unsigned int nconnect,
> > +		unsigned int max_connect,
> >  		struct net *net)
> >  {
> >  	struct nfs_client_initdata cl_init = {
> > @@ -881,6 +882,8 @@ static int nfs4_set_client(struct nfs_server *server,
> >  
> >  	if (minorversion == 0)
> >  		__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
> > +	else
> > +		cl_init.max_connect = max_connect;
> >  	if (proto == XPRT_TRANSPORT_TCP)
> >  		cl_init.nconnect = nconnect;
> >  
> > @@ -950,8 +953,10 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
> >  		return ERR_PTR(-EINVAL);
> >  	cl_init.hostname = buf;
> >  
> > -	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
> > +	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
> >  		cl_init.nconnect = mds_clp->cl_nconnect;
> > +		cl_init.max_connect = mds_clp->cl_max_connect;
> > +	}
> >  
> >  	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
> >  		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
> > @@ -1120,6 +1125,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
> >  				&timeparms,
> >  				ctx->minorversion,
> >  				ctx->nfs_server.nconnect,
> > +				ctx->nfs_server.max_connect,
> >  				fc->net_ns);
> >  	if (error < 0)
> >  		return error;
> > @@ -1209,6 +1215,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
> >  				parent_server->client->cl_timeout,
> >  				parent_client->cl_mvops->minor_version,
> >  				parent_client->cl_nconnect,
> > +				parent_client->cl_max_connect,
> >  				parent_client->cl_net);
> >  	if (!error)
> >  		goto init_server;
> > @@ -1224,6 +1231,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
> >  				parent_server->client->cl_timeout,
> >  				parent_client->cl_mvops->minor_version,
> >  				parent_client->cl_nconnect,
> > +				parent_client->cl_max_connect,
> >  				parent_client->cl_net);
> >  	if (error < 0)
> >  		goto error;
> > @@ -1321,7 +1329,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
> >  	error = nfs4_set_client(server, hostname, sap, salen, buf,
> >  				clp->cl_proto, clnt->cl_timeout,
> >  				clp->cl_minorversion,
> > -				clp->cl_nconnect, net);
> > +				clp->cl_nconnect, clp->cl_max_connect, net);
> >  	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
> >  	if (error != 0) {
> >  		nfs_server_insert_lists(server);
> > diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> > index fe58525cfed4..e65c83494c05 100644
> > --- a/fs/nfs/super.c
> > +++ b/fs/nfs/super.c
> > @@ -480,6 +480,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
> >  	if (clp->cl_nconnect > 0)
> >  		seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
> >  	if (version == 4) {
> > +		if (clp->cl_max_connect > 1)
> > +			seq_printf(m, ",max_connect=%u", clp->cl_max_connect);
> >  		if (nfss->port != NFS_PORT)
> >  			seq_printf(m, ",port=%u", nfss->port);
> >  	} else
> > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> > index d71a0e90faeb..2a9acbfe00f0 100644
> > --- a/include/linux/nfs_fs_sb.h
> > +++ b/include/linux/nfs_fs_sb.h
> > @@ -62,6 +62,7 @@ struct nfs_client {
> >  
> >  	u32			cl_minorversion;/* NFSv4 minorversion */
> >  	unsigned int		cl_nconnect;	/* Number of connections */
> > +	unsigned int		cl_max_connect; /* max number of xprts allowed */
> >  	const char *		cl_principal;  /* used for machine cred */
> >  
> >  #if IS_ENABLED(CONFIG_NFS_V4)
> > -- 
> > 2.27.0
>
Chuck Lever June 10, 2021, 1:30 p.m. UTC | #3
> On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> 
> From: Olga Kornievskaia <kolga@netapp.com>
> 
> This option will control up to how many xprts can the client
> establish to the server. This patch parses the value and sets
> up structures that keep track of max_connect.
> 
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
> fs/nfs/client.c           |  1 +
> fs/nfs/fs_context.c       |  8 ++++++++
> fs/nfs/internal.h         |  2 ++
> fs/nfs/nfs4client.c       | 12 ++++++++++--
> fs/nfs/super.c            |  2 ++
> include/linux/nfs_fs_sb.h |  1 +
> 6 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 330f65727c45..486dec59972b 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
> 
> 	clp->cl_proto = cl_init->proto;
> 	clp->cl_nconnect = cl_init->nconnect;
> +	clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;

So, 1 is the default setting, meaning the "add another transport"
facility is disabled by default. Would it be less surprising for
an admin to allow some extra connections by default?
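
A minimal sketch of that alternative (NFS_DEF_MAX_XPRTS is a
hypothetical name and value, not something in this patch):

	/* hypothetical: permit a few extra transports by default */
	#define NFS_DEF_MAX_XPRTS 4

	clp->cl_max_connect = cl_init->max_connect ?
			      cl_init->max_connect : NFS_DEF_MAX_XPRTS;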


> 	clp->cl_net = get_net(cl_init->net);
> 
> 	clp->cl_principal = "*";
> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> index d95c9a39bc70..cfbff7098f8e 100644
> --- a/fs/nfs/fs_context.c
> +++ b/fs/nfs/fs_context.c
> @@ -29,6 +29,7 @@
> #endif
> 
> #define NFS_MAX_CONNECTIONS 16
> +#define NFS_MAX_TRANSPORTS 128

This maximum seems excessive... again, there are diminishing
returns to adding more connections to the same server. What's
wrong with re-using NFS_MAX_CONNECTIONS for the maximum?

As always, I'm a little queasy about adding yet another mount
option. Are there real use cases where a whole-client setting
(like a sysfs attribute) would be inadequate? Is there a way
the client could figure out a reasonable maximum without a
human intervention, say, by counting the number of NICs on
the system?
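
As a sketch, a whole-client setting could be as simple as a module
parameter (the name nfs_max_connect and the default are hypothetical):

	/* hypothetical module-wide cap instead of a per-mount option */
	static unsigned int nfs_max_connect = 16;
	module_param(nfs_max_connect, uint, 0644);
	MODULE_PARM_DESC(nfs_max_connect,
			 "Maximum transports per NFS server");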


> enum nfs_param {
> 	Opt_ac,
> @@ -60,6 +61,7 @@ enum nfs_param {
> 	Opt_mountvers,
> 	Opt_namelen,
> 	Opt_nconnect,
> +	Opt_max_connect,
> 	Opt_port,
> 	Opt_posix,
> 	Opt_proto,
> @@ -158,6 +160,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
> 	fsparam_u32   ("mountvers",	Opt_mountvers),
> 	fsparam_u32   ("namlen",	Opt_namelen),
> 	fsparam_u32   ("nconnect",	Opt_nconnect),
> +	fsparam_u32   ("max_connect",	Opt_max_connect),
> 	fsparam_string("nfsvers",	Opt_vers),
> 	fsparam_u32   ("port",		Opt_port),
> 	fsparam_flag_no("posix",	Opt_posix),
> @@ -770,6 +773,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
> 			goto out_of_bounds;
> 		ctx->nfs_server.nconnect = result.uint_32;
> 		break;
> +	case Opt_max_connect:
> +		if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS)
> +			goto out_of_bounds;
> +		ctx->nfs_server.max_connect = result.uint_32;
> +		break;
> 	case Opt_lookupcache:
> 		switch (result.uint_32) {
> 		case Opt_lookupcache_all:
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index a36af04188c2..66fc936834f2 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -67,6 +67,7 @@ struct nfs_client_initdata {
> 	int proto;
> 	u32 minorversion;
> 	unsigned int nconnect;
> +	unsigned int max_connect;
> 	struct net *net;
> 	const struct rpc_timeout *timeparms;
> 	const struct cred *cred;
> @@ -121,6 +122,7 @@ struct nfs_fs_context {
> 		int			port;
> 		unsigned short		protocol;
> 		unsigned short		nconnect;
> +		unsigned short		max_connect;
> 		unsigned short		export_path_len;
> 	} nfs_server;
> 
> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> index 42719384e25f..640c8235d817 100644
> --- a/fs/nfs/nfs4client.c
> +++ b/fs/nfs/nfs4client.c
> @@ -863,6 +863,7 @@ static int nfs4_set_client(struct nfs_server *server,
> 		const char *ip_addr,
> 		int proto, const struct rpc_timeout *timeparms,
> 		u32 minorversion, unsigned int nconnect,
> +		unsigned int max_connect,
> 		struct net *net)
> {
> 	struct nfs_client_initdata cl_init = {
> @@ -881,6 +882,8 @@ static int nfs4_set_client(struct nfs_server *server,
> 
> 	if (minorversion == 0)
> 		__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
> +	else
> +		cl_init.max_connect = max_connect;
> 	if (proto == XPRT_TRANSPORT_TCP)
> 		cl_init.nconnect = nconnect;
> 
> @@ -950,8 +953,10 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
> 		return ERR_PTR(-EINVAL);
> 	cl_init.hostname = buf;
> 
> -	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
> +	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
> 		cl_init.nconnect = mds_clp->cl_nconnect;
> +		cl_init.max_connect = mds_clp->cl_max_connect;
> +	}
> 
> 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
> 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
> @@ -1120,6 +1125,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
> 				&timeparms,
> 				ctx->minorversion,
> 				ctx->nfs_server.nconnect,
> +				ctx->nfs_server.max_connect,
> 				fc->net_ns);
> 	if (error < 0)
> 		return error;
> @@ -1209,6 +1215,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
> 				parent_server->client->cl_timeout,
> 				parent_client->cl_mvops->minor_version,
> 				parent_client->cl_nconnect,
> +				parent_client->cl_max_connect,
> 				parent_client->cl_net);
> 	if (!error)
> 		goto init_server;
> @@ -1224,6 +1231,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
> 				parent_server->client->cl_timeout,
> 				parent_client->cl_mvops->minor_version,
> 				parent_client->cl_nconnect,
> +				parent_client->cl_max_connect,
> 				parent_client->cl_net);
> 	if (error < 0)
> 		goto error;
> @@ -1321,7 +1329,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
> 	error = nfs4_set_client(server, hostname, sap, salen, buf,
> 				clp->cl_proto, clnt->cl_timeout,
> 				clp->cl_minorversion,
> -				clp->cl_nconnect, net);
> +				clp->cl_nconnect, clp->cl_max_connect, net);
> 	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
> 	if (error != 0) {
> 		nfs_server_insert_lists(server);
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index fe58525cfed4..e65c83494c05 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -480,6 +480,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
> 	if (clp->cl_nconnect > 0)
> 		seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
> 	if (version == 4) {
> +		if (clp->cl_max_connect > 1)
> +			seq_printf(m, ",max_connect=%u", clp->cl_max_connect);
> 		if (nfss->port != NFS_PORT)
> 			seq_printf(m, ",port=%u", nfss->port);
> 	} else
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index d71a0e90faeb..2a9acbfe00f0 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -62,6 +62,7 @@ struct nfs_client {
> 
> 	u32			cl_minorversion;/* NFSv4 minorversion */
> 	unsigned int		cl_nconnect;	/* Number of connections */
> +	unsigned int		cl_max_connect; /* max number of xprts allowed */
> 	const char *		cl_principal;  /* used for machine cred */
> 
> #if IS_ENABLED(CONFIG_NFS_V4)
> -- 
> 2.27.0
> 

--
Chuck Lever
Trond Myklebust June 10, 2021, 1:34 p.m. UTC | #4
On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> 
> 
> > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia < 
> > olga.kornievskaia@gmail.com> wrote:
> > 
> > From: Olga Kornievskaia <kolga@netapp.com>
> > 
> > This option will control up to how many xprts can the client
> > establish to the server. This patch parses the value and sets
> > up structures that keep track of max_connect.
> > 
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> > fs/nfs/client.c           |  1 +
> > fs/nfs/fs_context.c       |  8 ++++++++
> > fs/nfs/internal.h         |  2 ++
> > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > fs/nfs/super.c            |  2 ++
> > include/linux/nfs_fs_sb.h |  1 +
> > 6 files changed, 24 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 330f65727c45..486dec59972b 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const
> > struct nfs_client_initdata *cl_init)
> > 
> >         clp->cl_proto = cl_init->proto;
> >         clp->cl_nconnect = cl_init->nconnect;
> > +       clp->cl_max_connect = cl_init->max_connect ? cl_init-
> > >max_connect : 1;
> 
> So, 1 is the default setting, meaning the "add another transport"
> facility is disabled by default. Would it be less surprising for
> an admin to allow some extra connections by default?
> 
> 
> >         clp->cl_net = get_net(cl_init->net);
> > 
> >         clp->cl_principal = "*";
> > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > index d95c9a39bc70..cfbff7098f8e 100644
> > --- a/fs/nfs/fs_context.c
> > +++ b/fs/nfs/fs_context.c
> > @@ -29,6 +29,7 @@
> > #endif
> > 
> > #define NFS_MAX_CONNECTIONS 16
> > +#define NFS_MAX_TRANSPORTS 128
> 
> This maximum seems excessive... again, there are diminishing
> returns to adding more connections to the same server. what's
> wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> 
> As always, I'm a little queasy about adding yet another mount
> option. Are there real use cases where a whole-client setting
> (like a sysfs attribute) would be inadequate? Is there a way
> the client could figure out a reasonable maximum without a
> human intervention, say, by counting the number of NICs on
> the system?

Oh, hell no! We're not tying anything to the number of NICs...
Chuck Lever June 10, 2021, 1:56 p.m. UTC | #5
> On Jun 10, 2021, at 9:34 AM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
>> 
>> 
>>> On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia < 
>>> olga.kornievskaia@gmail.com> wrote:
>>> 
>>> From: Olga Kornievskaia <kolga@netapp.com>
>>> 
>>> This option will control up to how many xprts can the client
>>> establish to the server. This patch parses the value and sets
>>> up structures that keep track of max_connect.
>>> 
>>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
>>> ---
>>> fs/nfs/client.c           |  1 +
>>> fs/nfs/fs_context.c       |  8 ++++++++
>>> fs/nfs/internal.h         |  2 ++
>>> fs/nfs/nfs4client.c       | 12 ++++++++++--
>>> fs/nfs/super.c            |  2 ++
>>> include/linux/nfs_fs_sb.h |  1 +
>>> 6 files changed, 24 insertions(+), 2 deletions(-)
>>> 
>>> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>>> index 330f65727c45..486dec59972b 100644
>>> --- a/fs/nfs/client.c
>>> +++ b/fs/nfs/client.c
>>> @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const
>>> struct nfs_client_initdata *cl_init)
>>> 
>>>         clp->cl_proto = cl_init->proto;
>>>         clp->cl_nconnect = cl_init->nconnect;
>>> +       clp->cl_max_connect = cl_init->max_connect ? cl_init-
>>>> max_connect : 1;
>> 
>> So, 1 is the default setting, meaning the "add another transport"
>> facility is disabled by default. Would it be less surprising for
>> an admin to allow some extra connections by default?
>> 
>> 
>>>         clp->cl_net = get_net(cl_init->net);
>>> 
>>>         clp->cl_principal = "*";
>>> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
>>> index d95c9a39bc70..cfbff7098f8e 100644
>>> --- a/fs/nfs/fs_context.c
>>> +++ b/fs/nfs/fs_context.c
>>> @@ -29,6 +29,7 @@
>>> #endif
>>> 
>>> #define NFS_MAX_CONNECTIONS 16
>>> +#define NFS_MAX_TRANSPORTS 128
>> 
>> This maximum seems excessive... again, there are diminishing
>> returns to adding more connections to the same server. what's
>> wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
>> 
>> As always, I'm a little queasy about adding yet another mount
>> option. Are there real use cases where a whole-client setting
>> (like a sysfs attribute) would be inadequate? Is there a way
>> the client could figure out a reasonable maximum without a
>> human intervention, say, by counting the number of NICs on
>> the system?
> 
> Oh, hell no! We're not tying anything to the number of NICs...

That's a bit of an over-reaction. :-) A little more explanation
would be welcome. I mean, don't you expect someone to ask "How
do I pick a good value?" and someone might reasonably answer
"Well, start with the number of NICs on your client times 3" or
something like that.

IMO we're about to add another admin setting without understanding
how it will be used, how to select a good maximum value, or even
whether this maximum needs to be adjustable. In a previous e-mail
Olga has already demonstrated that it will be difficult to explain
how to use this setting with nconnect=.

Thus I would favor a (moderate) soldered-in maximum to start with,
and then as real world use cases arise, consider adding a tuning
mechanism based on actual requirements.


--
Chuck Lever
Trond Myklebust June 10, 2021, 2:13 p.m. UTC | #6
On Thu, 2021-06-10 at 13:56 +0000, Chuck Lever III wrote:
> 
> 
> > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
> > trondmy@hammerspace.com> wrote:
> > 
> > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> > > 
> > > 
> > > > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia < 
> > > > olga.kornievskaia@gmail.com> wrote:
> > > > 
> > > > From: Olga Kornievskaia <kolga@netapp.com>
> > > > 
> > > > This option will control up to how many xprts can the client
> > > > establish to the server. This patch parses the value and sets
> > > > up structures that keep track of max_connect.
> > > > 
> > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > ---
> > > > fs/nfs/client.c           |  1 +
> > > > fs/nfs/fs_context.c       |  8 ++++++++
> > > > fs/nfs/internal.h         |  2 ++
> > > > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > > > fs/nfs/super.c            |  2 ++
> > > > include/linux/nfs_fs_sb.h |  1 +
> > > > 6 files changed, 24 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > index 330f65727c45..486dec59972b 100644
> > > > --- a/fs/nfs/client.c
> > > > +++ b/fs/nfs/client.c
> > > > @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const
> > > > struct nfs_client_initdata *cl_init)
> > > > 
> > > >         clp->cl_proto = cl_init->proto;
> > > >         clp->cl_nconnect = cl_init->nconnect;
> > > > +       clp->cl_max_connect = cl_init->max_connect ? cl_init-
> > > > > max_connect : 1;
> > > 
> > > So, 1 is the default setting, meaning the "add another transport"
> > > facility is disabled by default. Would it be less surprising for
> > > an admin to allow some extra connections by default?
> > > 
> > > 
> > > >         clp->cl_net = get_net(cl_init->net);
> > > > 
> > > >         clp->cl_principal = "*";
> > > > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > > > index d95c9a39bc70..cfbff7098f8e 100644
> > > > --- a/fs/nfs/fs_context.c
> > > > +++ b/fs/nfs/fs_context.c
> > > > @@ -29,6 +29,7 @@
> > > > #endif
> > > > 
> > > > #define NFS_MAX_CONNECTIONS 16
> > > > +#define NFS_MAX_TRANSPORTS 128
> > > 
> > > This maximum seems excessive... again, there are diminishing
> > > returns to adding more connections to the same server. what's
> > > wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> > > 
> > > As always, I'm a little queasy about adding yet another mount
> > > option. Are there real use cases where a whole-client setting
> > > (like a sysfs attribute) would be inadequate? Is there a way
> > > the client could figure out a reasonable maximum without a
> > > human intervention, say, by counting the number of NICs on
> > > the system?
> > 
> > Oh, hell no! We're not tying anything to the number of NICs...
> 
> That's a bit of an over-reaction. :-) A little more explanation
> would be welcome. I mean, don't you expect someone to ask "How
> do I pick a good value?" and someone might reasonably answer
> "Well, start with the number of NICs on your client times 3" or
> something like that.
> 
> IMO we're about to add another admin setting without understanding
> how it will be used, how to select a good maximum value, or even
> whether this maximum needs to be adjustable. In a previous e-mail
> Olga has already demonstrated that it will be difficult to explain
> how to use this setting with nconnect=.
> 
> Thus I would favor a (moderate) soldered-in maximum to start with,
> and then as real world use cases arise, consider adding a tuning
> mechanism based on actual requirements.

It's not an overreaction. It's insane to think that counting NICs gives
you any notion whatsoever about the network topology and connectivity
between the client and server. It doesn't even tell you how many of
those NICs might potentially be available to your application.

We're not doing any automation based on that kind of layering
violation.
Olga Kornievskaia June 10, 2021, 2:29 p.m. UTC | #7
On Thu, Jun 10, 2021 at 9:56 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
>
>
>
> > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> >
> > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> >>
> >>
> >>> On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> >>> olga.kornievskaia@gmail.com> wrote:
> >>>
> >>> From: Olga Kornievskaia <kolga@netapp.com>
> >>>
> >>> This option will control up to how many xprts can the client
> >>> establish to the server. This patch parses the value and sets
> >>> up structures that keep track of max_connect.
> >>>
> >>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> >>> ---
> >>> fs/nfs/client.c           |  1 +
> >>> fs/nfs/fs_context.c       |  8 ++++++++
> >>> fs/nfs/internal.h         |  2 ++
> >>> fs/nfs/nfs4client.c       | 12 ++++++++++--
> >>> fs/nfs/super.c            |  2 ++
> >>> include/linux/nfs_fs_sb.h |  1 +
> >>> 6 files changed, 24 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> >>> index 330f65727c45..486dec59972b 100644
> >>> --- a/fs/nfs/client.c
> >>> +++ b/fs/nfs/client.c
> >>> @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const
> >>> struct nfs_client_initdata *cl_init)
> >>>
> >>>         clp->cl_proto = cl_init->proto;
> >>>         clp->cl_nconnect = cl_init->nconnect;
> >>> +       clp->cl_max_connect = cl_init->max_connect ? cl_init-
> >>>> max_connect : 1;
> >>
> >> So, 1 is the default setting, meaning the "add another transport"
> >> facility is disabled by default. Would it be less surprising for
> >> an admin to allow some extra connections by default?
> >>
> >>
> >>>         clp->cl_net = get_net(cl_init->net);
> >>>
> >>>         clp->cl_principal = "*";
> >>> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> >>> index d95c9a39bc70..cfbff7098f8e 100644
> >>> --- a/fs/nfs/fs_context.c
> >>> +++ b/fs/nfs/fs_context.c
> >>> @@ -29,6 +29,7 @@
> >>> #endif
> >>>
> >>> #define NFS_MAX_CONNECTIONS 16
> >>> +#define NFS_MAX_TRANSPORTS 128
> >>
> >> This maximum seems excessive... again, there are diminishing
> >> returns to adding more connections to the same server. what's
> >> wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> >>
> >> As always, I'm a little queasy about adding yet another mount
> >> option. Are there real use cases where a whole-client setting
> >> (like a sysfs attribute) would be inadequate? Is there a way
> >> the client could figure out a reasonable maximum without a
> >> human intervention, say, by counting the number of NICs on
> >> the system?
> >
> > Oh, hell no! We're not tying anything to the number of NICs...
>
> That's a bit of an over-reaction. :-) A little more explanation
> would be welcome. I mean, don't you expect someone to ask "How
> do I pick a good value?" and someone might reasonably answer
> "Well, start with the number of NICs on your client times 3" or
> something like that.

That's what I was thinking, and thank you for at least considering
that it's a reasonable answer.

> IMO we're about to add another admin setting without understanding
> how it will be used, how to select a good maximum value, or even
> whether this maximum needs to be adjustable. In a previous e-mail
> Olga has already demonstrated that it will be difficult to explain
> how to use this setting with nconnect=.

I agree that how it will be used is not yet well understood, but I
think nconnect and max_connect represent different capabilities. I
agree that adding nconnect transports leads to diminishing returns
after a certain (relatively low) number. However, I don't believe the
same holds when the xprts are going over different NICs. Therefore I
didn't think max_connect should be bound by the same numbers as
nconnect. Perhaps 128 is too high a value (for reference, I did
8 * nconnect_max, i.e. 8 * 16).

> Thus I would favor a (moderate) soldered-in maximum to start with,
> and then as real world use cases arise, consider adding a tuning
> mechanism based on actual requirements.

Can you suggest a moderate number between 16 and 128?

>
>
> --
> Chuck Lever
>
>
>
Olga Kornievskaia June 10, 2021, 2:31 p.m. UTC | #8
On Thu, Jun 10, 2021 at 10:13 AM Trond Myklebust
<trondmy@hammerspace.com> wrote:
>
> On Thu, 2021-06-10 at 13:56 +0000, Chuck Lever III wrote:
> >
> >
> > > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
> > > trondmy@hammerspace.com> wrote:
> > >
> > > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> > > >
> > > >
> > > > > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> > > > > olga.kornievskaia@gmail.com> wrote:
> > > > >
> > > > > From: Olga Kornievskaia <kolga@netapp.com>
> > > > >
> > > > > This option will control up to how many xprts can the client
> > > > > establish to the server. This patch parses the value and sets
> > > > > up structures that keep track of max_connect.
> > > > >
> > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > ---
> > > > > fs/nfs/client.c           |  1 +
> > > > > fs/nfs/fs_context.c       |  8 ++++++++
> > > > > fs/nfs/internal.h         |  2 ++
> > > > > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > > > > fs/nfs/super.c            |  2 ++
> > > > > include/linux/nfs_fs_sb.h |  1 +
> > > > > 6 files changed, 24 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > index 330f65727c45..486dec59972b 100644
> > > > > --- a/fs/nfs/client.c
> > > > > +++ b/fs/nfs/client.c
> > > > > @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const
> > > > > struct nfs_client_initdata *cl_init)
> > > > >
> > > > >         clp->cl_proto = cl_init->proto;
> > > > >         clp->cl_nconnect = cl_init->nconnect;
> > > > > +       clp->cl_max_connect = cl_init->max_connect ? cl_init-
> > > > > > max_connect : 1;
> > > >
> > > > So, 1 is the default setting, meaning the "add another transport"
> > > > facility is disabled by default. Would it be less surprising for
> > > > an admin to allow some extra connections by default?
> > > >
> > > >
> > > > >         clp->cl_net = get_net(cl_init->net);
> > > > >
> > > > >         clp->cl_principal = "*";
> > > > > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > > > > index d95c9a39bc70..cfbff7098f8e 100644
> > > > > --- a/fs/nfs/fs_context.c
> > > > > +++ b/fs/nfs/fs_context.c
> > > > > @@ -29,6 +29,7 @@
> > > > > #endif
> > > > >
> > > > > #define NFS_MAX_CONNECTIONS 16
> > > > > +#define NFS_MAX_TRANSPORTS 128
> > > >
> > > > This maximum seems excessive... again, there are diminishing
> > > > returns to adding more connections to the same server. what's
> > > > wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> > > >
> > > > As always, I'm a little queasy about adding yet another mount
> > > > option. Are there real use cases where a whole-client setting
> > > > (like a sysfs attribute) would be inadequate? Is there a way
> > > > the client could figure out a reasonable maximum without a
> > > > human intervention, say, by counting the number of NICs on
> > > > the system?
> > >
> > > Oh, hell no! We're not tying anything to the number of NICs...
> >
> > That's a bit of an over-reaction. :-) A little more explanation
> > would be welcome. I mean, don't you expect someone to ask "How
> > do I pick a good value?" and someone might reasonably answer
> > "Well, start with the number of NICs on your client times 3" or
> > something like that.
> >
> > IMO we're about to add another admin setting without understanding
> > how it will be used, how to select a good maximum value, or even
> > whether this maximum needs to be adjustable. In a previous e-mail
> > Olga has already demonstrated that it will be difficult to explain
> > how to use this setting with nconnect=.
> >
> > Thus I would favor a (moderate) soldered-in maximum to start with,
> > and then as real world use cases arise, consider adding a tuning
> > mechanism based on actual requirements.
>
> It's not an overreaction. It's insane to think that counting NICs gives
> you any notion whatsoever about the network topology and connectivity
> between the client and server. It doesn't even tell you how many of
> those NICs might potentially be available to your application.
>
> We're not doing any automation based on that kind of layering
> violation.

I'm not suggesting that we programmatically count the number of NICs
to determine the value of max_connect.

>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Chuck Lever June 10, 2021, 2:38 p.m. UTC | #9
> On Jun 10, 2021, at 10:13 AM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Thu, 2021-06-10 at 13:56 +0000, Chuck Lever III wrote:
>> 
>> 
>>> On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
>>> trondmy@hammerspace.com> wrote:
>>> 
>>> On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
>>>> 
>>>> 
>>>>> On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia < 
>>>>> olga.kornievskaia@gmail.com> wrote:
>>>>> 
>>>>> From: Olga Kornievskaia <kolga@netapp.com>
>>>>> 
>>>>> This option will control up to how many xprts can the client
>>>>> establish to the server. This patch parses the value and sets
>>>>> up structures that keep track of max_connect.
>>>>> 
>>>>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
>>>>> ---
>>>>> fs/nfs/client.c           |  1 +
>>>>> fs/nfs/fs_context.c       |  8 ++++++++
>>>>> fs/nfs/internal.h         |  2 ++
>>>>> fs/nfs/nfs4client.c       | 12 ++++++++++--
>>>>> fs/nfs/super.c            |  2 ++
>>>>> include/linux/nfs_fs_sb.h |  1 +
>>>>> 6 files changed, 24 insertions(+), 2 deletions(-)
>>>>> 
>>>>> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>>>>> index 330f65727c45..486dec59972b 100644
>>>>> --- a/fs/nfs/client.c
>>>>> +++ b/fs/nfs/client.c
>>>>> @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const
>>>>> struct nfs_client_initdata *cl_init)
>>>>> 
>>>>>         clp->cl_proto = cl_init->proto;
>>>>>         clp->cl_nconnect = cl_init->nconnect;
>>>>> +       clp->cl_max_connect = cl_init->max_connect ? cl_init-
>>>>>> max_connect : 1;
>>>> 
>>>> So, 1 is the default setting, meaning the "add another transport"
>>>> facility is disabled by default. Would it be less surprising for
>>>> an admin to allow some extra connections by default?
>>>> 
>>>> 
>>>>>         clp->cl_net = get_net(cl_init->net);
>>>>> 
>>>>>         clp->cl_principal = "*";
>>>>> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
>>>>> index d95c9a39bc70..cfbff7098f8e 100644
>>>>> --- a/fs/nfs/fs_context.c
>>>>> +++ b/fs/nfs/fs_context.c
>>>>> @@ -29,6 +29,7 @@
>>>>> #endif
>>>>> 
>>>>> #define NFS_MAX_CONNECTIONS 16
>>>>> +#define NFS_MAX_TRANSPORTS 128
>>>> 
>>>> This maximum seems excessive... again, there are diminishing
>>>> returns to adding more connections to the same server. what's
>>>> wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
>>>> 
>>>> As always, I'm a little queasy about adding yet another mount
>>>> option. Are there real use cases where a whole-client setting
>>>> (like a sysfs attribute) would be inadequate? Is there a way
>>>> the client could figure out a reasonable maximum without a
>>>> human intervention, say, by counting the number of NICs on
>>>> the system?
>>> 
>>> Oh, hell no! We're not tying anything to the number of NICs...
>> 
>> That's a bit of an over-reaction. :-) A little more explanation
>> would be welcome. I mean, don't you expect someone to ask "How
>> do I pick a good value?" and someone might reasonably answer
>> "Well, start with the number of NICs on your client times 3" or
>> something like that.
>> 
>> IMO we're about to add another admin setting without understanding
>> how it will be used, how to select a good maximum value, or even
>> whether this maximum needs to be adjustable. In a previous e-mail
>> Olga has already demonstrated that it will be difficult to explain
>> how to use this setting with nconnect=.
>> 
>> Thus I would favor a (moderate) soldered-in maximum to start with,
>> and then as real world use cases arise, consider adding a tuning
>> mechanism based on actual requirements.
> 
> It's not an overreaction.

The "Oh, hell no!" was an overreaction. But thank you for providing
the additional explanation, that helped me understand your position.
I agree that the number of local NICs is frequently unrelated to
the topology of the whole network.


> It's insane to think that counting NICs gives
> you any notion whatsoever about the network topology and connectivity
> between the client and server. It doesn't even tell you how many of
> those NICs might potentially be available to your application.
> 
> We're not doing any automation based on that kind of layering
> violation.

Fair enough.


--
Chuck Lever
Chuck Lever June 10, 2021, 2:51 p.m. UTC | #10
> On Jun 10, 2021, at 10:29 AM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> 
> On Thu, Jun 10, 2021 at 9:56 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
>> 
>> 
>> 
>>> On Jun 10, 2021, at 9:34 AM, Trond Myklebust <trondmy@hammerspace.com> wrote:
>>> 
>>> On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
>>>> 
>>>> 
>>>>> On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
>>>>> olga.kornievskaia@gmail.com> wrote:
>>>>> 
>>>>> From: Olga Kornievskaia <kolga@netapp.com>
>>>>> 
>>>>> This option will control up to how many xprts can the client
>>>>> establish to the server. This patch parses the value and sets
>>>>> up structures that keep track of max_connect.
>>>>> 
>>>>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
>>>>> ---
>>>>> fs/nfs/client.c           |  1 +
>>>>> fs/nfs/fs_context.c       |  8 ++++++++
>>>>> fs/nfs/internal.h         |  2 ++
>>>>> fs/nfs/nfs4client.c       | 12 ++++++++++--
>>>>> fs/nfs/super.c            |  2 ++
>>>>> include/linux/nfs_fs_sb.h |  1 +
>>>>> 6 files changed, 24 insertions(+), 2 deletions(-)
>>>>> 
>>>>> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>>>>> index 330f65727c45..486dec59972b 100644
>>>>> --- a/fs/nfs/client.c
>>>>> +++ b/fs/nfs/client.c
>>>>> @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const
>>>>> struct nfs_client_initdata *cl_init)
>>>>> 
>>>>>        clp->cl_proto = cl_init->proto;
>>>>>        clp->cl_nconnect = cl_init->nconnect;
>>>>> +       clp->cl_max_connect = cl_init->max_connect ? cl_init-
>>>>>> max_connect : 1;
>>>> 
>>>> So, 1 is the default setting, meaning the "add another transport"
>>>> facility is disabled by default. Would it be less surprising for
>>>> an admin to allow some extra connections by default?
>>>> 
>>>> 
>>>>>        clp->cl_net = get_net(cl_init->net);
>>>>> 
>>>>>        clp->cl_principal = "*";
>>>>> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
>>>>> index d95c9a39bc70..cfbff7098f8e 100644
>>>>> --- a/fs/nfs/fs_context.c
>>>>> +++ b/fs/nfs/fs_context.c
>>>>> @@ -29,6 +29,7 @@
>>>>> #endif
>>>>> 
>>>>> #define NFS_MAX_CONNECTIONS 16
>>>>> +#define NFS_MAX_TRANSPORTS 128
>>>> 
>>>> This maximum seems excessive... again, there are diminishing
>>>> returns to adding more connections to the same server. what's
>>>> wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
>>>> 
>>>> As always, I'm a little queasy about adding yet another mount
>>>> option. Are there real use cases where a whole-client setting
>>>> (like a sysfs attribute) would be inadequate? Is there a way
>>>> the client could figure out a reasonable maximum without a
>>>> human intervention, say, by counting the number of NICs on
>>>> the system?
>>> 
>>> Oh, hell no! We're not tying anything to the number of NICs...
>> 
>> That's a bit of an over-reaction. :-) A little more explanation
>> would be welcome. I mean, don't you expect someone to ask "How
>> do I pick a good value?" and someone might reasonably answer
>> "Well, start with the number of NICs on your client times 3" or
>> something like that.
> 
> That's what I was thinking and thank you for at least considering that
> it's a reasonable answer.
> 
>> IMO we're about to add another admin setting without understanding
>> how it will be used, how to select a good maximum value, or even
>> whether this maximum needs to be adjustable. In a previous e-mail
>> Olga has already demonstrated that it will be difficult to explain
>> how to use this setting with nconnect=.
> 
> I agree that understanding on how it will be used is unknown or
> understood but I think nconnect and max_connect represent different
> capabilities. I agree that adding nconnect transports leads to
> diminishing returns after a certain (relatively low) number. However,
> I don't believe the same holds for when xprts are going over different
> NICs. Therefore I didn't think max_connect should have been bound by
> the same numbers as nconnect.

Thanks for reminding me, I had forgotten the distinction between
the two mount options.

I think there's more going on than just the NIC -- lock contention
on the client will also be a somewhat limiting factor, as will the
number of local CPUs and memory bandwidth. And as Trond points out,
the network topology between the client and server will also have
some impact.

And I'm trying to understand why an admin would want to turn off
the "add another xprt" mechanism -- ie, the lower bound. Why is
the default setting 1?


> Perhaps 128 is too high of a value (for
> reference I did 8 *nconnect_max).
> 
>> Thus I would favor a (moderate) soldered-in maximum to start with,
>> and then as real world use cases arise, consider adding a tuning
>> mechanism based on actual requirements.
> 
> Can you suggest a moderate number between 16 and 128?

16 is conservative, and there's nothing preventing us from changing
that maximum over time as we learn more.

An in-code comment explaining how the final maximum value was arrived
at would be good to add. Even "This is just a guess" would be valuable
to anyone in the future trying to figure out a new value, IMO.
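
Something along these lines, perhaps (the value and wording are only
illustrative):

	/*
	 * Cap on the number of transports per client. This is just a
	 * guess: large enough for several trunked server addresses,
	 * small enough to bound resource usage on the client.
	 */
	#define NFS_MAX_TRANSPORTS 16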

--
Chuck Lever
Trond Myklebust June 10, 2021, 2:55 p.m. UTC | #11
On Thu, 2021-06-10 at 10:31 -0400, Olga Kornievskaia wrote:
> On Thu, Jun 10, 2021 at 10:13 AM Trond Myklebust
> <trondmy@hammerspace.com> wrote:
> > 
> > On Thu, 2021-06-10 at 13:56 +0000, Chuck Lever III wrote:
> > > 
> > > 
> > > > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
> > > > trondmy@hammerspace.com> wrote:
> > > > 
> > > > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> > > > > 
> > > > > 
> > > > > > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> > > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > 
> > > > > > From: Olga Kornievskaia <kolga@netapp.com>
> > > > > > 
> > > > > > This option will control up to how many xprts can the
> > > > > > client
> > > > > > establish to the server. This patch parses the value and
> > > > > > sets
> > > > > > up structures that keep track of max_connect.
> > > > > > 
> > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > ---
> > > > > > fs/nfs/client.c           |  1 +
> > > > > > fs/nfs/fs_context.c       |  8 ++++++++
> > > > > > fs/nfs/internal.h         |  2 ++
> > > > > > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > > > > > fs/nfs/super.c            |  2 ++
> > > > > > include/linux/nfs_fs_sb.h |  1 +
> > > > > > 6 files changed, 24 insertions(+), 2 deletions(-)
> > > > > > 
> > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > index 330f65727c45..486dec59972b 100644
> > > > > > --- a/fs/nfs/client.c
> > > > > > +++ b/fs/nfs/client.c
> > > > > > @@ -179,6 +179,7 @@ struct nfs_client
> > > > > > *nfs_alloc_client(const
> > > > > > struct nfs_client_initdata *cl_init)
> > > > > > 
> > > > > >         clp->cl_proto = cl_init->proto;
> > > > > >         clp->cl_nconnect = cl_init->nconnect;
> > > > > > +       clp->cl_max_connect = cl_init->max_connect ?
> > > > > > cl_init-
> > > > > > > max_connect : 1;
> > > > > 
> > > > > So, 1 is the default setting, meaning the "add another
> > > > > transport"
> > > > > facility is disabled by default. Would it be less surprising
> > > > > for
> > > > > an admin to allow some extra connections by default?
> > > > > 
> > > > > 
> > > > > >         clp->cl_net = get_net(cl_init->net);
> > > > > > 
> > > > > >         clp->cl_principal = "*";
> > > > > > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > > > > > index d95c9a39bc70..cfbff7098f8e 100644
> > > > > > --- a/fs/nfs/fs_context.c
> > > > > > +++ b/fs/nfs/fs_context.c
> > > > > > @@ -29,6 +29,7 @@
> > > > > > #endif
> > > > > > 
> > > > > > #define NFS_MAX_CONNECTIONS 16
> > > > > > +#define NFS_MAX_TRANSPORTS 128
> > > > > 
> > > > > This maximum seems excessive... again, there are diminishing
> > > > > returns to adding more connections to the same server. what's
> > > > > wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> > > > > 
> > > > > As always, I'm a little queasy about adding yet another mount
> > > > > option. Are there real use cases where a whole-client setting
> > > > > (like a sysfs attribute) would be inadequate? Is there a way
> > > > > the client could figure out a reasonable maximum without a
> > > > > human intervention, say, by counting the number of NICs on
> > > > > the system?
> > > > 
> > > > Oh, hell no! We're not tying anything to the number of NICs...
> > > 
> > > That's a bit of an over-reaction. :-) A little more explanation
> > > would be welcome. I mean, don't you expect someone to ask "How
> > > do I pick a good value?" and someone might reasonably answer
> > > "Well, start with the number of NICs on your client times 3" or
> > > something like that.
> > > 
> > > IMO we're about to add another admin setting without
> > > understanding
> > > how it will be used, how to select a good maximum value, or even
> > > whether this maximum needs to be adjustable. In a previous e-mail
> > > Olga has already demonstrated that it will be difficult to
> > > explain
> > > how to use this setting with nconnect=.
> > > 
> > > Thus I would favor a (moderate) soldered-in maximum to start
> > > with,
> > > and then as real world use cases arise, consider adding a tuning
> > > mechanism based on actual requirements.
> > 
> > It's not an overreaction. It's insane to think that counting NICs
> > gives
> > you any notion whatsoever about the network topology and
> > connectivity
> > between the client and server. It doesn't even tell you how many of
> > those NICs might potentially be available to your application.
> > 
> > We're not doing any automation based on that kind of layering
> > violation.
> 
> I'm not suggesting to programmatically determine the number of NIC to
> determine the value of max_connect.
> > 

No, but that's what Chuck appeared to be suggesting in order to avoid
the need for the mount option.

To me, the main reason for the mount option is to allow the user to
limit the number of new IP addresses being added so that if the DNS
server is configured to hand out lots of different addresses for the
same servername, the user can basically say 'no, I just want to use the
one IP address that I'm already connected to' (i.e. max_connect=1). I
can imagine that some clustered setups might need that ability in order
to work efficiently.

I'm fine with the idea of nconnect setting the number of connections
per IP address, but that would need some plumbing in
rpc_clnt_test_and_add_xprt() to allow us to add up to 'nconnect' copies
of a given transport.
Presumably rpc_xprt_switch_has_addr() would need to return a count of
the number of copies of the transport that are already present so that
we can decide whether or not we should add a new one.
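
A rough sketch of that plumbing (rpc_xprt_switch_addr_count() is a
hypothetical helper; locking details and the exact call site would
need more care than shown here):

	/* hypothetical: count transports already bound to @sap */
	static unsigned int
	rpc_xprt_switch_addr_count(struct rpc_xprt_switch *xps,
				   const struct sockaddr *sap)
	{
		struct rpc_xprt *pos;
		unsigned int count = 0;

		rcu_read_lock();
		list_for_each_entry_rcu(pos, &xps->xps_xprt_list, xprt_switch) {
			if (rpc_cmp_addr(sap, (struct sockaddr *)&pos->addr))
				count++;
		}
		rcu_read_unlock();
		return count;
	}

	/* in rpc_clnt_test_and_add_xprt(): only add another copy of
	 * the transport while the per-address count is below nconnect */
	if (rpc_xprt_switch_addr_count(xps, sap) >= clnt->cl_nconnect)
		goto out;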
Olga Kornievskaia June 10, 2021, 3:01 p.m. UTC | #12
On Thu, Jun 10, 2021 at 10:51 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
>
>
>
> > On Jun 10, 2021, at 10:29 AM, Olga Kornievskaia <olga.kornievskaia@gmail.com> wrote:
> >
> > On Thu, Jun 10, 2021 at 9:56 AM Chuck Lever III <chuck.lever@oracle.com> wrote:
> >>
> >>
> >>
> >>> On Jun 10, 2021, at 9:34 AM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> >>>
> >>> On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> >>>>
> >>>>
> >>>>> On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> >>>>> olga.kornievskaia@gmail.com> wrote:
> >>>>>
> >>>>> From: Olga Kornievskaia <kolga@netapp.com>
> >>>>>
> >>>>> This option will control up to how many xprts can the client
> >>>>> establish to the server. This patch parses the value and sets
> >>>>> up structures that keep track of max_connect.
> >>>>>
> >>>>> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> >>>>> ---
> >>>>> fs/nfs/client.c           |  1 +
> >>>>> fs/nfs/fs_context.c       |  8 ++++++++
> >>>>> fs/nfs/internal.h         |  2 ++
> >>>>> fs/nfs/nfs4client.c       | 12 ++++++++++--
> >>>>> fs/nfs/super.c            |  2 ++
> >>>>> include/linux/nfs_fs_sb.h |  1 +
> >>>>> 6 files changed, 24 insertions(+), 2 deletions(-)
> >>>>>
> >>>>> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> >>>>> index 330f65727c45..486dec59972b 100644
> >>>>> --- a/fs/nfs/client.c
> >>>>> +++ b/fs/nfs/client.c
> >>>>> @@ -179,6 +179,7 @@ struct nfs_client *nfs_alloc_client(const
> >>>>> struct nfs_client_initdata *cl_init)
> >>>>>
> >>>>>        clp->cl_proto = cl_init->proto;
> >>>>>        clp->cl_nconnect = cl_init->nconnect;
> >>>>> +       clp->cl_max_connect = cl_init->max_connect ? cl_init-
> >>>>>> max_connect : 1;
> >>>>
> >>>> So, 1 is the default setting, meaning the "add another transport"
> >>>> facility is disabled by default. Would it be less surprising for
> >>>> an admin to allow some extra connections by default?
> >>>>
> >>>>
> >>>>>        clp->cl_net = get_net(cl_init->net);
> >>>>>
> >>>>>        clp->cl_principal = "*";
> >>>>> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> >>>>> index d95c9a39bc70..cfbff7098f8e 100644
> >>>>> --- a/fs/nfs/fs_context.c
> >>>>> +++ b/fs/nfs/fs_context.c
> >>>>> @@ -29,6 +29,7 @@
> >>>>> #endif
> >>>>>
> >>>>> #define NFS_MAX_CONNECTIONS 16
> >>>>> +#define NFS_MAX_TRANSPORTS 128
> >>>>
> >>>> This maximum seems excessive... again, there are diminishing
> >>>> returns to adding more connections to the same server. what's
> >>>> wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> >>>>
> >>>> As always, I'm a little queasy about adding yet another mount
> >>>> option. Are there real use cases where a whole-client setting
> >>>> (like a sysfs attribute) would be inadequate? Is there a way
> >>>> the client could figure out a reasonable maximum without a
> >>>> human intervention, say, by counting the number of NICs on
> >>>> the system?
> >>>
> >>> Oh, hell no! We're not tying anything to the number of NICs...
> >>
> >> That's a bit of an over-reaction. :-) A little more explanation
> >> would be welcome. I mean, don't you expect someone to ask "How
> >> do I pick a good value?" and someone might reasonably answer
> >> "Well, start with the number of NICs on your client times 3" or
> >> something like that.
> >
> > That's what I was thinking and thank you for at least considering that
> > it's a reasonable answer.
> >
> >> IMO we're about to add another admin setting without understanding
> >> how it will be used, how to select a good maximum value, or even
> >> whether this maximum needs to be adjustable. In a previous e-mail
> >> Olga has already demonstrated that it will be difficult to explain
> >> how to use this setting with nconnect=.
> >
> > I agree that how it will be used is unknown or not well understood,
> > but I think nconnect and max_connect represent different
> > capabilities. I agree that adding nconnect transports leads to
> > diminishing returns after a certain (relatively low) number. However,
> > I don't believe the same holds for when xprts are going over different
> > NICs. Therefore I didn't think max_connect should have been bound by
> > the same numbers as nconnect.
>
> Thanks for reminding me, I had forgotten the distinction between
> the two mount options.
>
> I think there's more going on than just the NIC -- lock contention
> on the client will also be a somewhat limiting factor, as will the
> number of local CPUs and memory bandwidth. And as Trond points out,
> the network topology between the client and server will also have
> some impact.
>
> And I'm trying to understand why an admin would want to turn off
> the "add another xprt" mechanism -- ie, the lower bound. Why is
> the default setting 1?

I think the reason for having the default be 1 was to address Trond's
comment that some servers struggle to support nconnect. So I'm trying
not to force any current setup into changing its mount options to
specifically say "max_connect=1". I want environments that can support
trunking to opt in to it explicitly, by adding a new mount option that
increases the limit.

If this is not a concern, then max_connect's default can just be
whatever default value we pick for it.
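For illustration (hypothetical invocations, not from this series):

    # unchanged behaviour: additional trunkable addresses of the same
    # server are not used (max_connect defaults to 1)
    mount -o vers=4.1 server:/export /mnt

    # opt in: let the client add trunked transports, up to 16 in total
    mount -o vers=4.1,max_connect=16 server:/export /mnt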
>
>
> > Perhaps 128 is too high of a value (for
> > reference I did 8 *nconnect_max).
> >
> >> Thus I would favor a (moderate) soldered-in maximum to start with,
> >> and then as real world use cases arise, consider adding a tuning
> >> mechanism based on actual requirements.
> >
> > Can you suggest a moderate number between 16 and 128?
>
> 16 is conservative, and there's nothing preventing us from changing
> that maximum over time as we learn more.
>
> An in-code comment explaining how the final maximum value was arrived
> at would be good to add. Even "This is just a guess" would be valuable
> to anyone in the future trying to figure out a new value, IMO.
>
> --
> Chuck Lever
>
>
>
Trond Myklebust June 10, 2021, 3:30 p.m. UTC | #13
On Thu, 2021-06-10 at 11:01 -0400, Olga Kornievskaia wrote:
> On Thu, Jun 10, 2021 at 10:51 AM Chuck Lever III <
> chuck.lever@oracle.com> wrote:
> > 
> > 
> > 
> > > On Jun 10, 2021, at 10:29 AM, Olga Kornievskaia <
> > > olga.kornievskaia@gmail.com> wrote:
> > > 
> > > On Thu, Jun 10, 2021 at 9:56 AM Chuck Lever III <
> > > chuck.lever@oracle.com> wrote:
> > > > 
> > > > 
> > > > 
> > > > > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
> > > > > trondmy@hammerspace.com> wrote:
> > > > > 
> > > > > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> > > > > > 
> > > > > > 
> > > > > > > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> > > > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > > 
> > > > > > > From: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > 
> > > > > > > This option will control up to how many xprts can the
> > > > > > > client
> > > > > > > establish to the server. This patch parses the value and
> > > > > > > sets
> > > > > > > up structures that keep track of max_connect.
> > > > > > > 
> > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > ---
> > > > > > > fs/nfs/client.c           |  1 +
> > > > > > > fs/nfs/fs_context.c       |  8 ++++++++
> > > > > > > fs/nfs/internal.h         |  2 ++
> > > > > > > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > > > > > > fs/nfs/super.c            |  2 ++
> > > > > > > include/linux/nfs_fs_sb.h |  1 +
> > > > > > > 6 files changed, 24 insertions(+), 2 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > index 330f65727c45..486dec59972b 100644
> > > > > > > --- a/fs/nfs/client.c
> > > > > > > +++ b/fs/nfs/client.c
> > > > > > > @@ -179,6 +179,7 @@ struct nfs_client
> > > > > > > *nfs_alloc_client(const
> > > > > > > struct nfs_client_initdata *cl_init)
> > > > > > > 
> > > > > > >        clp->cl_proto = cl_init->proto;
> > > > > > >        clp->cl_nconnect = cl_init->nconnect;
> > > > > > > +       clp->cl_max_connect = cl_init->max_connect ?
> > > > > > > cl_init-
> > > > > > > > max_connect : 1;
> > > > > > 
> > > > > > So, 1 is the default setting, meaning the "add another
> > > > > > transport" facility is disabled by default. Would it be less
> > > > > > surprising for an admin to allow some extra connections by
> > > > > > default?
> > > > > > 
> > > > > > 
> > > > > > >        clp->cl_net = get_net(cl_init->net);
> > > > > > > 
> > > > > > >        clp->cl_principal = "*";
> > > > > > > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > > > > > > index d95c9a39bc70..cfbff7098f8e 100644
> > > > > > > --- a/fs/nfs/fs_context.c
> > > > > > > +++ b/fs/nfs/fs_context.c
> > > > > > > @@ -29,6 +29,7 @@
> > > > > > > #endif
> > > > > > > 
> > > > > > > #define NFS_MAX_CONNECTIONS 16
> > > > > > > +#define NFS_MAX_TRANSPORTS 128
> > > > > > 
> > > > > > This maximum seems excessive... again, there are diminishing
> > > > > > returns to adding more connections to the same server. what's
> > > > > > wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> > > > > > 
> > > > > > As always, I'm a little queasy about adding yet another mount
> > > > > > option. Are there real use cases where a whole-client setting
> > > > > > (like a sysfs attribute) would be inadequate? Is there a way
> > > > > > the client could figure out a reasonable maximum without a
> > > > > > human intervention, say, by counting the number of NICs on
> > > > > > the system?
> > > > > 
> > > > > Oh, hell no! We're not tying anything to the number of
> > > > > NICs...
> > > > 
> > > > That's a bit of an over-reaction. :-) A little more explanation
> > > > would be welcome. I mean, don't you expect someone to ask "How
> > > > do I pick a good value?" and someone might reasonably answer
> > > > "Well, start with the number of NICs on your client times 3" or
> > > > something like that.
> > > 
> > > That's what I was thinking and thank you for at least considering
> > > that it's a reasonable answer.
> > > 
> > > > IMO we're about to add another admin setting without understanding
> > > > how it will be used, how to select a good maximum value, or even
> > > > whether this maximum needs to be adjustable. In a previous e-mail
> > > > Olga has already demonstrated that it will be difficult to explain
> > > > how to use this setting with nconnect=.
> > > 
> > > I agree that how it will be used is unknown or not well understood,
> > > but I think nconnect and max_connect represent different
> > > capabilities. I agree that adding nconnect transports leads to
> > > diminishing returns after a certain (relatively low) number.
> > > However, I don't believe the same holds for when xprts are going
> > > over different NICs. Therefore I didn't think max_connect should
> > > have been bound by the same numbers as nconnect.
> > 
> > Thanks for reminding me, I had forgotten the distinction between
> > the two mount options.
> > 
> > I think there's more going on than just the NIC -- lock contention
> > on the client will also be a somewhat limiting factor, as will the
> > number of local CPUs and memory bandwidth. And as Trond points out,
> > the network topology between the client and server will also have
> > some impact.
> > 
> > And I'm trying to understand why an admin would want to turn off
> > the "add another xprt" mechanism -- ie, the lower bound. Why is
> > the default setting 1?
> 
> I think the reason for having the default be 1 was to address Trond's
> comment that some servers struggle to support nconnect. So I'm trying
> not to force any current setup into changing its mount options to
> specifically say "max_connect=1". I want environments that can support
> trunking to opt in to it explicitly, by adding a new mount option that
> increases the limit.
> 
> If this is not a concern, then max_connect's default can just be
> whatever default value we pick for it.
> 

The default needs to preserve existing behaviour, so max_connect=1 is
correct.
Olga Kornievskaia June 10, 2021, 4:14 p.m. UTC | #14
On Thu, Jun 10, 2021 at 10:56 AM Trond Myklebust
<trondmy@hammerspace.com> wrote:
>
> On Thu, 2021-06-10 at 10:31 -0400, Olga Kornievskaia wrote:
> > On Thu, Jun 10, 2021 at 10:13 AM Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > >
> > > On Thu, 2021-06-10 at 13:56 +0000, Chuck Lever III wrote:
> > > >
> > > >
> > > > > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
> > > > > trondmy@hammerspace.com> wrote:
> > > > >
> > > > > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> > > > > >
> > > > > >
> > > > > > > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> > > > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > >
> > > > > > > From: Olga Kornievskaia <kolga@netapp.com>
> > > > > > >
> > > > > > > This option will control up to how many xprts can the
> > > > > > > client
> > > > > > > establish to the server. This patch parses the value and
> > > > > > > sets
> > > > > > > up structures that keep track of max_connect.
> > > > > > >
> > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > ---
> > > > > > > fs/nfs/client.c           |  1 +
> > > > > > > fs/nfs/fs_context.c       |  8 ++++++++
> > > > > > > fs/nfs/internal.h         |  2 ++
> > > > > > > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > > > > > > fs/nfs/super.c            |  2 ++
> > > > > > > include/linux/nfs_fs_sb.h |  1 +
> > > > > > > 6 files changed, 24 insertions(+), 2 deletions(-)
> > > > > > >
> > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > index 330f65727c45..486dec59972b 100644
> > > > > > > --- a/fs/nfs/client.c
> > > > > > > +++ b/fs/nfs/client.c
> > > > > > > @@ -179,6 +179,7 @@ struct nfs_client
> > > > > > > *nfs_alloc_client(const
> > > > > > > struct nfs_client_initdata *cl_init)
> > > > > > >
> > > > > > >         clp->cl_proto = cl_init->proto;
> > > > > > >         clp->cl_nconnect = cl_init->nconnect;
> > > > > > > +       clp->cl_max_connect = cl_init->max_connect ?
> > > > > > > cl_init-
> > > > > > > > max_connect : 1;
> > > > > >
> > > > > > So, 1 is the default setting, meaning the "add another
> > > > > > transport" facility is disabled by default. Would it be less
> > > > > > surprising for an admin to allow some extra connections by
> > > > > > default?
> > > > > >
> > > > > >
> > > > > > >         clp->cl_net = get_net(cl_init->net);
> > > > > > >
> > > > > > >         clp->cl_principal = "*";
> > > > > > > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > > > > > > index d95c9a39bc70..cfbff7098f8e 100644
> > > > > > > --- a/fs/nfs/fs_context.c
> > > > > > > +++ b/fs/nfs/fs_context.c
> > > > > > > @@ -29,6 +29,7 @@
> > > > > > > #endif
> > > > > > >
> > > > > > > #define NFS_MAX_CONNECTIONS 16
> > > > > > > +#define NFS_MAX_TRANSPORTS 128
> > > > > >
> > > > > > This maximum seems excessive... again, there are diminishing
> > > > > > returns to adding more connections to the same server. what's
> > > > > > wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> > > > > >
> > > > > > As always, I'm a little queasy about adding yet another mount
> > > > > > option. Are there real use cases where a whole-client setting
> > > > > > (like a sysfs attribute) would be inadequate? Is there a way
> > > > > > the client could figure out a reasonable maximum without a
> > > > > > human intervention, say, by counting the number of NICs on
> > > > > > the system?
> > > > >
> > > > > Oh, hell no! We're not tying anything to the number of NICs...
> > > >
> > > > That's a bit of an over-reaction. :-) A little more explanation
> > > > would be welcome. I mean, don't you expect someone to ask "How
> > > > do I pick a good value?" and someone might reasonably answer
> > > > "Well, start with the number of NICs on your client times 3" or
> > > > something like that.
> > > >
> > > > IMO we're about to add another admin setting without understanding
> > > > how it will be used, how to select a good maximum value, or even
> > > > whether this maximum needs to be adjustable. In a previous e-mail
> > > > Olga has already demonstrated that it will be difficult to explain
> > > > how to use this setting with nconnect=.
> > > >
> > > > Thus I would favor a (moderate) soldered-in maximum to start with,
> > > > and then as real world use cases arise, consider adding a tuning
> > > > mechanism based on actual requirements.
> > >
> > > It's not an overreaction. It's insane to think that counting NICs
> > > gives you any notion whatsoever about the network topology and
> > > connectivity between the client and server. It doesn't even tell
> > > you how many of those NICs might potentially be available to your
> > > application.
> > >
> > > We're not doing any automation based on that kind of layering
> > > violation.
> >
> > I'm not suggesting to programmatically determine the number of NIC to
> > determine the value of max_connect.
> > >
>
> No, but that's what Chuck appeared to be suggesting in order to avoid
> the need for the mount option.
>
> To me, the main reason for the mount option is to allow the user to
> limit the number of new IP addresses being added so that if the DNS
> server is configured to hand out lots of different addresses for the
> same servername, the user can basically say 'no, I just want to use the
> one IP address that I'm already connected to' (i.e. max_connect=1). I
> can imagine that some clustered setups might need that ability in order
> to work efficiently.
>
> I'm fine with the idea of nconnect setting the number of connections
> per IP address, but that would need some plumbing in
> rpc_clnt_test_and_add_xprt() to allow us to add up to 'nconnect' copies
> of a given transport.
> Presumably rpc_xprt_switch_has_addr() would need to return a count of
> the number of copies of the transport that are already present so that
> we can decide whether or not we should add a new one.

I think the last paragraph is what I'm asking for. But I would like to
confirm again whether you still mean "max_connect" to be the total
number of connections, since you say we could/will allow for nconnect
connections per IP address. Would max_connect need to be a multiple of
nconnect (max_connect = X * nconnect)?

Actually, when I said we would be supporting (or rather allowing for)
nconnect * max_connect transports, is that correct? Given how the code
works now, this is going to be nconnect + max_connect (only if the 1st
mount had the nconnect option). We can't "add" nconnect connections to
the new mounts (but with my patch we can add a single trunk
connection). By that I mean: say the first was "mount IP1:/vol1 /mnt1"
(1 connection to IP1). Now the client is doing "mount IP2:/vol2 /mnt2".
IP1 and IP2 are trunkable addresses of the same server so we add a
trunk. We currently don't allow for doing "mount -o nconnect=2
IP2:/vol2 /mnt2" and then also adding "nconnect" connections to IP2
along with a trunk. In the 2nd example, we'd have 1 connection to IP1,
then 2 connections to IP2. Can we allow for that (with the needed code
change)? If not, then we really need to commit to supporting only
nconnect (16) connections + some number of trunkable connections.
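To spell the two examples out (my reading of the current behaviour):

    mount IP1:/vol1 /mnt1                  -> 1 transport to IP1
    mount IP2:/vol2 /mnt2                  -> +1 trunked transport to IP2
                                              (2 transports total)

    mount IP1:/vol1 /mnt1                  -> 1 transport to IP1
    mount -o nconnect=2 IP2:/vol2 /mnt2    -> today: still only +1 trunked
                                              transport to IP2; the question
                                              is whether it could add 2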

>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Trond Myklebust June 10, 2021, 4:36 p.m. UTC | #15
On Thu, 2021-06-10 at 12:14 -0400, Olga Kornievskaia wrote:
> On Thu, Jun 10, 2021 at 10:56 AM Trond Myklebust
> <trondmy@hammerspace.com> wrote:
> > 
> > On Thu, 2021-06-10 at 10:31 -0400, Olga Kornievskaia wrote:
> > > On Thu, Jun 10, 2021 at 10:13 AM Trond Myklebust
> > > <trondmy@hammerspace.com> wrote:
> > > > 
> > > > On Thu, 2021-06-10 at 13:56 +0000, Chuck Lever III wrote:
> > > > > 
> > > > > 
> > > > > > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
> > > > > > trondmy@hammerspace.com> wrote:
> > > > > > 
> > > > > > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> > > > > > > 
> > > > > > > 
> > > > > > > > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> > > > > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > > > 
> > > > > > > > From: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > > 
> > > > > > > > This option will control up to how many xprts can the
> > > > > > > > client
> > > > > > > > establish to the server. This patch parses the value
> > > > > > > > and
> > > > > > > > sets
> > > > > > > > up structures that keep track of max_connect.
> > > > > > > > 
> > > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > > ---
> > > > > > > > fs/nfs/client.c           |  1 +
> > > > > > > > fs/nfs/fs_context.c       |  8 ++++++++
> > > > > > > > fs/nfs/internal.h         |  2 ++
> > > > > > > > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > > > > > > > fs/nfs/super.c            |  2 ++
> > > > > > > > include/linux/nfs_fs_sb.h |  1 +
> > > > > > > > 6 files changed, 24 insertions(+), 2 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > > index 330f65727c45..486dec59972b 100644
> > > > > > > > --- a/fs/nfs/client.c
> > > > > > > > +++ b/fs/nfs/client.c
> > > > > > > > @@ -179,6 +179,7 @@ struct nfs_client
> > > > > > > > *nfs_alloc_client(const
> > > > > > > > struct nfs_client_initdata *cl_init)
> > > > > > > > 
> > > > > > > >         clp->cl_proto = cl_init->proto;
> > > > > > > >         clp->cl_nconnect = cl_init->nconnect;
> > > > > > > > +       clp->cl_max_connect = cl_init->max_connect ?
> > > > > > > > cl_init-
> > > > > > > > > max_connect : 1;
> > > > > > > 
> > > > > > > So, 1 is the default setting, meaning the "add another
> > > > > > > transport" facility is disabled by default. Would it be less
> > > > > > > surprising for an admin to allow some extra connections by
> > > > > > > default?
> > > > > > > 
> > > > > > > 
> > > > > > > >         clp->cl_net = get_net(cl_init->net);
> > > > > > > > 
> > > > > > > >         clp->cl_principal = "*";
> > > > > > > > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > > > > > > > index d95c9a39bc70..cfbff7098f8e 100644
> > > > > > > > --- a/fs/nfs/fs_context.c
> > > > > > > > +++ b/fs/nfs/fs_context.c
> > > > > > > > @@ -29,6 +29,7 @@
> > > > > > > > #endif
> > > > > > > > 
> > > > > > > > #define NFS_MAX_CONNECTIONS 16
> > > > > > > > +#define NFS_MAX_TRANSPORTS 128
> > > > > > > 
> > > > > > > This maximum seems excessive... again, there are diminishing
> > > > > > > returns to adding more connections to the same server. what's
> > > > > > > wrong with re-using NFS_MAX_CONNECTIONS for the maximum?
> > > > > > > 
> > > > > > > As always, I'm a little queasy about adding yet another
> > > > > > > mount option. Are there real use cases where a whole-client
> > > > > > > setting (like a sysfs attribute) would be inadequate? Is
> > > > > > > there a way the client could figure out a reasonable maximum
> > > > > > > without a human intervention, say, by counting the number of
> > > > > > > NICs on the system?
> > > > > > 
> > > > > > Oh, hell no! We're not tying anything to the number of
> > > > > > NICs...
> > > > > 
> > > > > That's a bit of an over-reaction. :-) A little more explanation
> > > > > would be welcome. I mean, don't you expect someone to ask "How
> > > > > do I pick a good value?" and someone might reasonably answer
> > > > > "Well, start with the number of NICs on your client times 3" or
> > > > > something like that.
> > > > > 
> > > > > IMO we're about to add another admin setting without
> > > > > understanding how it will be used, how to select a good maximum
> > > > > value, or even whether this maximum needs to be adjustable. In a
> > > > > previous e-mail Olga has already demonstrated that it will be
> > > > > difficult to explain how to use this setting with nconnect=.
> > > > > 
> > > > > Thus I would favor a (moderate) soldered-in maximum to start
> > > > > with, and then as real world use cases arise, consider adding a
> > > > > tuning mechanism based on actual requirements.
> > > > 
> > > > It's not an overreaction. It's insane to think that counting NICs
> > > > gives you any notion whatsoever about the network topology and
> > > > connectivity between the client and server. It doesn't even tell
> > > > you how many of those NICs might potentially be available to your
> > > > application.
> > > > 
> > > > We're not doing any automation based on that kind of layering
> > > > violation.
> > > 
> > > I'm not suggesting to programmatically determine the number of
> > > NIC to determine the value of max_connect.
> > > > 
> > 
> > No, but that's what Chuck appeared to be suggesting in order to
> > avoid the need for the mount option.
> > 
> > To me, the main reason for the mount option is to allow the user to
> > limit the number of new IP addresses being added so that if the DNS
> > server is configured to hand out lots of different addresses for the
> > same servername, the user can basically say 'no, I just want to use
> > the one IP address that I'm already connected to' (i.e.
> > max_connect=1). I can imagine that some clustered setups might need
> > that ability in order to work efficiently.
> > 
> > I'm fine with the idea of nconnect setting the number of connections
> > per IP address, but that would need some plumbing in
> > rpc_clnt_test_and_add_xprt() to allow us to add up to 'nconnect'
> > copies of a given transport.
> > Presumably rpc_xprt_switch_has_addr() would need to return a count
> > of the number of copies of the transport that are already present so
> > that we can decide whether or not we should add a new one.
> 
> I think the last paragraph is what I'm asking for. But I would like to
> confirm again whether you still mean "max_connect" to be the total
> number of connections, since you say we could/will allow for nconnect
> connections per IP address. Would max_connect need to be a multiple of
> nconnect (max_connect = X * nconnect)?

No. Your suggestion to make the two independent is growing on me;
however, in that case we do want to ensure that if nconnect=X, then we
always add X transports when we add a new IP address.
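Something of this shape when trunking discovery finds a new address
(untested sketch; rpc_clnt_xprt_count() is made up here, and whatever
owns the loop would need both limits plumbed down to it):

/* Untested sketch: add cl_nconnect transports for a newly discovered
 * trunkable address, never exceeding cl_max_connect overall. */
static void nfs4_add_trunk_xprts(struct rpc_clnt *clnt,
				 struct nfs_client *clp,
				 struct xprt_create *xprtargs)
{
	unsigned int i;

	for (i = 0; i < clp->cl_nconnect; i++) {
		if (rpc_clnt_xprt_count(clnt) >= clp->cl_max_connect)
			break;
		rpc_clnt_add_xprt(clnt, xprtargs, NULL, NULL);
	}
}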

> 
> Actually, when I said we would be supporting (or rather allowing for)
> nconnect * max_connect transports, is that correct? Given how the code
> works now, this is going to be nconnect + max_connect (only if the 1st
> mount had the nconnect option). We can't "add" nconnect connections to
> the new mounts (but with my patch we can add a single trunk
> connection). By that I mean: say the first was "mount IP1:/vol1 /mnt1"
> (1 connection to IP1). Now the client is doing "mount IP2:/vol2
> /mnt2". IP1 and IP2 are trunkable addresses of the same server so we
> add a trunk. We currently don't allow for doing "mount -o nconnect=2
> IP2:/vol2 /mnt2" and then also adding "nconnect" connections to IP2
> along with a trunk. In the 2nd example, we'd have 1 connection to IP1,
> then 2 connections to IP2. Can we allow for that (with the needed code
> change)? If not, then we really need to commit to supporting only
> nconnect (16) connections + some number of trunkable connections.


I think we want to have nconnect be server-global, i.e. nconnect
entries for each IP address.
Olga Kornievskaia June 10, 2021, 5:30 p.m. UTC | #16
On Thu, Jun 10, 2021 at 12:36 PM Trond Myklebust
<trondmy@hammerspace.com> wrote:
>
> On Thu, 2021-06-10 at 12:14 -0400, Olga Kornievskaia wrote:
> > On Thu, Jun 10, 2021 at 10:56 AM Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > >
> > > On Thu, 2021-06-10 at 10:31 -0400, Olga Kornievskaia wrote:
> > > > On Thu, Jun 10, 2021 at 10:13 AM Trond Myklebust
> > > > <trondmy@hammerspace.com> wrote:
> > > > >
> > > > > On Thu, 2021-06-10 at 13:56 +0000, Chuck Lever III wrote:
> > > > > >
> > > > > >
> > > > > > > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
> > > > > > > trondmy@hammerspace.com> wrote:
> > > > > > >
> > > > > > > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > > > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> > > > > > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > > > >
> > > > > > > > > From: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > > >
> > > > > > > > > This option will control up to how many xprts can the
> > > > > > > > > client
> > > > > > > > > establish to the server. This patch parses the value
> > > > > > > > > and
> > > > > > > > > sets
> > > > > > > > > up structures that keep track of max_connect.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > > > ---
> > > > > > > > > fs/nfs/client.c           |  1 +
> > > > > > > > > fs/nfs/fs_context.c       |  8 ++++++++
> > > > > > > > > fs/nfs/internal.h         |  2 ++
> > > > > > > > > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > > > > > > > > fs/nfs/super.c            |  2 ++
> > > > > > > > > include/linux/nfs_fs_sb.h |  1 +
> > > > > > > > > 6 files changed, 24 insertions(+), 2 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > > > index 330f65727c45..486dec59972b 100644
> > > > > > > > > --- a/fs/nfs/client.c
> > > > > > > > > +++ b/fs/nfs/client.c
> > > > > > > > > @@ -179,6 +179,7 @@ struct nfs_client
> > > > > > > > > *nfs_alloc_client(const
> > > > > > > > > struct nfs_client_initdata *cl_init)
> > > > > > > > >
> > > > > > > > >         clp->cl_proto = cl_init->proto;
> > > > > > > > >         clp->cl_nconnect = cl_init->nconnect;
> > > > > > > > > +       clp->cl_max_connect = cl_init->max_connect ?
> > > > > > > > > cl_init-
> > > > > > > > > > max_connect : 1;
> > > > > > > >
> > > > > > > > So, 1 is the default setting, meaning the "add another
> > > > > > > > transport" facility is disabled by default. Would it be
> > > > > > > > less surprising for an admin to allow some extra
> > > > > > > > connections by default?
> > > > > > > >
> > > > > > > >
> > > > > > > > >         clp->cl_net = get_net(cl_init->net);
> > > > > > > > >
> > > > > > > > >         clp->cl_principal = "*";
> > > > > > > > > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > > > > > > > > index d95c9a39bc70..cfbff7098f8e 100644
> > > > > > > > > --- a/fs/nfs/fs_context.c
> > > > > > > > > +++ b/fs/nfs/fs_context.c
> > > > > > > > > @@ -29,6 +29,7 @@
> > > > > > > > > #endif
> > > > > > > > >
> > > > > > > > > #define NFS_MAX_CONNECTIONS 16
> > > > > > > > > +#define NFS_MAX_TRANSPORTS 128
> > > > > > > >
> > > > > > > > This maximum seems excessive... again, there are
> > > > > > > > diminishing returns to adding more connections to the same
> > > > > > > > server. what's wrong with re-using NFS_MAX_CONNECTIONS for
> > > > > > > > the maximum?
> > > > > > > >
> > > > > > > > As always, I'm a little queasy about adding yet another
> > > > > > > > mount option. Are there real use cases where a whole-client
> > > > > > > > setting (like a sysfs attribute) would be inadequate? Is
> > > > > > > > there a way the client could figure out a reasonable
> > > > > > > > maximum without a human intervention, say, by counting the
> > > > > > > > number of NICs on the system?
> > > > > > >
> > > > > > > Oh, hell no! We're not tying anything to the number of
> > > > > > > NICs...
> > > > > >
> > > > > > That's a bit of an over-reaction. :-) A little more
> > > > > > explanation would be welcome. I mean, don't you expect someone
> > > > > > to ask "How do I pick a good value?" and someone might
> > > > > > reasonably answer "Well, start with the number of NICs on your
> > > > > > client times 3" or something like that.
> > > > > >
> > > > > > IMO we're about to add another admin setting without
> > > > > > understanding how it will be used, how to select a good
> > > > > > maximum value, or even whether this maximum needs to be
> > > > > > adjustable. In a previous e-mail Olga has already demonstrated
> > > > > > that it will be difficult to explain how to use this setting
> > > > > > with nconnect=.
> > > > > >
> > > > > > Thus I would favor a (moderate) soldered-in maximum to start
> > > > > > with, and then as real world use cases arise, consider adding
> > > > > > a tuning mechanism based on actual requirements.
> > > > >
> > > > > It's not an overreaction. It's insane to think that counting
> > > > > NICs gives you any notion whatsoever about the network topology
> > > > > and connectivity between the client and server. It doesn't even
> > > > > tell you how many of those NICs might potentially be available
> > > > > to your application.
> > > > >
> > > > > We're not doing any automation based on that kind of layering
> > > > > violation.
> > > >
> > > > I'm not suggesting to programmatically determine the number of
> > > > NIC to determine the value of max_connect.
> > > > >
> > >
> > > No, but that's what Chuck appeared to be suggesting in order to
> > > avoid the need for the mount option.
> > >
> > > To me, the main reason for the mount option is to allow the user to
> > > limit the number of new IP addresses being added so that if the DNS
> > > server is configured to hand out lots of different addresses for
> > > the same servername, the user can basically say 'no, I just want to
> > > use the one IP address that I'm already connected to' (i.e.
> > > max_connect=1). I can imagine that some clustered setups might need
> > > that ability in order to work efficiently.
> > >
> > > I'm fine with the idea of nconnect setting the number of
> > > connections per IP address, but that would need some plumbing in
> > > rpc_clnt_test_and_add_xprt() to allow us to add up to 'nconnect'
> > > copies of a given transport.
> > > Presumably rpc_xprt_switch_has_addr() would need to return a count
> > > of the number of copies of the transport that are already present
> > > so that we can decide whether or not we should add a new one.
> >
> > I think the last paragraph is what I'm asking for. But I would like
> > to confirm again whether you still mean "max_connect" to be the
> > total number of connections, since you say we could/will allow for
> > nconnect connections per IP address. Would max_connect need to be a
> > multiple of nconnect (max_connect = X * nconnect)?
>
> > No. Your suggestion to make the two independent is growing on me;
> > however, in that case we do want to ensure that if nconnect=X, then
> > we always add X transports when we add a new IP address.

Ok. I'm glad to hear the independent idea still has life. Are you still
thinking "max_connect" is the right name for it? I guess if we explain
the feature in the man pages the name doesn't matter so much. I would
still have liked it to be something like "max_session_xprts".

> > > Actually, when I said we would be supporting (or rather allowing
> > > for) nconnect * max_connect transports, is that correct? Given how
> > > the code works now, this is going to be nconnect + max_connect
> > > (only if the 1st mount had the nconnect option). We can't "add"
> > > nconnect connections to the new mounts (but with my patch we can
> > > add a single trunk connection). By that I mean: say the first was
> > > "mount IP1:/vol1 /mnt1" (1 connection to IP1). Now the client is
> > > doing "mount IP2:/vol2 /mnt2". IP1 and IP2 are trunkable addresses
> > > of the same server so we add a trunk. We currently don't allow for
> > > doing "mount -o nconnect=2 IP2:/vol2 /mnt2" and then also adding
> > > "nconnect" connections to IP2 along with a trunk. In the 2nd
> > > example, we'd have 1 connection to IP1, then 2 connections to IP2.
> > > Can we allow for that (with the needed code change)? If not, then
> > > we really need to commit to supporting only nconnect (16)
> > > connections + some number of trunkable connections.
>
>
> > I think we want to have nconnect be server-global, i.e. nconnect
> > entries for each IP address.

Thank you both, Trond and Chuck.

I'll work on v3.


>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Olga Kornievskaia June 10, 2021, 10:17 p.m. UTC | #17
On Thu, Jun 10, 2021 at 1:30 PM Olga Kornievskaia
<olga.kornievskaia@gmail.com> wrote:
>
> On Thu, Jun 10, 2021 at 12:36 PM Trond Myklebust
> <trondmy@hammerspace.com> wrote:
> >
> > On Thu, 2021-06-10 at 12:14 -0400, Olga Kornievskaia wrote:
> > > On Thu, Jun 10, 2021 at 10:56 AM Trond Myklebust
> > > <trondmy@hammerspace.com> wrote:
> > > >
> > > > On Thu, 2021-06-10 at 10:31 -0400, Olga Kornievskaia wrote:
> > > > > On Thu, Jun 10, 2021 at 10:13 AM Trond Myklebust
> > > > > <trondmy@hammerspace.com> wrote:
> > > > > >
> > > > > > On Thu, 2021-06-10 at 13:56 +0000, Chuck Lever III wrote:
> > > > > > >
> > > > > > >
> > > > > > > > On Jun 10, 2021, at 9:34 AM, Trond Myklebust <
> > > > > > > > trondmy@hammerspace.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, 2021-06-10 at 13:30 +0000, Chuck Lever III wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > On Jun 9, 2021, at 5:53 PM, Olga Kornievskaia <
> > > > > > > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > > > > >
> > > > > > > > > > From: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > > > >
> > > > > > > > > > This option will control up to how many xprts can the
> > > > > > > > > > client
> > > > > > > > > > establish to the server. This patch parses the value
> > > > > > > > > > and
> > > > > > > > > > sets
> > > > > > > > > > up structures that keep track of max_connect.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > > > > ---
> > > > > > > > > > fs/nfs/client.c           |  1 +
> > > > > > > > > > fs/nfs/fs_context.c       |  8 ++++++++
> > > > > > > > > > fs/nfs/internal.h         |  2 ++
> > > > > > > > > > fs/nfs/nfs4client.c       | 12 ++++++++++--
> > > > > > > > > > fs/nfs/super.c            |  2 ++
> > > > > > > > > > include/linux/nfs_fs_sb.h |  1 +
> > > > > > > > > > 6 files changed, 24 insertions(+), 2 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > > > > index 330f65727c45..486dec59972b 100644
> > > > > > > > > > --- a/fs/nfs/client.c
> > > > > > > > > > +++ b/fs/nfs/client.c
> > > > > > > > > > @@ -179,6 +179,7 @@ struct nfs_client
> > > > > > > > > > *nfs_alloc_client(const
> > > > > > > > > > struct nfs_client_initdata *cl_init)
> > > > > > > > > >
> > > > > > > > > >         clp->cl_proto = cl_init->proto;
> > > > > > > > > >         clp->cl_nconnect = cl_init->nconnect;
> > > > > > > > > > +       clp->cl_max_connect = cl_init->max_connect ?
> > > > > > > > > > cl_init-
> > > > > > > > > > > max_connect : 1;
> > > > > > > > >
> > > > > > > > > So, 1 is the default setting, meaning the "add another
> > > > > > > > > transport" facility is disabled by default. Would it be
> > > > > > > > > less surprising for an admin to allow some extra
> > > > > > > > > connections by default?
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >         clp->cl_net = get_net(cl_init->net);
> > > > > > > > > >
> > > > > > > > > >         clp->cl_principal = "*";
> > > > > > > > > > diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> > > > > > > > > > index d95c9a39bc70..cfbff7098f8e 100644
> > > > > > > > > > --- a/fs/nfs/fs_context.c
> > > > > > > > > > +++ b/fs/nfs/fs_context.c
> > > > > > > > > > @@ -29,6 +29,7 @@
> > > > > > > > > > #endif
> > > > > > > > > >
> > > > > > > > > > #define NFS_MAX_CONNECTIONS 16
> > > > > > > > > > +#define NFS_MAX_TRANSPORTS 128
> > > > > > > > >
> > > > > > > > > This maximum seems excessive... again, there are
> > > > > > > > > diminishing returns to adding more connections to the
> > > > > > > > > same server. what's wrong with re-using
> > > > > > > > > NFS_MAX_CONNECTIONS for the maximum?
> > > > > > > > >
> > > > > > > > > As always, I'm a little queasy about adding yet another
> > > > > > > > > mount option. Are there real use cases where a
> > > > > > > > > whole-client setting (like a sysfs attribute) would be
> > > > > > > > > inadequate? Is there a way the client could figure out a
> > > > > > > > > reasonable maximum without a human intervention, say, by
> > > > > > > > > counting the number of NICs on the system?
> > > > > > > >
> > > > > > > > Oh, hell no! We're not tying anything to the number of
> > > > > > > > NICs...
> > > > > > >
> > > > > > > That's a bit of an over-reaction. :-) A little more
> > > > > > > explanation would be welcome. I mean, don't you expect
> > > > > > > someone to ask "How do I pick a good value?" and someone
> > > > > > > might reasonably answer "Well, start with the number of NICs
> > > > > > > on your client times 3" or something like that.
> > > > > > >
> > > > > > > IMO we're about to add another admin setting without
> > > > > > > understanding how it will be used, how to select a good
> > > > > > > maximum value, or even whether this maximum needs to be
> > > > > > > adjustable. In a previous e-mail Olga has already
> > > > > > > demonstrated that it will be difficult to explain how to use
> > > > > > > this setting with nconnect=.
> > > > > > >
> > > > > > > Thus I would favor a (moderate) soldered-in maximum to start
> > > > > > > with, and then as real world use cases arise, consider
> > > > > > > adding a tuning mechanism based on actual requirements.
> > > > > >
> > > > > > It's not an overreaction. It's insane to think that counting
> > > > > > NICs gives you any notion whatsoever about the network
> > > > > > topology and connectivity between the client and server. It
> > > > > > doesn't even tell you how many of those NICs might potentially
> > > > > > be available to your application.
> > > > > >
> > > > > > We're not doing any automation based on that kind of layering
> > > > > > violation.
> > > > >
> > > > > I'm not suggesting to programmatically determine the number of
> > > > > NIC to determine the value of max_connect.
> > > > > >
> > > >
> > > > No, but that's what Chuck appeared to be suggesting in order to
> > > > avoid the need for the mount option.
> > > >
> > > > To me, the main reason for the mount option is to allow the user
> > > > to limit the number of new IP addresses being added so that if
> > > > the DNS server is configured to hand out lots of different
> > > > addresses for the same servername, the user can basically say
> > > > 'no, I just want to use the one IP address that I'm already
> > > > connected to' (i.e. max_connect=1). I can imagine that some
> > > > clustered setups might need that ability in order to work
> > > > efficiently.
> > > >
> > > > I'm fine with the idea of nconnect setting the number of
> > > > connections per IP address, but that would need some plumbing in
> > > > rpc_clnt_test_and_add_xprt() to allow us to add up to 'nconnect'
> > > > copies of a given transport.
> > > > Presumably rpc_xprt_switch_has_addr() would need to return a
> > > > count of the number of copies of the transport that are already
> > > > present so that we can decide whether or not we should add a new
> > > > one.
> > >
> > > I think the last paragraph is what I'm asking for. But I would
> > > like to confirm again whether you still mean "max_connect" to be
> > > the total number of connections, since you say we could/will allow
> > > for nconnect connections per IP address. Would max_connect need to
> > > be a multiple of nconnect (max_connect = X * nconnect)?
> >
> > No. Your suggestion to make the two independent is growing on me;
> > however, in that case we do want to ensure that if nconnect=X, then
> > we always add X transports when we add a new IP address.
>
> Ok. I'm glad to hear the independent idea still has life. Are you
> still thinking "max_connect" is the right name for it? I guess if we
> explain the feature in the man pages the name doesn't matter so much.
> I would still have liked it to be something like "max_session_xprts".
>
> > > Actually, when I said we would be supporting (or rather allowing
> > > for) nconnect * max_connect transports, is that correct? Given how
> > > the code works now, this is going to be nconnect + max_connect
> > > (only if the 1st mount had the nconnect option). We can't "add"
> > > nconnect connections to the new mounts (but with my patch we can
> > > add a single trunk connection). By that I mean: say the first was
> > > "mount IP1:/vol1 /mnt1" (1 connection to IP1). Now the client is
> > > doing "mount IP2:/vol2 /mnt2". IP1 and IP2 are trunkable addresses
> > > of the same server so we add a trunk. We currently don't allow for
> > > doing "mount -o nconnect=2 IP2:/vol2 /mnt2" and then also adding
> > > "nconnect" connections to IP2 along with a trunk. In the 2nd
> > > example, we'd have 1 connection to IP1, then 2 connections to IP2.
> > > Can we allow for that (with the needed code change)? If not, then
> > > we really need to commit to supporting only nconnect (16)
> > > connections + some number of trunkable connections.
> >
> >
> > I think we want to have nconnect be server-global, i.e. nconnect
> > entries for each IP address.

After more thought, I'm not sure I like imposing nconnect connections
on a mount that didn't ask for them just because the mount is made to a
trunkable address. It feels like we'd be going from conserving
resources to creating extra ones that weren't asked for. Note, I'm not
arguing (yet) against having nconnect be server-global; I just don't
have an alternative suggestion.

> Thank you both, Trond and Chuck.
>
> I'll work on v3.
>
>
> >
> > --
> > Trond Myklebust
> > Linux NFS client maintainer, Hammerspace
> > trond.myklebust@hammerspace.com
> >
> >
diff mbox series

Patch

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 330f65727c45..486dec59972b 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -179,6 +179,7 @@  struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 
 	clp->cl_proto = cl_init->proto;
 	clp->cl_nconnect = cl_init->nconnect;
+	clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
 	clp->cl_net = get_net(cl_init->net);
 
 	clp->cl_principal = "*";
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index d95c9a39bc70..cfbff7098f8e 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -29,6 +29,7 @@ 
 #endif
 
 #define NFS_MAX_CONNECTIONS 16
+#define NFS_MAX_TRANSPORTS 128
 
 enum nfs_param {
 	Opt_ac,
@@ -60,6 +61,7 @@  enum nfs_param {
 	Opt_mountvers,
 	Opt_namelen,
 	Opt_nconnect,
+	Opt_max_connect,
 	Opt_port,
 	Opt_posix,
 	Opt_proto,
@@ -158,6 +160,7 @@  static const struct fs_parameter_spec nfs_fs_parameters[] = {
 	fsparam_u32   ("mountvers",	Opt_mountvers),
 	fsparam_u32   ("namlen",	Opt_namelen),
 	fsparam_u32   ("nconnect",	Opt_nconnect),
+	fsparam_u32   ("max_connect",	Opt_max_connect),
 	fsparam_string("nfsvers",	Opt_vers),
 	fsparam_u32   ("port",		Opt_port),
 	fsparam_flag_no("posix",	Opt_posix),
@@ -770,6 +773,11 @@  static int nfs_fs_context_parse_param(struct fs_context *fc,
 			goto out_of_bounds;
 		ctx->nfs_server.nconnect = result.uint_32;
 		break;
+	case Opt_max_connect:
+		if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS)
+			goto out_of_bounds;
+		ctx->nfs_server.max_connect = result.uint_32;
+		break;
 	case Opt_lookupcache:
 		switch (result.uint_32) {
 		case Opt_lookupcache_all:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a36af04188c2..66fc936834f2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -67,6 +67,7 @@  struct nfs_client_initdata {
 	int proto;
 	u32 minorversion;
 	unsigned int nconnect;
+	unsigned int max_connect;
 	struct net *net;
 	const struct rpc_timeout *timeparms;
 	const struct cred *cred;
@@ -121,6 +122,7 @@  struct nfs_fs_context {
 		int			port;
 		unsigned short		protocol;
 		unsigned short		nconnect;
+		unsigned short		max_connect;
 		unsigned short		export_path_len;
 	} nfs_server;
 
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 42719384e25f..640c8235d817 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -863,6 +863,7 @@  static int nfs4_set_client(struct nfs_server *server,
 		const char *ip_addr,
 		int proto, const struct rpc_timeout *timeparms,
 		u32 minorversion, unsigned int nconnect,
+		unsigned int max_connect,
 		struct net *net)
 {
 	struct nfs_client_initdata cl_init = {
@@ -881,6 +882,8 @@  static int nfs4_set_client(struct nfs_server *server,
 
 	if (minorversion == 0)
 		__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
+	else
+		cl_init.max_connect = max_connect;
 	if (proto == XPRT_TRANSPORT_TCP)
 		cl_init.nconnect = nconnect;
 
@@ -950,8 +953,10 @@  struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		return ERR_PTR(-EINVAL);
 	cl_init.hostname = buf;
 
-	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
 		cl_init.nconnect = mds_clp->cl_nconnect;
+		cl_init.max_connect = mds_clp->cl_max_connect;
+	}
 
 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -1120,6 +1125,7 @@  static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
 				&timeparms,
 				ctx->minorversion,
 				ctx->nfs_server.nconnect,
+				ctx->nfs_server.max_connect,
 				fc->net_ns);
 	if (error < 0)
 		return error;
@@ -1209,6 +1215,7 @@  struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
 				parent_client->cl_nconnect,
+				parent_client->cl_max_connect,
 				parent_client->cl_net);
 	if (!error)
 		goto init_server;
@@ -1224,6 +1231,7 @@  struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
 				parent_client->cl_nconnect,
+				parent_client->cl_max_connect,
 				parent_client->cl_net);
 	if (error < 0)
 		goto error;
@@ -1321,7 +1329,7 @@  int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
 				clp->cl_proto, clnt->cl_timeout,
 				clp->cl_minorversion,
-				clp->cl_nconnect, net);
+				clp->cl_nconnect, clp->cl_max_connect, net);
 	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fe58525cfed4..e65c83494c05 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -480,6 +480,8 @@  static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	if (clp->cl_nconnect > 0)
 		seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
 	if (version == 4) {
+		if (clp->cl_max_connect > 1)
+			seq_printf(m, ",max_connect=%u", clp->cl_max_connect);
 		if (nfss->port != NFS_PORT)
 			seq_printf(m, ",port=%u", nfss->port);
 	} else
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index d71a0e90faeb..2a9acbfe00f0 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -62,6 +62,7 @@  struct nfs_client {
 
 	u32			cl_minorversion;/* NFSv4 minorversion */
 	unsigned int		cl_nconnect;	/* Number of connections */
+	unsigned int		cl_max_connect; /* max number of xprts allowed */
 	const char *		cl_principal;  /* used for machine cred */
 
 #if IS_ENABLED(CONFIG_NFS_V4)