diff mbox series

[v2,1/1] NFSv4.1: fix pnfs MDS=DS session trunking

Message ID 20230824162416.17482-1-olga.kornievskaia@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2,1/1] NFSv4.1: fix pnfs MDS=DS session trunking | expand

Commit Message

Olga Kornievskaia Aug. 24, 2023, 4:24 p.m. UTC
From: Olga Kornievskaia <kolga@netapp.com>

Currently, when GETDEVICEINFO returns multiple locations where each
is a different IP but the server's identity is same as MDS, then
nfs4_set_ds_client() finds the existing nfs_client structure which
has the MDS's max_connect value (and if it's 1), then the 1st IP
on the DS's list will get dropped due to MDS trunking rules. Other
IPs would be added as they fall under the pnfs trunking rules.

Instead, this patch prposed to treat MDS=DS as DS trunking and
make sure that MDS's max_connect limit does not apply to the
1st IP returned in the GETDEVICEINFO list.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfs/nfs4client.c | 7 ++++++-
 net/sunrpc/clnt.c   | 9 ++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

Comments

Trond Myklebust Aug. 24, 2023, 4:42 p.m. UTC | #1
On Thu, 2023-08-24 at 12:24 -0400, Olga Kornievskaia wrote:
> From: Olga Kornievskaia <kolga@netapp.com>
> 
> Currently, when GETDEVICEINFO returns multiple locations where each
> is a different IP but the server's identity is same as MDS, then
> nfs4_set_ds_client() finds the existing nfs_client structure which
> has the MDS's max_connect value (and if it's 1), then the 1st IP
> on the DS's list will get dropped due to MDS trunking rules. Other
> IPs would be added as they fall under the pnfs trunking rules.
> 
> Instead, this patch prposed to treat MDS=DS as DS trunking and
> make sure that MDS's max_connect limit does not apply to the
> 1st IP returned in the GETDEVICEINFO list.
> 
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfs/nfs4client.c | 7 ++++++-
>  net/sunrpc/clnt.c   | 9 ++++++---
>  2 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> index 27fb25567ce7..b35acd79b895 100644
> --- a/fs/nfs/nfs4client.c
> +++ b/fs/nfs/nfs4client.c
> @@ -417,6 +417,7 @@ static void nfs4_add_trunk(struct nfs_client
> *clp, struct nfs_client *old)
>                 .net = old->cl_net,
>                 .servername = old->cl_hostname,
>         };
> +       int max_connect = old->cl_max_connect;
>  
>         if (clp->cl_proto != old->cl_proto)
>                 return;
> @@ -428,9 +429,12 @@ static void nfs4_add_trunk(struct nfs_client
> *clp, struct nfs_client *old)
>  
>         xprt_args.dstaddr = clp_sap;
>         xprt_args.addrlen = clp_salen;
> +       if (clp->cl_max_connect != old->cl_max_connect &&
> +           test_bit(NFS_CS_DS, &clp->cl_flags))
> +               max_connect = clp->cl_max_connect;
>  
>         rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
> -                         rpc_clnt_test_and_add_xprt, NULL);
> +                         rpc_clnt_test_and_add_xprt, &max_connect);

Instead of using rpc_clnt_add_xprt() to set transport parameters after
the fact, can we please instead just add these parameters to the struct
rpc_create_args/struct xprt_create? Please see the following patch in
my testing branch
https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=commitdiff;h=d1fb679d1dff0ae9e118d3380c0f5927cd279efe

>  }
>  
>  /**
> @@ -1010,6 +1014,7 @@ struct nfs_client *nfs4_set_ds_client(struct
> nfs_server *mds_srv,
>                 __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
>  
>         __set_bit(NFS_CS_DS, &cl_init.init_flags);
> +       cl_init.max_connect = NFS_MAX_TRANSPORTS;
>         /*
>          * Set an authflavor equual to the MDS value. Use the MDS
> nfs_client
>          * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as
> the MDS
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index d7c697af3762..325dad20a924 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -2904,16 +2904,19 @@ static const struct rpc_call_ops
> rpc_cb_add_xprt_call_ops = {
>   * @clnt: pointer to struct rpc_clnt
>   * @xps: pointer to struct rpc_xprt_switch,
>   * @xprt: pointer struct rpc_xprt
> - * @dummy: unused
> + * @in_max_connect: pointer to the max_connect value for the passed
> in xprt transport
>   */
>  int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
>                 struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
> -               void *dummy)
> +               void *in_max_connect)
>  {
>         struct rpc_cb_add_xprt_calldata *data;
>         struct rpc_task *task;
> +       int max_connect = clnt->cl_max_connect;
>  
> -       if (xps->xps_nunique_destaddr_xprts + 1 > clnt-
> >cl_max_connect) {
> +       if (in_max_connect)
> +               max_connect = *(int *)in_max_connect;
> +       if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
>                 rcu_read_lock();
>                 pr_warn("SUNRPC: reached max allowed number (%d) did
> not add "
>                         "transport to server: %s\n", clnt-
> >cl_max_connect,
Olga Kornievskaia Aug. 24, 2023, 5:20 p.m. UTC | #2
On Thu, Aug 24, 2023 at 12:42 PM Trond Myklebust
<trondmy@hammerspace.com> wrote:
>
> On Thu, 2023-08-24 at 12:24 -0400, Olga Kornievskaia wrote:
> > From: Olga Kornievskaia <kolga@netapp.com>
> >
> > Currently, when GETDEVICEINFO returns multiple locations where each
> > is a different IP but the server's identity is same as MDS, then
> > nfs4_set_ds_client() finds the existing nfs_client structure which
> > has the MDS's max_connect value (and if it's 1), then the 1st IP
> > on the DS's list will get dropped due to MDS trunking rules. Other
> > IPs would be added as they fall under the pnfs trunking rules.
> >
> > Instead, this patch prposed to treat MDS=DS as DS trunking and
> > make sure that MDS's max_connect limit does not apply to the
> > 1st IP returned in the GETDEVICEINFO list.
> >
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> >  fs/nfs/nfs4client.c | 7 ++++++-
> >  net/sunrpc/clnt.c   | 9 ++++++---
> >  2 files changed, 12 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> > index 27fb25567ce7..b35acd79b895 100644
> > --- a/fs/nfs/nfs4client.c
> > +++ b/fs/nfs/nfs4client.c
> > @@ -417,6 +417,7 @@ static void nfs4_add_trunk(struct nfs_client
> > *clp, struct nfs_client *old)
> >                 .net = old->cl_net,
> >                 .servername = old->cl_hostname,
> >         };
> > +       int max_connect = old->cl_max_connect;
> >
> >         if (clp->cl_proto != old->cl_proto)
> >                 return;
> > @@ -428,9 +429,12 @@ static void nfs4_add_trunk(struct nfs_client
> > *clp, struct nfs_client *old)
> >
> >         xprt_args.dstaddr = clp_sap;
> >         xprt_args.addrlen = clp_salen;
> > +       if (clp->cl_max_connect != old->cl_max_connect &&
> > +           test_bit(NFS_CS_DS, &clp->cl_flags))
> > +               max_connect = clp->cl_max_connect;
> >
> >         rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
> > -                         rpc_clnt_test_and_add_xprt, NULL);
> > +                         rpc_clnt_test_and_add_xprt, &max_connect);
>
> Instead of using rpc_clnt_add_xprt() to set transport parameters after
> the fact, can we please instead just add these parameters to the struct
> rpc_create_args/struct xprt_create? Please see the following patch in
> my testing branch

But we are not setting parameters after the fact. We are making sure
that we are not constraining the pnfs trunking by the MDS trunking
when MDS=DS.

> https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=commitdiff;h=d1fb679d1dff0ae9e118d3380c0f5927cd279efe
>
> >  }
> >
> >  /**
> > @@ -1010,6 +1014,7 @@ struct nfs_client *nfs4_set_ds_client(struct
> > nfs_server *mds_srv,
> >                 __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
> >
> >         __set_bit(NFS_CS_DS, &cl_init.init_flags);
> > +       cl_init.max_connect = NFS_MAX_TRANSPORTS;
> >         /*
> >          * Set an authflavor equual to the MDS value. Use the MDS
> > nfs_client
> >          * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as
> > the MDS
> > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > index d7c697af3762..325dad20a924 100644
> > --- a/net/sunrpc/clnt.c
> > +++ b/net/sunrpc/clnt.c
> > @@ -2904,16 +2904,19 @@ static const struct rpc_call_ops
> > rpc_cb_add_xprt_call_ops = {
> >   * @clnt: pointer to struct rpc_clnt
> >   * @xps: pointer to struct rpc_xprt_switch,
> >   * @xprt: pointer struct rpc_xprt
> > - * @dummy: unused
> > + * @in_max_connect: pointer to the max_connect value for the passed
> > in xprt transport
> >   */
> >  int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
> >                 struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
> > -               void *dummy)
> > +               void *in_max_connect)
> >  {
> >         struct rpc_cb_add_xprt_calldata *data;
> >         struct rpc_task *task;
> > +       int max_connect = clnt->cl_max_connect;
> >
> > -       if (xps->xps_nunique_destaddr_xprts + 1 > clnt-
> > >cl_max_connect) {
> > +       if (in_max_connect)
> > +               max_connect = *(int *)in_max_connect;
> > +       if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
> >                 rcu_read_lock();
> >                 pr_warn("SUNRPC: reached max allowed number (%d) did
> > not add "
> >                         "transport to server: %s\n", clnt-
> > >cl_max_connect,
>
> --
> Trond Myklebust
> CTO, Hammerspace Inc
> 1900 S Norfolk St, Suite 350 - #45
> San Mateo, CA 94403
>
> www.hammerspace.com
Olga Kornievskaia Aug. 24, 2023, 5:31 p.m. UTC | #3
On Thu, Aug 24, 2023 at 1:20 PM Olga Kornievskaia
<olga.kornievskaia@gmail.com> wrote:
>
> On Thu, Aug 24, 2023 at 12:42 PM Trond Myklebust
> <trondmy@hammerspace.com> wrote:
> >
> > On Thu, 2023-08-24 at 12:24 -0400, Olga Kornievskaia wrote:
> > > From: Olga Kornievskaia <kolga@netapp.com>
> > >
> > > Currently, when GETDEVICEINFO returns multiple locations where each
> > > is a different IP but the server's identity is same as MDS, then
> > > nfs4_set_ds_client() finds the existing nfs_client structure which
> > > has the MDS's max_connect value (and if it's 1), then the 1st IP
> > > on the DS's list will get dropped due to MDS trunking rules. Other
> > > IPs would be added as they fall under the pnfs trunking rules.
> > >
> > > Instead, this patch prposed to treat MDS=DS as DS trunking and
> > > make sure that MDS's max_connect limit does not apply to the
> > > 1st IP returned in the GETDEVICEINFO list.
> > >
> > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > ---
> > >  fs/nfs/nfs4client.c | 7 ++++++-
> > >  net/sunrpc/clnt.c   | 9 ++++++---
> > >  2 files changed, 12 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> > > index 27fb25567ce7..b35acd79b895 100644
> > > --- a/fs/nfs/nfs4client.c
> > > +++ b/fs/nfs/nfs4client.c
> > > @@ -417,6 +417,7 @@ static void nfs4_add_trunk(struct nfs_client
> > > *clp, struct nfs_client *old)
> > >                 .net = old->cl_net,
> > >                 .servername = old->cl_hostname,
> > >         };
> > > +       int max_connect = old->cl_max_connect;
> > >
> > >         if (clp->cl_proto != old->cl_proto)
> > >                 return;
> > > @@ -428,9 +429,12 @@ static void nfs4_add_trunk(struct nfs_client
> > > *clp, struct nfs_client *old)
> > >
> > >         xprt_args.dstaddr = clp_sap;
> > >         xprt_args.addrlen = clp_salen;
> > > +       if (clp->cl_max_connect != old->cl_max_connect &&
> > > +           test_bit(NFS_CS_DS, &clp->cl_flags))
> > > +               max_connect = clp->cl_max_connect;
> > >
> > >         rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
> > > -                         rpc_clnt_test_and_add_xprt, NULL);
> > > +                         rpc_clnt_test_and_add_xprt, &max_connect);
> >
> > Instead of using rpc_clnt_add_xprt() to set transport parameters after
> > the fact, can we please instead just add these parameters to the struct
> > rpc_create_args/struct xprt_create? Please see the following patch in
> > my testing branch
>
> But we are not setting parameters after the fact. We are making sure
> that we are not constraining the pnfs trunking by the MDS trunking
> when MDS=DS.

To add to that it's because the 1st address of the DS addresses goes
thru the pnfs.c
rpc_clnt_add_xprt()->rpc_clnt_setup_test_and_add_xprt,() which checks
for the max_connect value of the MDS. Other DS addresses are just
added thru the other path.

Instead of blindly using clnt->cl_max_connect value (of the MDS) when
we are called for PNFS DSs, we pass in the value (max) to be used for
the comparison.

> > https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=commitdiff;h=d1fb679d1dff0ae9e118d3380c0f5927cd279efe
> >
> > >  }
> > >
> > >  /**
> > > @@ -1010,6 +1014,7 @@ struct nfs_client *nfs4_set_ds_client(struct
> > > nfs_server *mds_srv,
> > >                 __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
> > >
> > >         __set_bit(NFS_CS_DS, &cl_init.init_flags);
> > > +       cl_init.max_connect = NFS_MAX_TRANSPORTS;
> > >         /*
> > >          * Set an authflavor equual to the MDS value. Use the MDS
> > > nfs_client
> > >          * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as
> > > the MDS
> > > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > > index d7c697af3762..325dad20a924 100644
> > > --- a/net/sunrpc/clnt.c
> > > +++ b/net/sunrpc/clnt.c
> > > @@ -2904,16 +2904,19 @@ static const struct rpc_call_ops
> > > rpc_cb_add_xprt_call_ops = {
> > >   * @clnt: pointer to struct rpc_clnt
> > >   * @xps: pointer to struct rpc_xprt_switch,
> > >   * @xprt: pointer struct rpc_xprt
> > > - * @dummy: unused
> > > + * @in_max_connect: pointer to the max_connect value for the passed
> > > in xprt transport
> > >   */
> > >  int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
> > >                 struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
> > > -               void *dummy)
> > > +               void *in_max_connect)
> > >  {
> > >         struct rpc_cb_add_xprt_calldata *data;
> > >         struct rpc_task *task;
> > > +       int max_connect = clnt->cl_max_connect;
> > >
> > > -       if (xps->xps_nunique_destaddr_xprts + 1 > clnt-
> > > >cl_max_connect) {
> > > +       if (in_max_connect)
> > > +               max_connect = *(int *)in_max_connect;
> > > +       if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
> > >                 rcu_read_lock();
> > >                 pr_warn("SUNRPC: reached max allowed number (%d) did
> > > not add "
> > >                         "transport to server: %s\n", clnt-
> > > >cl_max_connect,
> >
> > --
> > Trond Myklebust
> > CTO, Hammerspace Inc
> > 1900 S Norfolk St, Suite 350 - #45
> > San Mateo, CA 94403
> >
> > www.hammerspace.com
diff mbox series

Patch

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 27fb25567ce7..b35acd79b895 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -417,6 +417,7 @@  static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
 		.net = old->cl_net,
 		.servername = old->cl_hostname,
 	};
+	int max_connect = old->cl_max_connect;
 
 	if (clp->cl_proto != old->cl_proto)
 		return;
@@ -428,9 +429,12 @@  static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
 
 	xprt_args.dstaddr = clp_sap;
 	xprt_args.addrlen = clp_salen;
+	if (clp->cl_max_connect != old->cl_max_connect &&
+	    test_bit(NFS_CS_DS, &clp->cl_flags))
+		max_connect = clp->cl_max_connect;
 
 	rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
-			  rpc_clnt_test_and_add_xprt, NULL);
+			  rpc_clnt_test_and_add_xprt, &max_connect);
 }
 
 /**
@@ -1010,6 +1014,7 @@  struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 
 	__set_bit(NFS_CS_DS, &cl_init.init_flags);
+	cl_init.max_connect = NFS_MAX_TRANSPORTS;
 	/*
 	 * Set an authflavor equual to the MDS value. Use the MDS nfs_client
 	 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d7c697af3762..325dad20a924 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2904,16 +2904,19 @@  static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
  * @clnt: pointer to struct rpc_clnt
  * @xps: pointer to struct rpc_xprt_switch,
  * @xprt: pointer struct rpc_xprt
- * @dummy: unused
+ * @in_max_connect: pointer to the max_connect value for the passed in xprt transport
  */
 int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
 		struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
-		void *dummy)
+		void *in_max_connect)
 {
 	struct rpc_cb_add_xprt_calldata *data;
 	struct rpc_task *task;
+	int max_connect = clnt->cl_max_connect;
 
-	if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
+	if (in_max_connect)
+		max_connect = *(int *)in_max_connect;
+	if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
 		rcu_read_lock();
 		pr_warn("SUNRPC: reached max allowed number (%d) did not add "
 			"transport to server: %s\n", clnt->cl_max_connect,