diff mbox series

[v2,10/10] nfsd: Ignore rpcbind errors on nfsd startup

Message ID 20211219013803.324724-11-trondmy@kernel.org (mailing list archive)
State New, archived
Headers show
Series Assorted patches for knfsd | expand

Commit Message

Trond Myklebust Dec. 19, 2021, 1:38 a.m. UTC
From: Trond Myklebust <trond.myklebust@hammerspace.com>

NFSv4 doesn't need rpcbind, so let's not refuse to start up just because
the rpcbind registration failed.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfsd/nfsctl.c               |  7 ++++++-
 fs/nfsd/nfsd.h                 |  1 +
 fs/nfsd/nfssvc.c               | 18 ++++++++++++++++--
 include/linux/sunrpc/svcsock.h |  5 +++--
 net/sunrpc/svcsock.c           | 14 ++++++++------
 5 files changed, 34 insertions(+), 11 deletions(-)

Comments

Chuck Lever Dec. 19, 2021, 6:15 p.m. UTC | #1
> On Dec 18, 2021, at 8:38 PM, trondmy@kernel.org wrote:
> 
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
> 
> NFSv4 doesn't need rpcbind, so let's not refuse to start up just because
> the rpcbind registration failed.

Commit 7e55b59b2f32 ("SUNRPC/NFSD: Support a new option for ignoring
the result of svc_register") added vs_rpcb_optnl, which is already
set for nfsd4_version4. Is that not adequate?


> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
> fs/nfsd/nfsctl.c               |  7 ++++++-
> fs/nfsd/nfsd.h                 |  1 +
> fs/nfsd/nfssvc.c               | 18 ++++++++++++++++--
> include/linux/sunrpc/svcsock.h |  5 +++--
> net/sunrpc/svcsock.c           | 14 ++++++++------
> 5 files changed, 34 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 51a49e0cfe37..da9760479acd 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -727,6 +727,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
> 	char *mesg = buf;
> 	int fd, err;
> 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +	int flags = SVC_SOCK_DEFAULTS;
> 
> 	err = get_int(&mesg, &fd);
> 	if (err != 0 || fd < 0)
> @@ -741,7 +742,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
> 	if (err != 0)
> 		return err;
> 
> -	err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
> +	if (!nfsd_rpcbind_error_is_fatal())
> +		flags |= SVC_SOCK_RPCBIND_NOERR;
> +
> +	err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT,
> +			  flags, cred);
> 	if (err < 0) {
> 		nfsd_destroy(net);
> 		return err;
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 498e5a489826..e0356d3ecf65 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -134,6 +134,7 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change);
> int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change);
> void nfsd_reset_versions(struct nfsd_net *nn);
> int nfsd_create_serv(struct net *net);
> +extern bool nfsd_rpcbind_error_is_fatal(void);
> 
> extern int nfsd_max_blksize;
> 
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 6815c70b06af..6f22c72f340d 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -289,17 +289,21 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred)
> {
> 	int error;
> 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +	int flags = SVC_SOCK_DEFAULTS;
> 
> 	if (!list_empty(&nn->nfsd_serv->sv_permsocks))
> 		return 0;
> 
> +	if (!nfsd_rpcbind_error_is_fatal())
> +		flags |= SVC_SOCK_RPCBIND_NOERR;
> +
> 	error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
> -					SVC_SOCK_DEFAULTS, cred);
> +				flags, cred);
> 	if (error < 0)
> 		return error;
> 
> 	error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
> -					SVC_SOCK_DEFAULTS, cred);
> +				flags, cred);
> 	if (error < 0)
> 		return error;
> 
> @@ -340,6 +344,16 @@ static void nfsd_shutdown_generic(void)
> 	nfsd_file_cache_shutdown();
> }
> 
> +static bool nfsd_rpcbind_error_fatal = false;
> +module_param(nfsd_rpcbind_error_fatal, bool, 0644);
> +MODULE_PARM_DESC(nfsd_rpcbind_error_fatal,
> +		 "rpcbind errors are fatal when starting nfsd.");
> +
> +bool nfsd_rpcbind_error_is_fatal(void)
> +{
> +	return nfsd_rpcbind_error_fatal;
> +}
> +
> /*
>  * Allow admin to disable lockd. This would typically be used to allow (e.g.)
>  * a userspace NLM server of some sort to be used.
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index bcc555c7ae9c..f34c222cee9d 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -61,8 +61,8 @@ void		svc_drop(struct svc_rqst *);
> void		svc_sock_update_bufs(struct svc_serv *serv);
> bool		svc_alien_sock(struct net *net, int fd);
> int		svc_addsock(struct svc_serv *serv, const int fd,
> -					char *name_return, const size_t len,
> -					const struct cred *cred);
> +			    char *name_return, const size_t len, int flags,
> +			    const struct cred *cred);
> void		svc_init_xprt_sock(void);
> void		svc_cleanup_xprt_sock(void);
> struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
> @@ -74,5 +74,6 @@ void		svc_sock_destroy(struct svc_xprt *);
> #define SVC_SOCK_DEFAULTS	(0U)
> #define SVC_SOCK_ANONYMOUS	(1U << 0)	/* don't register with pmap */
> #define SVC_SOCK_TEMPORARY	(1U << 1)	/* flag socket as temporary */
> +#define SVC_SOCK_RPCBIND_NOERR	(1U << 2)	/* Ignore pmap errors */
> 
> #endif /* SUNRPC_SVCSOCK_H */
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 478f857cdaed..7f5b12a50bf9 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -1309,14 +1309,15 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
> 	inet = sock->sk;
> 
> 	/* Register socket with portmapper */
> -	if (pmap_register)
> +	if (pmap_register) {
> 		err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
> 				     inet->sk_protocol,
> 				     ntohs(inet_sk(inet)->inet_sport));
> 
> -	if (err < 0) {
> -		kfree(svsk);
> -		return ERR_PTR(err);
> +		if (err < 0 && !(flags & SVC_SOCK_RPCBIND_NOERR)) {
> +			kfree(svsk);
> +			return ERR_PTR(err);
> +		}
> 	}
> 
> 	svsk->sk_sock = sock;
> @@ -1364,6 +1365,7 @@ EXPORT_SYMBOL_GPL(svc_alien_sock);
>  * @fd: file descriptor of the new listener
>  * @name_return: pointer to buffer to fill in with name of listener
>  * @len: size of the buffer
> + * @flags: flags argument for svc_setup_socket()
>  * @cred: credential
>  *
>  * Fills in socket name and returns positive length of name if successful.
> @@ -1371,7 +1373,7 @@ EXPORT_SYMBOL_GPL(svc_alien_sock);
>  * value.
>  */
> int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
> -		const size_t len, const struct cred *cred)
> +		const size_t len, int flags, const struct cred *cred)
> {
> 	int err = 0;
> 	struct socket *so = sockfd_lookup(fd, &err);
> @@ -1395,7 +1397,7 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
> 	err = -ENOENT;
> 	if (!try_module_get(THIS_MODULE))
> 		goto out;
> -	svsk = svc_setup_socket(serv, so, SVC_SOCK_DEFAULTS);
> +	svsk = svc_setup_socket(serv, so, flags);
> 	if (IS_ERR(svsk)) {
> 		module_put(THIS_MODULE);
> 		err = PTR_ERR(svsk);
> -- 
> 2.33.1
> 

--
Chuck Lever
Trond Myklebust Dec. 19, 2021, 8:49 p.m. UTC | #2
On Sun, 2021-12-19 at 18:15 +0000, Chuck Lever III wrote:
> 
> > On Dec 18, 2021, at 8:38 PM, trondmy@kernel.org wrote:
> > 
> > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > 
> > NFSv4 doesn't need rpcbind, so let's not refuse to start up just
> > because
> > the rpcbind registration failed.
> 
> Commit 7e55b59b2f32 ("SUNRPC/NFSD: Support a new option for ignoring
> the result of svc_register") added vs_rpcb_optnl, which is already
> set for nfsd4_version4. Is that not adequate?
> 

The other issue is that under certain circumstances, you may also want
to run NFSv3 without rpcbind support. For instance, when you have a
knfsd server instance running as a data server, there is typically no
need to run rpcbind.
Chuck Lever Dec. 20, 2021, 3:51 p.m. UTC | #3
> On Dec 19, 2021, at 3:49 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Sun, 2021-12-19 at 18:15 +0000, Chuck Lever III wrote:
>> 
>>> On Dec 18, 2021, at 8:38 PM, trondmy@kernel.org wrote:
>>> 
>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>> 
>>> NFSv4 doesn't need rpcbind, so let's not refuse to start up just
>>> because
>>> the rpcbind registration failed.
>> 
>> Commit 7e55b59b2f32 ("SUNRPC/NFSD: Support a new option for ignoring
>> the result of svc_register") added vs_rpcb_optnl, which is already
>> set for nfsd4_version4. Is that not adequate?
>> 
> 
> The other issue is that under certain circumstances, you may also want
> to run NFSv3 without rpcbind support. For instance, when you have a
> knfsd server instance running as a data server, there is typically no
> need to run rpcbind.

So what you are saying is that you'd like this to be a run-time setting
instead of a compile-time setting. Got it.

Note that this patch adds a non-container-aware administrative API. For
the same reasons I NAK'd 9/10, I'm going to NAK this one and ask that
you provide a version that is container-aware (ideally: not a module
parameter).

The new implementation should remove vs_rpcb_optnl, as a clean up.


--
Chuck Lever
Trond Myklebust Dec. 20, 2021, 6:35 p.m. UTC | #4
On Mon, 2021-12-20 at 15:51 +0000, Chuck Lever III wrote:
> 
> 
> > On Dec 19, 2021, at 3:49 PM, Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > 
> > On Sun, 2021-12-19 at 18:15 +0000, Chuck Lever III wrote:
> > > 
> > > > On Dec 18, 2021, at 8:38 PM, trondmy@kernel.org wrote:
> > > > 
> > > > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > > > 
> > > > NFSv4 doesn't need rpcbind, so let's not refuse to start up
> > > > just
> > > > because
> > > > the rpcbind registration failed.
> > > 
> > > Commit 7e55b59b2f32 ("SUNRPC/NFSD: Support a new option for
> > > ignoring
> > > the result of svc_register") added vs_rpcb_optnl, which is
> > > already
> > > set for nfsd4_version4. Is that not adequate?
> > > 
> > 
> > The other issue is that under certain circumstances, you may also
> > want
> > to run NFSv3 without rpcbind support. For instance, when you have a
> > knfsd server instance running as a data server, there is typically
> > no
> > need to run rpcbind.
> 
> So what you are saying is that you'd like this to be a run-time
> setting
> instead of a compile-time setting. Got it.
> 
> Note that this patch adds a non-container-aware administrative API.
> For
> the same reasons I NAK'd 9/10, I'm going to NAK this one and ask that
> you provide a version that is container-aware (ideally: not a module
> parameter).
> 
> The new implementation should remove vs_rpcb_optnl, as a clean up.
> 
> 

This is not something that turns off the registration with rpcbind. It
is something that turns off the decision to abort knfsd if that
registration fails. That's not something that needs to be
containerised: it's just common sense and really wants to be the
default behaviour everywhere.

The only reason for the module parameter is to enable legacy behaviour.
Chuck Lever Dec. 20, 2021, 7:02 p.m. UTC | #5
> On Dec 20, 2021, at 1:35 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Mon, 2021-12-20 at 15:51 +0000, Chuck Lever III wrote:
>> 
>> 
>>> On Dec 19, 2021, at 3:49 PM, Trond Myklebust
>>> <trondmy@hammerspace.com> wrote:
>>> 
>>> On Sun, 2021-12-19 at 18:15 +0000, Chuck Lever III wrote:
>>>> 
>>>>> On Dec 18, 2021, at 8:38 PM, trondmy@kernel.org wrote:
>>>>> 
>>>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>>>> 
>>>>> NFSv4 doesn't need rpcbind, so let's not refuse to start up
>>>>> just
>>>>> because
>>>>> the rpcbind registration failed.
>>>> 
>>>> Commit 7e55b59b2f32 ("SUNRPC/NFSD: Support a new option for
>>>> ignoring
>>>> the result of svc_register") added vs_rpcb_optnl, which is
>>>> already
>>>> set for nfsd4_version4. Is that not adequate?
>>>> 
>>> 
>>> The other issue is that under certain circumstances, you may also
>>> want
>>> to run NFSv3 without rpcbind support. For instance, when you have a
>>> knfsd server instance running as a data server, there is typically
>>> no
>>> need to run rpcbind.
>> 
>> So what you are saying is that you'd like this to be a run-time
>> setting
>> instead of a compile-time setting. Got it.
>> 
>> Note that this patch adds a non-container-aware administrative API.
>> For
>> the same reasons I NAK'd 9/10, I'm going to NAK this one and ask that
>> you provide a version that is container-aware (ideally: not a module
>> parameter).
>> 
>> The new implementation should remove vs_rpcb_optnl, as a clean up.
>> 
>> 
> 
> This is not something that turns off the registration with rpcbind. It
> is something that turns off the decision to abort knfsd if that
> registration fails.

See below, that's just what vs_rpcb_optnl does. It it's true,
then the result of the rpcbind registration for that service
is ignored.

1039 int svc_generic_rpcbind_set(struct net *net,
1040                             const struct svc_program *progp,
1041                             u32 version, int family,
1042                             unsigned short proto,
1043                             unsigned short port)
1044 {
1045         const struct svc_version *vers = progp->pg_vers[version];
1046         int error;
1047 
1048         if (vers == NULL)
1049                 return 0;
1050 
1051         if (vers->vs_hidden) {
1052                 trace_svc_noregister(progp->pg_name, version, proto,
1053                                      port, family, 0);
1054                 return 0;
1055         }
1056 
1057         /*
1058          * Don't register a UDP port if we need congestion
1059          * control.
1060          */
1061         if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
1062                 return 0;
1063 
1064         error = svc_rpcbind_set_version(net, progp, version,
1065                                         family, proto, port);
1066 
1067         return (vers->vs_rpcb_optnl) ? 0 : error;
1068 }
1069 EXPORT_SYMBOL_GPL(svc_generic_rpcbind_set);

And, it's already the case that vs_rpcb_optnl is true for our
NFSv4 server. So the issue is for NFSv3 only. It still looks
to me like the patch description for this patch, at the very
least, is not correct.


> That's not something that needs to be
> containerised: it's just common sense and really wants to be the
> default behaviour everywhere.

I'm not following this. I can imagine deployment cases where one
container might want to ensure rpcbind is running for NFSv3 while
another does not care. What am I missing?


> The only reason for the module parameter is to enable legacy behaviour.


--
Chuck Lever
Trond Myklebust Dec. 20, 2021, 7:52 p.m. UTC | #6
On Mon, 2021-12-20 at 19:02 +0000, Chuck Lever III wrote:
> 
> 
> > On Dec 20, 2021, at 1:35 PM, Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > 
> > On Mon, 2021-12-20 at 15:51 +0000, Chuck Lever III wrote:
> > > 
> > > 
> > > > On Dec 19, 2021, at 3:49 PM, Trond Myklebust
> > > > <trondmy@hammerspace.com> wrote:
> > > > 
> > > > On Sun, 2021-12-19 at 18:15 +0000, Chuck Lever III wrote:
> > > > > 
> > > > > > On Dec 18, 2021, at 8:38 PM, trondmy@kernel.org wrote:
> > > > > > 
> > > > > > From: Trond Myklebust <trond.myklebust@hammerspace.com>
> > > > > > 
> > > > > > NFSv4 doesn't need rpcbind, so let's not refuse to start up
> > > > > > just
> > > > > > because
> > > > > > the rpcbind registration failed.
> > > > > 
> > > > > Commit 7e55b59b2f32 ("SUNRPC/NFSD: Support a new option for
> > > > > ignoring
> > > > > the result of svc_register") added vs_rpcb_optnl, which is
> > > > > already
> > > > > set for nfsd4_version4. Is that not adequate?
> > > > > 
> > > > 
> > > > The other issue is that under certain circumstances, you may
> > > > also
> > > > want
> > > > to run NFSv3 without rpcbind support. For instance, when you
> > > > have a
> > > > knfsd server instance running as a data server, there is
> > > > typically
> > > > no
> > > > need to run rpcbind.
> > > 
> > > So what you are saying is that you'd like this to be a run-time
> > > setting
> > > instead of a compile-time setting. Got it.
> > > 
> > > Note that this patch adds a non-container-aware administrative
> > > API.
> > > For
> > > the same reasons I NAK'd 9/10, I'm going to NAK this one and ask
> > > that
> > > you provide a version that is container-aware (ideally: not a
> > > module
> > > parameter).
> > > 
> > > The new implementation should remove vs_rpcb_optnl, as a clean
> > > up.
> > > 
> > > 
> > 
> > This is not something that turns off the registration with rpcbind.
> > It
> > is something that turns off the decision to abort knfsd if that
> > registration fails.
> 
> See below, that's just what vs_rpcb_optnl does. It it's true,
> then the result of the rpcbind registration for that service
> is ignored.
> 
> 1039 int svc_generic_rpcbind_set(struct net *net,
> 1040                             const struct svc_program *progp,
> 1041                             u32 version, int family,
> 1042                             unsigned short proto,
> 1043                             unsigned short port)
> 1044 {
> 1045         const struct svc_version *vers = progp-
> >pg_vers[version];
> 1046         int error;
> 1047 
> 1048         if (vers == NULL)
> 1049                 return 0;
> 1050 
> 1051         if (vers->vs_hidden) {
> 1052                 trace_svc_noregister(progp->pg_name, version,
> proto,
> 1053                                      port, family, 0);
> 1054                 return 0;
> 1055         }
> 1056 
> 1057         /*
> 1058          * Don't register a UDP port if we need congestion
> 1059          * control.
> 1060          */
> 1061         if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
> 1062                 return 0;
> 1063 
> 1064         error = svc_rpcbind_set_version(net, progp, version,
> 1065                                         family, proto, port);
> 1066 
> 1067         return (vers->vs_rpcb_optnl) ? 0 : error;
> 1068 }
> 1069 EXPORT_SYMBOL_GPL(svc_generic_rpcbind_set);
> 
> And, it's already the case that vs_rpcb_optnl is true for our
> NFSv4 server. So the issue is for NFSv3 only. It still looks
> to me like the patch description for this patch, at the very
> least, is not correct.
> 
> 
> > That's not something that needs to be
> > containerised: it's just common sense and really wants to be the
> > default behaviour everywhere.
> 
> I'm not following this. I can imagine deployment cases where one
> container might want to ensure rpcbind is running for NFSv3 while
> another does not care. What am I missing?
> 

Monitoring that rpcbind is working is not a kernel task. It's something
that can and should be done in userspace. The kernel task should only
be to try to register the ports that it is using to that rpcbind server
if it is up and running.

In a containerised environment, then even the job of registering the
ports is not necessarily desirable behaviour, since there will almost
always be some additional form of NAT or address + port mapping going
on that knfsd has no visibility into.

> 
> > The only reason for the module parameter is to enable legacy
> > behaviour.
> 
>
Chuck Lever Dec. 20, 2021, 8:12 p.m. UTC | #7
> On Dec 20, 2021, at 2:52 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Mon, 2021-12-20 at 19:02 +0000, Chuck Lever III wrote:
>> 
>> 
>>> On Dec 20, 2021, at 1:35 PM, Trond Myklebust
>>> <trondmy@hammerspace.com> wrote:
>>> 
>>> On Mon, 2021-12-20 at 15:51 +0000, Chuck Lever III wrote:
>>>> 
>>>> 
>>>>> On Dec 19, 2021, at 3:49 PM, Trond Myklebust
>>>>> <trondmy@hammerspace.com> wrote:
>>>>> 
>>>>> On Sun, 2021-12-19 at 18:15 +0000, Chuck Lever III wrote:
>>>>>> 
>>>>>>> On Dec 18, 2021, at 8:38 PM, trondmy@kernel.org wrote:
>>>>>>> 
>>>>>>> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>>>>>>> 
>>>>>>> NFSv4 doesn't need rpcbind, so let's not refuse to start up
>>>>>>> just
>>>>>>> because
>>>>>>> the rpcbind registration failed.
>>>>>> 
>>>>>> Commit 7e55b59b2f32 ("SUNRPC/NFSD: Support a new option for
>>>>>> ignoring
>>>>>> the result of svc_register") added vs_rpcb_optnl, which is
>>>>>> already
>>>>>> set for nfsd4_version4. Is that not adequate?
>>>>>> 
>>>>> 
>>>>> The other issue is that under certain circumstances, you may
>>>>> also
>>>>> want
>>>>> to run NFSv3 without rpcbind support. For instance, when you
>>>>> have a
>>>>> knfsd server instance running as a data server, there is
>>>>> typically
>>>>> no
>>>>> need to run rpcbind.
>>>> 
>>>> So what you are saying is that you'd like this to be a run-time
>>>> setting
>>>> instead of a compile-time setting. Got it.
>>>> 
>>>> Note that this patch adds a non-container-aware administrative
>>>> API.
>>>> For
>>>> the same reasons I NAK'd 9/10, I'm going to NAK this one and ask
>>>> that
>>>> you provide a version that is container-aware (ideally: not a
>>>> module
>>>> parameter).
>>>> 
>>>> The new implementation should remove vs_rpcb_optnl, as a clean
>>>> up.
>>>> 
>>>> 
>>> 
>>> This is not something that turns off the registration with rpcbind.
>>> It
>>> is something that turns off the decision to abort knfsd if that
>>> registration fails.
>> 
>> See below, that's just what vs_rpcb_optnl does. It it's true,
>> then the result of the rpcbind registration for that service
>> is ignored.
>> 
>> 1039 int svc_generic_rpcbind_set(struct net *net,
>> 1040                             const struct svc_program *progp,
>> 1041                             u32 version, int family,
>> 1042                             unsigned short proto,
>> 1043                             unsigned short port)
>> 1044 {
>> 1045         const struct svc_version *vers = progp-
>>> pg_vers[version];
>> 1046         int error;
>> 1047 
>> 1048         if (vers == NULL)
>> 1049                 return 0;
>> 1050 
>> 1051         if (vers->vs_hidden) {
>> 1052                 trace_svc_noregister(progp->pg_name, version,
>> proto,
>> 1053                                      port, family, 0);
>> 1054                 return 0;
>> 1055         }
>> 1056 
>> 1057         /*
>> 1058          * Don't register a UDP port if we need congestion
>> 1059          * control.
>> 1060          */
>> 1061         if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
>> 1062                 return 0;
>> 1063 
>> 1064         error = svc_rpcbind_set_version(net, progp, version,
>> 1065                                         family, proto, port);
>> 1066 
>> 1067         return (vers->vs_rpcb_optnl) ? 0 : error;
>> 1068 }
>> 1069 EXPORT_SYMBOL_GPL(svc_generic_rpcbind_set);
>> 
>> And, it's already the case that vs_rpcb_optnl is true for our
>> NFSv4 server. So the issue is for NFSv3 only. It still looks
>> to me like the patch description for this patch, at the very
>> least, is not correct.
>> 
>> 
>>> That's not something that needs to be
>>> containerised: it's just common sense and really wants to be the
>>> default behaviour everywhere.
>> 
>> I'm not following this. I can imagine deployment cases where one
>> container might want to ensure rpcbind is running for NFSv3 while
>> another does not care. What am I missing?
>> 
> 
> Monitoring that rpcbind is working is not a kernel task. It's something
> that can and should be done in userspace. The kernel task should only
> be to try to register the ports that it is using to that rpcbind server
> if it is up and running.
> 
> In a containerised environment, then even the job of registering the
> ports is not necessarily desirable behaviour, since there will almost
> always be some additional form of NAT or address + port mapping going
> on that knfsd has no visibility into.

Today, the kernel fails nfsd start-up if it can't complete rpcbind
registration. If we want to change that, then the typical process for
this has been that the user space components to support it need to be
in place before we think of changing kernel behavior. Are they?

And, if knfsd is really not the place for checking the registration,
then the right answer is to remove the in-kernel check for everyone,
but only after a proper user space mechanism is in place.

I really don't want to have to carry a module parameter for this
transition in perpetuity. A better choice for managing the transition
would be a CONFIG option, which is relatively harmless to remove once
it is no longer needed.

I don't object to eventually removing the kernel registration check,
but I want to see the transition away from the current arrangement
done properly, please. At the very least we deserve to have some
dialog about the long-term direction.


--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 51a49e0cfe37..da9760479acd 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -727,6 +727,7 @@  static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
 	char *mesg = buf;
 	int fd, err;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int flags = SVC_SOCK_DEFAULTS;
 
 	err = get_int(&mesg, &fd);
 	if (err != 0 || fd < 0)
@@ -741,7 +742,11 @@  static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
 	if (err != 0)
 		return err;
 
-	err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+	if (!nfsd_rpcbind_error_is_fatal())
+		flags |= SVC_SOCK_RPCBIND_NOERR;
+
+	err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT,
+			  flags, cred);
 	if (err < 0) {
 		nfsd_destroy(net);
 		return err;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 498e5a489826..e0356d3ecf65 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -134,6 +134,7 @@  int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change);
 int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change);
 void nfsd_reset_versions(struct nfsd_net *nn);
 int nfsd_create_serv(struct net *net);
+extern bool nfsd_rpcbind_error_is_fatal(void);
 
 extern int nfsd_max_blksize;
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6815c70b06af..6f22c72f340d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -289,17 +289,21 @@  static int nfsd_init_socks(struct net *net, const struct cred *cred)
 {
 	int error;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int flags = SVC_SOCK_DEFAULTS;
 
 	if (!list_empty(&nn->nfsd_serv->sv_permsocks))
 		return 0;
 
+	if (!nfsd_rpcbind_error_is_fatal())
+		flags |= SVC_SOCK_RPCBIND_NOERR;
+
 	error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
-					SVC_SOCK_DEFAULTS, cred);
+				flags, cred);
 	if (error < 0)
 		return error;
 
 	error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
-					SVC_SOCK_DEFAULTS, cred);
+				flags, cred);
 	if (error < 0)
 		return error;
 
@@ -340,6 +344,16 @@  static void nfsd_shutdown_generic(void)
 	nfsd_file_cache_shutdown();
 }
 
+static bool nfsd_rpcbind_error_fatal = false;
+module_param(nfsd_rpcbind_error_fatal, bool, 0644);
+MODULE_PARM_DESC(nfsd_rpcbind_error_fatal,
+		 "rpcbind errors are fatal when starting nfsd.");
+
+bool nfsd_rpcbind_error_is_fatal(void)
+{
+	return nfsd_rpcbind_error_fatal;
+}
+
 /*
  * Allow admin to disable lockd. This would typically be used to allow (e.g.)
  * a userspace NLM server of some sort to be used.
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index bcc555c7ae9c..f34c222cee9d 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -61,8 +61,8 @@  void		svc_drop(struct svc_rqst *);
 void		svc_sock_update_bufs(struct svc_serv *serv);
 bool		svc_alien_sock(struct net *net, int fd);
 int		svc_addsock(struct svc_serv *serv, const int fd,
-					char *name_return, const size_t len,
-					const struct cred *cred);
+			    char *name_return, const size_t len, int flags,
+			    const struct cred *cred);
 void		svc_init_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
 struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
@@ -74,5 +74,6 @@  void		svc_sock_destroy(struct svc_xprt *);
 #define SVC_SOCK_DEFAULTS	(0U)
 #define SVC_SOCK_ANONYMOUS	(1U << 0)	/* don't register with pmap */
 #define SVC_SOCK_TEMPORARY	(1U << 1)	/* flag socket as temporary */
+#define SVC_SOCK_RPCBIND_NOERR	(1U << 2)	/* Ignore pmap errors */
 
 #endif /* SUNRPC_SVCSOCK_H */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 478f857cdaed..7f5b12a50bf9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1309,14 +1309,15 @@  static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	inet = sock->sk;
 
 	/* Register socket with portmapper */
-	if (pmap_register)
+	if (pmap_register) {
 		err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
 				     inet->sk_protocol,
 				     ntohs(inet_sk(inet)->inet_sport));
 
-	if (err < 0) {
-		kfree(svsk);
-		return ERR_PTR(err);
+		if (err < 0 && !(flags & SVC_SOCK_RPCBIND_NOERR)) {
+			kfree(svsk);
+			return ERR_PTR(err);
+		}
 	}
 
 	svsk->sk_sock = sock;
@@ -1364,6 +1365,7 @@  EXPORT_SYMBOL_GPL(svc_alien_sock);
  * @fd: file descriptor of the new listener
  * @name_return: pointer to buffer to fill in with name of listener
  * @len: size of the buffer
+ * @flags: flags argument for svc_setup_socket()
  * @cred: credential
  *
  * Fills in socket name and returns positive length of name if successful.
@@ -1371,7 +1373,7 @@  EXPORT_SYMBOL_GPL(svc_alien_sock);
  * value.
  */
 int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
-		const size_t len, const struct cred *cred)
+		const size_t len, int flags, const struct cred *cred)
 {
 	int err = 0;
 	struct socket *so = sockfd_lookup(fd, &err);
@@ -1395,7 +1397,7 @@  int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
 	err = -ENOENT;
 	if (!try_module_get(THIS_MODULE))
 		goto out;
-	svsk = svc_setup_socket(serv, so, SVC_SOCK_DEFAULTS);
+	svsk = svc_setup_socket(serv, so, flags);
 	if (IS_ERR(svsk)) {
 		module_put(THIS_MODULE);
 		err = PTR_ERR(svsk);