diff mbox

[v10,44/46] nfs: Add richacl support

Message ID 1444604337-17651-45-git-send-email-andreas.gruenbacher@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andreas Grünbacher Oct. 11, 2015, 10:58 p.m. UTC
From: Andreas Gruenbacher <agruenba@redhat.com>

Add support for the "system.richacl" xattr in nfs.  The existing
"system.nfs4_acl" xattr on nfs doesn't map user and group names to uids
and gids; the "system.richacl" xattr does, and only keeps the
on-the-wire names when there is no mapping.  This allows permissions to be
copied across different file systems.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/nfs/inode.c            |   3 -
 fs/nfs/nfs4proc.c         | 698 +++++++++++++++++++++++++++++++++-------------
 fs/nfs/nfs4xdr.c          | 179 ++++++++++--
 fs/nfs/super.c            |   4 +-
 include/linux/nfs_fs.h    |   1 -
 include/linux/nfs_fs_sb.h |   2 +
 include/linux/nfs_xdr.h   |   9 +-
 7 files changed, 673 insertions(+), 223 deletions(-)

Comments

Schumaker, Anna Oct. 12, 2015, 2:39 p.m. UTC | #1
Hi Andreas,

On 10/11/2015 06:58 PM, Andreas Gruenbacher wrote:
> From: Andreas Gruenbacher <agruenba@redhat.com>
> 
> Add support for the "system.richacl" xattr in nfs.  The existing
> "system.nfs4_acl" xattr on nfs doesn't map user and group names to uids
> and gids; the "system.richacl" xattr does, and only keeps the
> on-the-wire names when there is no mapping.  This allows permissions to be
> copied across different file systems.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> ---
>  fs/nfs/inode.c            |   3 -
>  fs/nfs/nfs4proc.c         | 698 +++++++++++++++++++++++++++++++++-------------
>  fs/nfs/nfs4xdr.c          | 179 ++++++++++--
>  fs/nfs/super.c            |   4 +-
>  include/linux/nfs_fs.h    |   1 -
>  include/linux/nfs_fs_sb.h |   2 +
>  include/linux/nfs_xdr.h   |   9 +-
>  7 files changed, 673 insertions(+), 223 deletions(-)
> 
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index 326d9e1..843d15d 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -1852,9 +1852,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
>  		return NULL;
>  	nfsi->flags = 0UL;
>  	nfsi->cache_validity = 0UL;
> -#if IS_ENABLED(CONFIG_NFS_V4)
> -	nfsi->nfs4_acl = NULL;
> -#endif /* CONFIG_NFS_V4 */
>  	return &nfsi->vfs_inode;
>  }
>  EXPORT_SYMBOL_GPL(nfs_alloc_inode);
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index eec5c4c..a686251 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -55,6 +55,9 @@
>  #include <linux/xattr.h>
>  #include <linux/utsname.h>
>  #include <linux/freezer.h>
> +#include <linux/richacl.h>
> +#include <linux/richacl_xattr.h>
> +#include <linux/nfs4acl.h>
>  
>  #include "nfs4_fs.h"
>  #include "delegation.h"
> @@ -2982,15 +2985,18 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
>  			res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
>  		}
>  		memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
> -		server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
> -				NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
> +		server->caps &= ~(NFS_CAP_ALLOW_ACLS|NFS_CAP_DENY_ACLS|
> +				NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
>  				NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
>  				NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
>  				NFS_CAP_CTIME|NFS_CAP_MTIME|
>  				NFS_CAP_SECURITY_LABEL);
> -		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
> -				res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
> -			server->caps |= NFS_CAP_ACLS;
> +		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) {
> +			if (res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
> +				server->caps |= NFS_CAP_ALLOW_ACLS;
> +			if (res.acl_bitmask & ACL4_SUPPORT_DENY_ACL)
> +				server->caps |= NFS_CAP_DENY_ACLS;
> +		}
>  		if (res.has_links != 0)
>  			server->caps |= NFS_CAP_HARDLINKS;
>  		if (res.has_symlinks != 0)
> @@ -4518,45 +4524,11 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
>  	return 0;
>  }
>  
> -static inline int nfs4_server_supports_acls(struct nfs_server *server)
> -{
> -	return server->caps & NFS_CAP_ACLS;
> -}
> -
> -/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
> - * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on
> - * the stack.
> +/* An arbitrary limit; we allocate at most DIV_ROUND_UP(NFS4ACL_SIZE_MAX,
> + * PAGE_SIZE) pages and put an array of DIV_ROUND_UP(NFS4ACL_SIZE_MAX,
> + * PAGE_SIZE) pages on the stack when encoding or decoding acls.
>   */
> -#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
> -
> -static int buf_to_pages_noslab(const void *buf, size_t buflen,
> -		struct page **pages)
> -{
> -	struct page *newpage, **spages;
> -	int rc = 0;
> -	size_t len;
> -	spages = pages;
> -
> -	do {
> -		len = min_t(size_t, PAGE_SIZE, buflen);
> -		newpage = alloc_page(GFP_KERNEL);
> -
> -		if (newpage == NULL)
> -			goto unwind;
> -		memcpy(page_address(newpage), buf, len);
> -                buf += len;
> -                buflen -= len;
> -		*pages++ = newpage;
> -		rc++;
> -	} while (buflen != 0);
> -
> -	return rc;
> -
> -unwind:
> -	for(; rc > 0; rc--)
> -		__free_page(spages[rc-1]);
> -	return -ENOMEM;
> -}
> +#define NFS4ACL_SIZE_MAX 65536
>  
>  struct nfs4_cached_acl {
>  	int cached;
> @@ -4564,66 +4536,9 @@ struct nfs4_cached_acl {
>  	char data[0];
>  };
>  
> -static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl)
> -{
> -	struct nfs_inode *nfsi = NFS_I(inode);
> -
> -	spin_lock(&inode->i_lock);
> -	kfree(nfsi->nfs4_acl);
> -	nfsi->nfs4_acl = acl;
> -	spin_unlock(&inode->i_lock);
> -}
> -
>  static void nfs4_zap_acl_attr(struct inode *inode)
>  {
> -	nfs4_set_cached_acl(inode, NULL);
> -}
> -
> -static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen)
> -{
> -	struct nfs_inode *nfsi = NFS_I(inode);
> -	struct nfs4_cached_acl *acl;
> -	int ret = -ENOENT;
> -
> -	spin_lock(&inode->i_lock);
> -	acl = nfsi->nfs4_acl;
> -	if (acl == NULL)
> -		goto out;
> -	if (buf == NULL) /* user is just asking for length */
> -		goto out_len;
> -	if (acl->cached == 0)
> -		goto out;
> -	ret = -ERANGE; /* see getxattr(2) man page */
> -	if (acl->len > buflen)
> -		goto out;
> -	memcpy(buf, acl->data, acl->len);
> -out_len:
> -	ret = acl->len;
> -out:
> -	spin_unlock(&inode->i_lock);
> -	return ret;
> -}
> -
> -static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
> -{
> -	struct nfs4_cached_acl *acl;
> -	size_t buflen = sizeof(*acl) + acl_len;
> -
> -	if (buflen <= PAGE_SIZE) {
> -		acl = kmalloc(buflen, GFP_KERNEL);
> -		if (acl == NULL)
> -			goto out;
> -		acl->cached = 1;
> -		_copy_from_pages(acl->data, pages, pgbase, acl_len);
> -	} else {
> -		acl = kmalloc(sizeof(*acl), GFP_KERNEL);
> -		if (acl == NULL)
> -			goto out;
> -		acl->cached = 0;
> -	}
> -	acl->len = acl_len;
> -out:
> -	nfs4_set_cached_acl(inode, acl);
> +	forget_cached_richacl(inode);
>  }
>  
>  /*
> @@ -4636,121 +4551,269 @@ out:
>   * length. The next getxattr call will then produce another round trip to
>   * the server, this time with the input buf of the required size.
>   */
> -static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
> +static struct richacl *__nfs4_get_acl_uncached(struct inode *inode)
>  {
> -	struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
> +	struct nfs_server *server = NFS_SERVER(inode);
> +	struct page *pages[DIV_ROUND_UP(NFS4ACL_SIZE_MAX, PAGE_SIZE)] = {};
>  	struct nfs_getaclargs args = {
>  		.fh = NFS_FH(inode),
>  		.acl_pages = pages,
> -		.acl_len = buflen,
> +		.acl_len = ARRAY_SIZE(pages) * PAGE_SIZE,
>  	};
>  	struct nfs_getaclres res = {
> -		.acl_len = buflen,
> +		.server = server,
>  	};
>  	struct rpc_message msg = {
>  		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
>  		.rpc_argp = &args,
>  		.rpc_resp = &res,
>  	};
> -	unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
> -	int ret = -ENOMEM, i;
> +	int err, i;
>  
> -	/* As long as we're doing a round trip to the server anyway,
> -	 * let's be prepared for a page of acl data. */
> -	if (npages == 0)
> -		npages = 1;
> -	if (npages > ARRAY_SIZE(pages))
> -		return -ERANGE;
> -
> -	for (i = 0; i < npages; i++) {
> -		pages[i] = alloc_page(GFP_KERNEL);
> -		if (!pages[i])
> +	if (ARRAY_SIZE(pages) > 1) {
> +		/* for decoding across pages */
> +		res.acl_scratch = alloc_page(GFP_KERNEL);
> +		err = -ENOMEM;
> +		if (!res.acl_scratch)
>  			goto out_free;
>  	}
>  
> -	/* for decoding across pages */
> -	res.acl_scratch = alloc_page(GFP_KERNEL);
> -	if (!res.acl_scratch)
> -		goto out_free;
> -
> -	args.acl_len = npages * PAGE_SIZE;
> -
> -	dprintk("%s  buf %p buflen %zu npages %d args.acl_len %zu\n",
> -		__func__, buf, buflen, npages, args.acl_len);
> -	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
> +	dprintk("%s  args.acl_len %zu\n",
> +		__func__, args.acl_len);
> +	err = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
>  			     &msg, &args.seq_args, &res.seq_res, 0);
> -	if (ret)
> +	if (err)
>  		goto out_free;
>  
> -	/* Handle the case where the passed-in buffer is too short */
> -	if (res.acl_flags & NFS4_ACL_TRUNC) {
> -		/* Did the user only issue a request for the acl length? */
> -		if (buf == NULL)
> -			goto out_ok;
> -		ret = -ERANGE;
> -		goto out_free;
> -	}
> -	nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
> -	if (buf) {
> -		if (res.acl_len > buflen) {
> -			ret = -ERANGE;
> -			goto out_free;
> -		}
> -		_copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
> -	}
> -out_ok:
> -	ret = res.acl_len;
> +	richacl_compute_max_masks(res.acl);
> +	/* FIXME: Set inode->i_mode from res->mode?  */
> +	set_cached_richacl(inode, res.acl);
> +	err = 0;
> +
>  out_free:
> -	for (i = 0; i < npages; i++)
> -		if (pages[i])
> -			__free_page(pages[i]);
> +	if (err) {
> +		richacl_put(res.acl);
> +		res.acl = ERR_PTR(err);
> +	}
> +	for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
> +		__free_page(pages[i]);
>  	if (res.acl_scratch)
>  		__free_page(res.acl_scratch);
> -	return ret;
> +	return res.acl;
>  }
>  
> -static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
> +static struct richacl *nfs4_get_acl_uncached(struct inode *inode)
>  {
>  	struct nfs4_exception exception = { };
> -	ssize_t ret;
> +	struct richacl *acl;
>  	do {
> -		ret = __nfs4_get_acl_uncached(inode, buf, buflen);
> -		trace_nfs4_get_acl(inode, ret);
> -		if (ret >= 0)
> +		acl = __nfs4_get_acl_uncached(inode);
> +		trace_nfs4_get_acl(inode, IS_ERR(acl) ? PTR_ERR(acl) : 0);
> +		if (!IS_ERR(acl))
>  			break;
> -		ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
> +		acl = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
> +			      PTR_ERR(acl), &exception));
>  	} while (exception.retry);
> -	return ret;
> +	return acl;
>  }
>  
> -static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
> +static struct richacl *nfs4_proc_get_acl(struct inode *inode)
>  {
>  	struct nfs_server *server = NFS_SERVER(inode);
> +	struct richacl *acl;
>  	int ret;
>  
> -	if (!nfs4_server_supports_acls(server))
> -		return -EOPNOTSUPP;
> +	if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
> +		return ERR_PTR(-EOPNOTSUPP);
>  	ret = nfs_revalidate_inode(server, inode);
>  	if (ret < 0)
> -		return ret;
> +		return ERR_PTR(ret);
>  	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
>  		nfs_zap_acl_cache(inode);
> -	ret = nfs4_read_cached_acl(inode, buf, buflen);
> -	if (ret != -ENOENT)
> -		/* -ENOENT is returned if there is no ACL or if there is an ACL
> -		 * but no cached acl data, just the acl length */
> -		return ret;
> -	return nfs4_get_acl_uncached(inode, buf, buflen);
> +	acl = get_cached_richacl(inode);
> +	if (acl != ACL_NOT_CACHED)
> +		return acl;
> +	return nfs4_get_acl_uncached(inode);
> +}
> +
> +static int
> +richacl_supported(struct nfs_server *server, struct richacl *acl)
> +{
> +	struct richace *ace;
> +
> +	if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
> +		return -EOPNOTSUPP;
> +
> +	richacl_for_each_entry(ace, acl) {
> +		if (richace_is_allow(ace)) {
> +			if (!(server->caps & NFS_CAP_ALLOW_ACLS))
> +				return -EINVAL;
> +		} else if (richace_is_deny(ace)) {
> +			if (!(server->caps & NFS_CAP_DENY_ACLS))
> +				return -EINVAL;
> +		} else
> +			return -EINVAL;
> +	}
> +	return 0;
>  }
>  
> -static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
> +static int
> +nfs4_encode_user(struct xdr_stream *xdr, const struct nfs_server *server,
> +		 kuid_t uid)
> +{
> +	char name[IDMAP_NAMESZ];
> +	int len;
> +	__be32 *p;
> +
> +	len = nfs_map_uid_to_name(server, uid, name, IDMAP_NAMESZ);
> +	if (len < 0) {
> +		dprintk("nfs: couldn't resolve uid %d to string\n",
> +				from_kuid(&init_user_ns, uid));
> +		return -ENOENT;
> +	}
> +	p = xdr_reserve_space(xdr, 4 + len);
> +	if (!p)
> +		return -EIO;
> +	p = xdr_encode_opaque(p, name, len);
> +	return 0;
> +}
> +
> +static int
> +nfs4_encode_group(struct xdr_stream *xdr, const struct nfs_server *server,
> +		  kgid_t gid)
> +{
> +	char name[IDMAP_NAMESZ];
> +	int len;
> +	__be32 *p;
> +
> +	len = nfs_map_gid_to_group(server, gid, name, IDMAP_NAMESZ);
> +	if (len < 0) {
> +		dprintk("nfs: couldn't resolve gid %d to string\n",
> +				from_kgid(&init_user_ns, gid));
> +		return -ENOENT;
> +	}
> +	p = xdr_reserve_space(xdr, 4 + len);
> +	if (!p)
> +		return -EIO;
> +	p = xdr_encode_opaque(p, name, len);
> +	return 0;
> +}
> +
> +static unsigned int
> +nfs4_ace_mask(int minorversion)
> +{
> +	return minorversion == 0 ? NFS40_ACE_MASK_ALL : NFS4_ACE_MASK_ALL;
> +}
> +
> +static int
> +nfs4_encode_ace_who(struct xdr_stream *xdr, const struct nfs_server *server,
> +		    struct richace *ace, struct richacl *acl)
> +{
> +	const char *who;
> +	__be32 *p;
> +
> +	if (ace->e_flags & RICHACE_SPECIAL_WHO) {
> +		unsigned int special_id = ace->e_id.special;
> +		const char *who;
> +		unsigned int len;
> +
> +		if (!nfs4acl_special_id_to_who(special_id, &who, &len)) {
> +			WARN_ON_ONCE(1);
> +			return -EIO;
> +		}
> +		p = xdr_reserve_space(xdr, 4 + len);
> +		if (!p)
> +			return -EIO;
> +		xdr_encode_opaque(p, who, len);
> +		return 0;
> +	} else {
> +		who = richace_unmapped_identifier(ace, acl);
> +		if (who) {
> +			unsigned int len = strlen(who);
> +
> +			p = xdr_reserve_space(xdr, 4 + len);
> +			if (!p)
> +				return -EIO;
> +			xdr_encode_opaque(p, who, len);
> +			return 0;
> +		} else if (ace->e_flags & RICHACE_IDENTIFIER_GROUP)
> +			return nfs4_encode_group(xdr, server, ace->e_id.gid);
> +		else
> +			return nfs4_encode_user(xdr, server, ace->e_id.uid);
> +	}
> +}
> +
> +static int
> +nfs4_encode_acl(struct page **pages, unsigned int len, struct richacl *acl,
> +		const struct nfs_server *server)
> +{
> +	int minorversion = server->nfs_client->cl_minorversion;
> +	unsigned int ace_mask = nfs4_ace_mask(minorversion);
> +	struct xdr_stream xdr;
> +	struct xdr_buf buf;
> +	__be32 *p;
> +	struct richace *ace;
> +
> +	/* Reject acls not understood by the server */
> +	if (server->attr_bitmask[1] & FATTR4_WORD1_DACL) {
> +		BUILD_BUG_ON(NFS4_ACE_MASK_ALL != RICHACE_VALID_MASK);
> +	} else {
> +		if (acl->a_flags)
> +			return -EINVAL;
> +		richacl_for_each_entry(ace, acl) {
> +			if (ace->e_flags & RICHACE_INHERITED_ACE)
> +				return -EINVAL;
> +		}
> +	}
> +	richacl_for_each_entry(ace, acl) {
> +		if (ace->e_mask & ~ace_mask)
> +			return -EINVAL;
> +	}
> +
> +	xdr_init_encode_pages(&xdr, &buf, pages, len);
> +
> +	if (server->attr_bitmask[1] & FATTR4_WORD1_DACL) {
> +		p = xdr_reserve_space(&xdr, 4);
> +		if (!p)
> +			goto fail;
> +		*p = cpu_to_be32(acl ? acl->a_flags : 0);
> +	}
> +
> +	p = xdr_reserve_space(&xdr, 4);
> +	if (!p)
> +		goto fail;
> +	if (!acl) {
> +		*p++ = cpu_to_be32(0);
> +		return buf.len;
> +	}
> +	*p++ = cpu_to_be32(acl->a_count);
> +
> +	richacl_for_each_entry(ace, acl) {
> +		p = xdr_reserve_space(&xdr, 4*3);
> +		if (!p)
> +			goto fail;
> +		*p++ = cpu_to_be32(ace->e_type);
> +		*p++ = cpu_to_be32(ace->e_flags &
> +			~(RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO));
> +		*p++ = cpu_to_be32(ace->e_mask & NFS4_ACE_MASK_ALL);
> +		if (nfs4_encode_ace_who(&xdr, server, ace, acl) != 0)
> +			goto fail;
> +	}
> +
> +	return buf.len;
> +
> +fail:
> +	return -ENOMEM;
> +}
> +
> +static int __nfs4_proc_set_acl(struct inode *inode, struct richacl *acl)
>  {
>  	struct nfs_server *server = NFS_SERVER(inode);
> -	struct page *pages[NFS4ACL_MAXPAGES];
> +	struct page *pages[DIV_ROUND_UP(NFS4ACL_SIZE_MAX, PAGE_SIZE) + 1 /* scratch */] = {};
>  	struct nfs_setaclargs arg = {
> +		.server		= server,
>  		.fh		= NFS_FH(inode),
>  		.acl_pages	= pages,
> -		.acl_len	= buflen,
>  	};
>  	struct nfs_setaclres res;
>  	struct rpc_message msg = {
> @@ -4758,16 +4821,20 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
>  		.rpc_argp	= &arg,
>  		.rpc_resp	= &res,
>  	};
> -	unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
>  	int ret, i;
>  
> -	if (!nfs4_server_supports_acls(server))
> -		return -EOPNOTSUPP;
> -	if (npages > ARRAY_SIZE(pages))
> -		return -ERANGE;
> -	i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
> -	if (i < 0)
> -		return i;
> +	ret = richacl_supported(server, acl);
> +	if (ret)
> +		return ret;
> +
> +	ret = nfs4_encode_acl(pages, NFS4ACL_SIZE_MAX, acl, server);
> +	if (ret < 0) {
> +		for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
> +			put_page(pages[i]);
> +		return ret;
> +	}
> +	arg.acl_len = ret;
> +
>  	nfs4_inode_return_delegation(inode);
>  	ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
>  
> @@ -4775,8 +4842,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
>  	 * Free each page after tx, so the only ref left is
>  	 * held by the network stack
>  	 */
> -	for (; i > 0; i--)
> -		put_page(pages[i-1]);
> +	for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
> +		put_page(pages[i]);
>  
>  	/*
>  	 * Acl update can result in inode attribute update.
> @@ -4790,12 +4857,12 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
>  	return ret;
>  }
>  
> -static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
> +static int nfs4_proc_set_acl(struct inode *inode, struct richacl *acl)
>  {
>  	struct nfs4_exception exception = { };
>  	int err;
>  	do {
> -		err = __nfs4_proc_set_acl(inode, buf, buflen);
> +		err = __nfs4_proc_set_acl(inode, acl);
>  		trace_nfs4_set_acl(inode, err);
>  		err = nfs4_handle_exception(NFS_SERVER(inode), err,
>  				&exception);
> @@ -6257,34 +6324,283 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
>  	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
>  }
>  
> +static int nfs4_xattr_set_richacl(struct dentry *dentry, const char *key,
> +				  const void *buf, size_t buflen,
> +				  int flags, int handler_flags)
> +{
> +	struct inode *inode = d_inode(dentry);
> +	struct richacl *acl;
> +	int error;
> +
> +	if (strcmp(key, "") != 0)
> +		return -EINVAL;
> +
> +	if (buf) {
> +		acl = richacl_from_xattr(&init_user_ns, buf, buflen);
> +		if (IS_ERR(acl))
> +			return PTR_ERR(acl);
> +		error = richacl_apply_masks(&acl, inode->i_uid);
> +	} else {
> +		/*
> +		 * "Remove the acl"; only permissions granted by the mode
> +		 * remain.  We are using the cached mode here which could be
> +		 * outdated; should we do a GETATTR first to narrow down the
> +		 * race window?
> +		 */
> +		acl = richacl_from_mode(inode->i_mode);
> +		error = 0;
> +	}
> +
> +	if (!error)
> +		error = nfs4_proc_set_acl(inode, acl);
> +	richacl_put(acl);
> +	return error;
> +}
> +
> +static int nfs4_xattr_get_richacl(struct dentry *dentry, const char *key,
> +				  void *buf, size_t buflen, int handler_flags)
> +{
> +	struct inode *inode = d_inode(dentry);
> +	struct richacl *acl;
> +	int error;
> +	umode_t mode = inode->i_mode & S_IFMT;
> +
> +	if (strcmp(key, "") != 0)
> +		return -EINVAL;
> +
> +	acl = nfs4_proc_get_acl(inode);
> +	if (IS_ERR(acl))
> +		return PTR_ERR(acl);
> +	if (acl == NULL)
> +		return -ENODATA;
> +	error = -ENODATA;
> +	if (richacl_equiv_mode(acl, &mode) == 0 &&
> +	    ((mode ^ inode->i_mode) & S_IRWXUGO) == 0)
> +		goto out;
> +	error = richacl_to_xattr(&init_user_ns, acl, buf, buflen);
> +out:
> +	richacl_put(acl);
> +	return error;
> +}
> +
> +static size_t nfs4_xattr_list_richacl(struct dentry *dentry, char *list,
> +				      size_t list_len, const char *name,
> +				      size_t name_len, int handler_flags)
> +{
> +	struct nfs_server *server = NFS_SERVER(d_inode(dentry));
> +	size_t len = sizeof(XATTR_NAME_RICHACL);
> +
> +	if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
> +		return 0;
> +
> +	if (list && len <= list_len)
> +		memcpy(list, XATTR_NAME_RICHACL, len);
> +	return len;
> +}
> +
>  #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
>  
> +static int richacl_to_nfs4_acl(struct nfs_server *server,
> +			       const struct richacl *acl,
> +			       void *buf, size_t buflen)
> +{
> +	const struct richace *ace;
> +	__be32 *p = buf;
> +	size_t size = 0;
> +
> +	size += sizeof(*p);
> +	if (buflen >= size)
> +		*p++ = cpu_to_be32(acl->a_count);
> +
> +	richacl_for_each_entry(ace, acl) {
> +		char who_buf[IDMAP_NAMESZ];
> +		const char *who = who_buf;
> +		int who_len;
> +
> +		size += 3 * sizeof(*p);
> +		if (buflen >= size) {
> +			*p++ = cpu_to_be32(ace->e_type);
> +			*p++ = cpu_to_be32(ace->e_flags &
> +					   ~(RICHACE_INHERITED_ACE |
> +					     RICHACE_UNMAPPED_WHO |
> +					     RICHACE_SPECIAL_WHO));
> +			*p++ = cpu_to_be32(ace->e_mask);
> +		}
> +
> +		if (richace_is_unix_user(ace)) {
> +			who_len = nfs_map_uid_to_name(server, ace->e_id.uid,
> +						      who_buf, sizeof(who_buf));
> +			if (who_len < 0)
> +				return -EIO;
> +		} else if (richace_is_unix_group(ace)) {
> +			who_len = nfs_map_gid_to_group(server, ace->e_id.gid,
> +						       who_buf, sizeof(who_buf));
> +			if (who_len < 0)
> +				return -EIO;
> +		} else if (ace->e_flags & RICHACE_SPECIAL_WHO) {
> +			if (!nfs4acl_special_id_to_who(ace->e_id.special,
> +						       &who, &who_len))
> +				return -EIO;
> +		} else {
> +			who = richace_unmapped_identifier(ace, acl);
> +			if (who)
> +				who_len = strlen(who);
> +			else
> +				return -EIO;
> +		}
> +
> +		size += sizeof(*p) + ALIGN(who_len, sizeof(*p));
> +		if (buflen >= size) {
> +			unsigned int padding = -who_len & (sizeof(*p) - 1);
> +
> +			*p++ = cpu_to_be32(who_len);
> +			memcpy(p, who, who_len);
> +			memset((char *)p + who_len, 0, padding);
> +			p += DIV_ROUND_UP(who_len, sizeof(*p));
> +		}
> +	}
> +	if (buflen && buflen < size)
> +		return -ERANGE;
> +	return size;
> +}
> +
> +static struct richacl *richacl_from_nfs4_acl(struct nfs_server *server,
> +					     const void *buf, size_t buflen)
> +{
> +	struct richacl *acl = NULL;
> +	struct richace *ace;
> +	const __be32 *p = buf;
> +	int count, err;
> +
> +	if (buflen < sizeof(*p))
> +		return ERR_PTR(-EINVAL);
> +	count = be32_to_cpu(*p++);
> +	if (count > RICHACL_XATTR_MAX_COUNT)
> +		return ERR_PTR(-EINVAL);
> +	buflen -= sizeof(*p);
> +	acl = richacl_alloc(count, GFP_NOFS);
> +	if (!acl)
> +		return ERR_PTR(-ENOMEM);
> +	richacl_for_each_entry(ace, acl) {
> +		u32 who_len, size;
> +		int special_id;
> +		char *who;
> +
> +		err = -EINVAL;
> +		if (buflen < 4 * sizeof(*p))
> +			goto out;
> +		ace->e_type = be32_to_cpu(*p++);
> +		ace->e_flags = be32_to_cpu(*p++);
> +		if (ace->e_flags & (RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO))
> +			goto out;
> +		ace->e_mask = be32_to_cpu(*p++);
> +		who_len = be32_to_cpu(*p++);
> +		buflen -= 4 * sizeof(*p);
> +		size = ALIGN(who_len, 4);
> +		if (buflen < size || size == 0)
> +			goto out;
> +		who = (char *)p;
> +		special_id = nfs4acl_who_to_special_id(who, who_len);
> +		if (special_id >= 0) {
> +			ace->e_flags |= RICHACE_SPECIAL_WHO;
> +			ace->e_id.special = special_id;
> +		} else {
> +			bool unmappable;
> +
> +			if (ace->e_flags & RICHACE_IDENTIFIER_GROUP) {
> +				err = nfs_map_group_to_gid(server, who, who_len,
> +							   &ace->e_id.gid);
> +				if (err) {
> +					dprintk("%s: nfs_map_group_to_gid "
> +						"failed!\n", __func__);
> +					goto out;
> +				}
> +				/* FIXME: nfsidmap doesn't distinguish between
> +					  group nobody and unmappable groups! */
> +				unmappable = gid_eq(ace->e_id.gid,
> +					make_kgid(&init_user_ns, 99));
> +			} else {
> +				err = nfs_map_name_to_uid(server, who, who_len,
> +							  &ace->e_id.uid);
> +				if (err) {
> +					dprintk("%s: nfs_map_name_to_gid "
> +						"failed!\n", __func__);
> +					goto out;
> +				}
> +				/* FIXME: nfsidmap doesn't distinguish between
> +					  user nobody and unmappable users! */
> +				unmappable = uid_eq(ace->e_id.uid,
> +					make_kuid(&init_user_ns, 99));
> +			}
> +			if (unmappable) {
> +				err = -ENOMEM;
> +				if (richacl_add_unmapped_identifier(&acl, &ace,
> +					who, who_len, GFP_NOFS))
> +					goto out;
> +			}
> +		}
> +		p += size / sizeof(*p);
> +		buflen -= size;
> +	}
> +	err = -EINVAL;
> +	if (buflen != 0)
> +		goto out;
> +	err = 0;
> +
> +out:
> +	if (err) {
> +		richacl_put(acl);
> +		acl = ERR_PTR(err);
> +	}
> +	return acl;
> +}

I'm not a fan of the "one giant function" approach.  Is there any way to split richacl_from_nfs4_acl() into several smaller functions?

Thanks,
Anna

> +
>  static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
>  				   const void *buf, size_t buflen,
>  				   int flags, int type)
>  {
> -	if (strcmp(key, "") != 0)
> +	struct inode *inode = d_inode(dentry);
> +	struct richacl *acl;
> +	int error;
> +
> +	if (!buf || strcmp(key, "") != 0)
>  		return -EINVAL;
>  
> -	return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
> +	acl = richacl_from_nfs4_acl(NFS_SERVER(inode), (void *)buf, buflen);
> +	if (IS_ERR(acl))
> +		return PTR_ERR(acl);
> +	error = nfs4_proc_set_acl(inode, acl);
> +	richacl_put(acl);
> +	return error;
>  }
>  
>  static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
>  				   void *buf, size_t buflen, int type)
>  {
> +	struct inode *inode = d_inode(dentry);
> +	struct richacl *acl;
> +	int error;
> +
>  	if (strcmp(key, "") != 0)
>  		return -EINVAL;
> -
> -	return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
> +	acl = nfs4_proc_get_acl(inode);
> +	if (IS_ERR(acl))
> +		return PTR_ERR(acl);
> +	if (acl == NULL)
> +		return -ENODATA;
> +	error = richacl_to_nfs4_acl(NFS_SERVER(inode), acl, buf, buflen);
> +	richacl_put(acl);
> +	return error;
>  }
>  
>  static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
>  				       size_t list_len, const char *name,
>  				       size_t name_len, int type)
>  {
> +	struct nfs_server *server = NFS_SERVER(d_inode(dentry));
>  	size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
>  
> -	if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
> +	if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
>  		return 0;
>  
>  	if (list && len <= list_len)
> @@ -8837,6 +9153,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
>  	.clone_server	= nfs_clone_server,
>  };
>  
> +static const struct xattr_handler nfs4_xattr_richacl_handler = {
> +	.prefix	= XATTR_NAME_RICHACL,
> +	.list	= nfs4_xattr_list_richacl,
> +	.get	= nfs4_xattr_get_richacl,
> +	.set	= nfs4_xattr_set_richacl,
> +};
> +
>  static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
>  	.prefix	= XATTR_NAME_NFSV4_ACL,
>  	.list	= nfs4_xattr_list_nfs4_acl,
> @@ -8845,6 +9168,7 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
>  };
>  
>  const struct xattr_handler *nfs4_xattr_handlers[] = {
> +	&nfs4_xattr_richacl_handler,
>  	&nfs4_xattr_nfs4_acl_handler,
>  #ifdef CONFIG_NFS_V4_SECURITY_LABEL
>  	&nfs4_xattr_nfs4_label_handler,
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index eefed15..f2507d7 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -52,6 +52,10 @@
>  #include <linux/nfs.h>
>  #include <linux/nfs4.h>
>  #include <linux/nfs_fs.h>
> +#include <linux/nfs_idmap.h>
> +#include <linux/richacl.h>
> +#include <linux/richacl_xattr.h>  /* for RICHACL_XATTR_MAX_COUNT */
> +#include <linux/nfs4acl.h>
>  
>  #include "nfs4_fs.h"
>  #include "internal.h"
> @@ -1650,16 +1654,24 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
>  static void
>  encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
>  {
> -	__be32 *p;
> +	int attrlen_offset;
> +	__be32 attrlen, *p;
>  
>  	encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr);
>  	encode_nfs4_stateid(xdr, &zero_stateid);
> +
> +	/* Encode attribute bitmap. */
>  	p = reserve_space(xdr, 2*4);
>  	*p++ = cpu_to_be32(1);
>  	*p = cpu_to_be32(FATTR4_WORD0_ACL);
> -	p = reserve_space(xdr, 4);
> -	*p = cpu_to_be32(arg->acl_len);
> +
> +	attrlen_offset = xdr->buf->len;
> +	xdr_reserve_space(xdr, 4);  /* to be backfilled later */
> +
>  	xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len);
> +
> +	attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
> +	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
>  }
>  
>  static void
> @@ -2488,7 +2500,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
>  	encode_sequence(xdr, &args->seq_args, &hdr);
>  	encode_putfh(xdr, args->fh, &hdr);
>  	replen = hdr.replen + op_decode_hdr_maxsz + 1;
> -	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
> +	encode_getattr_two(xdr, FATTR4_WORD0_ACL, FATTR4_WORD1_MODE, &hdr);
>  
>  	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
>  		args->acl_pages, 0, args->acl_len);
> @@ -5260,24 +5272,135 @@ decode_restorefh(struct xdr_stream *xdr)
>  	return decode_op_hdr(xdr, OP_RESTOREFH);
>  }
>  
> +static int
> +nfs4_decode_ace_who(struct richace *ace,
> +		    const char **unmapped, unsigned int *unmapped_len,
> +		    const struct nfs_server *server,
> +		    struct xdr_stream *xdr)
> +{
> +	char *who;
> +	u32 len;
> +	int special_id;
> +	__be32 *p;
> +	int error;
> +
> +	p = xdr_inline_decode(xdr, 4);
> +	if (!p)
> +		return -ENOMEM;  /* acl truncated */
> +	len = be32_to_cpup(p++);
> +	if (len >= XDR_MAX_NETOBJ) {
> +		dprintk("%s: name too long (%u)!\n",
> +			__func__, len);
> +		return -EIO;
> +	}
> +	who = (char *)xdr_inline_decode(xdr, len);
> +	if (!who)
> +		return -ENOMEM;  /* acl truncated */
> +
> +	special_id = nfs4acl_who_to_special_id(who, len);
> +	if (special_id >= 0) {
> +		ace->e_flags |= RICHACE_SPECIAL_WHO;
> +		ace->e_flags &= ~RICHACE_IDENTIFIER_GROUP;
> +		ace->e_id.special = special_id;
> +		return 0;
> +	}
> +	if (ace->e_flags & RICHACE_IDENTIFIER_GROUP) {
> +		error = nfs_map_group_to_gid(server, who, len, &ace->e_id.gid);
> +		if (error) {
> +			dprintk("%s: nfs_map_group_to_gid failed!\n",
> +					__func__);
> +			return error;
> +		}
> +		/* FIXME: nfsidmap doesn't distinguish between group nobody and
> +			  unmappable groups! */
> +		if (gid_eq(ace->e_id.gid, make_kgid(&init_user_ns, 99))) {
> +			*unmapped = who;
> +			*unmapped_len = len;
> +		}
> +	} else {
> +		error = nfs_map_name_to_uid(server, who, len, &ace->e_id.uid);
> +		if (error) {
> +			dprintk("%s: nfs_map_name_to_uid failed!\n",
> +					__func__);
> +			return error;
> +		}
> +		/* FIXME: nfsidmap doesn't distinguish between user nobody and
> +			  unmappable users! */
> +		if (uid_eq(ace->e_id.uid, make_kuid(&init_user_ns, 99))) {
> +			*unmapped = who;
> +			*unmapped_len = len;
> +		}
> +	}
> +	return 0;
> +}
> +
> +static struct richacl *
> +decode_acl_entries(struct xdr_stream *xdr, const struct nfs_server *server)
> +{
> +	struct richacl *acl;
> +	struct richace *ace;
> +	uint32_t count;
> +	__be32 *p;
> +	int status;
> +
> +	p = xdr_inline_decode(xdr, 4);
> +	if (unlikely(!p))
> +		return ERR_PTR(-ENOMEM);  /* acl truncated */
> +	count = be32_to_cpup(p);
> +	if (count > RICHACL_XATTR_MAX_COUNT)
> +		return ERR_PTR(-EIO);
> +	acl = richacl_alloc(count, GFP_NOFS);
> +	if (!acl)
> +		return ERR_PTR(-ENOMEM);
> +	richacl_for_each_entry(ace, acl) {
> +		const char *unmapped = NULL;
> +		unsigned int unmapped_len;
> +
> +		p = xdr_inline_decode(xdr, 4*3);
> +		status = -ENOMEM;
> +		if (unlikely(!p))
> +			goto out;  /* acl truncated */
> +		ace->e_type = be32_to_cpup(p++);
> +		ace->e_flags = be32_to_cpup(p++);
> +		status = -EIO;
> +		if (ace->e_flags &
> +		    (RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO))
> +			goto out;
> +		ace->e_mask = be32_to_cpup(p++);
> +		status = nfs4_decode_ace_who(ace, &unmapped,
> +					     &unmapped_len, server,
> +					     xdr);
> +		if (status)
> +			goto out;
> +		if (unmapped) {
> +			status = -ENOMEM;
> +			if (richacl_add_unmapped_identifier(&acl, &ace,
> +					unmapped, unmapped_len,
> +					GFP_NOFS))
> +				goto out;
> +		}
> +	}
> +	status = 0;
> +
> +out:
> +	if (status) {
> +		richacl_put(acl);
> +		acl = ERR_PTR(status);
> +	}
> +	return acl;
> +}
> +
>  static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
>  			 struct nfs_getaclres *res)
>  {
>  	unsigned int savep;
>  	uint32_t attrlen,
>  		 bitmap[3] = {0};
> +	struct richacl *acl = NULL;
>  	int status;
> -	unsigned int pg_offset;
>  
> -	res->acl_len = 0;
>  	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
>  		goto out;
> -
> -	xdr_enter_page(xdr, xdr->buf->page_len);
> -
> -	/* Calculate the offset of the page data */
> -	pg_offset = xdr->buf->head[0].iov_len;
> -
>  	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
>  		goto out;
>  	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
> @@ -5286,24 +5409,28 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
>  	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
>  		return -EIO;
>  	if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
> -
> -		/* The bitmap (xdr len + bitmaps) and the attr xdr len words
> -		 * are stored with the acl data to handle the problem of
> -		 * variable length bitmaps.*/
> -		res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
> -		res->acl_len = attrlen;
> -
> -		/* Check for receive buffer overflow */
> -		if (res->acl_len > (xdr->nwords << 2) ||
> -		    res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
> -			res->acl_flags |= NFS4_ACL_TRUNC;
> -			dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
> -					attrlen, xdr->nwords << 2);
> -		}
> +		acl = decode_acl_entries(xdr, res->server);
> +		status = PTR_ERR(acl);
> +		if (IS_ERR(acl))
> +			goto out;
> +		bitmap[0] &= ~FATTR4_WORD0_ACL;
>  	} else
>  		status = -EOPNOTSUPP;
>  
> +	status = -EIO;
> +	if (unlikely(bitmap[0]))
> +		goto out;
> +
> +	status = decode_attr_mode(xdr, bitmap, &res->mode);
> +	if (status < 0)
> +		goto out;
> +	status = 0;
> +
>  out:
> +	if (status == 0)
> +		res->acl = acl;
> +	else
> +		richacl_put(acl);
>  	return status;
>  }
>  
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index 383a027..8ced33d 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -2319,7 +2319,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
>  		/* The VFS shouldn't apply the umask to mode bits. We will do
>  		 * so ourselves when necessary.
>  		 */
> -		sb->s_flags |= MS_POSIXACL;
> +		sb->s_flags |= MS_RICHACL;
>  		sb->s_time_gran = 1;
>  	}
>  
> @@ -2346,7 +2346,7 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
>  		/* The VFS shouldn't apply the umask to mode bits. We will do
>  		 * so ourselves when necessary.
>  		 */
> -		sb->s_flags |= MS_POSIXACL;
> +		sb->s_flags |= MS_RICHACL;
>  	}
>  
>   	nfs_initialise_sb(sb);
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index c0e9614..b84e194 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -176,7 +176,6 @@ struct nfs_inode {
>  	wait_queue_head_t	waitqueue;
>  
>  #if IS_ENABLED(CONFIG_NFS_V4)
> -	struct nfs4_cached_acl	*nfs4_acl;
>          /* NFSv4 state */
>  	struct list_head	open_states;
>  	struct nfs_delegation __rcu *delegation;
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 570a7df..6c41668 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -243,5 +243,7 @@ struct nfs_server {
>  #define NFS_CAP_ALLOCATE	(1U << 20)
>  #define NFS_CAP_DEALLOCATE	(1U << 21)
>  #define NFS_CAP_LAYOUTSTATS	(1U << 22)
> +#define NFS_CAP_ALLOW_ACLS	(1U << 23)
> +#define NFS_CAP_DENY_ACLS	(1U << 24)
>  
>  #endif
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 090ade4..337c341 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -683,9 +683,10 @@ struct nfs_setattrargs {
>  
>  struct nfs_setaclargs {
>  	struct nfs4_sequence_args	seq_args;
> +	const struct nfs_server *	server;
>  	struct nfs_fh *			fh;
> -	size_t				acl_len;
>  	struct page **			acl_pages;
> +	size_t				acl_len;
>  };
>  
>  struct nfs_setaclres {
> @@ -703,9 +704,9 @@ struct nfs_getaclargs {
>  #define NFS4_ACL_TRUNC		0x0001	/* ACL was truncated */
>  struct nfs_getaclres {
>  	struct nfs4_sequence_res	seq_res;
> -	size_t				acl_len;
> -	size_t				acl_data_offset;
> -	int				acl_flags;
> +	const struct nfs_server *	server;
> +	struct richacl *		acl;
> +	umode_t				mode;
>  	struct page *			acl_scratch;
>  };
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Gruenbacher Oct. 12, 2015, 7:49 p.m. UTC | #2
Anna,

On Mon, Oct 12, 2015 at 4:39 PM, Anna Schumaker
<Anna.Schumaker@netapp.com> wrote:
> I'm not a fan of the "one giant function" approach.  Is there any way to split richacl_from_nfs4_acl() into several smaller functions?

Sure, parsing the user/group identifier could be put in a separate
function, for example.

That function cannot stay as it is right now anyway --- we really need
the idmapper to tell us when an identifier string cannot be mapped to a
uid or gid so that we can preserve that identifier.

Thanks,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 326d9e1..843d15d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1852,9 +1852,6 @@  struct inode *nfs_alloc_inode(struct super_block *sb)
 		return NULL;
 	nfsi->flags = 0UL;
 	nfsi->cache_validity = 0UL;
-#if IS_ENABLED(CONFIG_NFS_V4)
-	nfsi->nfs4_acl = NULL;
-#endif /* CONFIG_NFS_V4 */
 	return &nfsi->vfs_inode;
 }
 EXPORT_SYMBOL_GPL(nfs_alloc_inode);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index eec5c4c..a686251 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,9 @@ 
 #include <linux/xattr.h>
 #include <linux/utsname.h>
 #include <linux/freezer.h>
+#include <linux/richacl.h>
+#include <linux/richacl_xattr.h>
+#include <linux/nfs4acl.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -2982,15 +2985,18 @@  static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 			res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
 		}
 		memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
-		server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
-				NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
+		server->caps &= ~(NFS_CAP_ALLOW_ACLS|NFS_CAP_DENY_ACLS|
+				NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
 				NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
 				NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
 				NFS_CAP_CTIME|NFS_CAP_MTIME|
 				NFS_CAP_SECURITY_LABEL);
-		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
-				res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
-			server->caps |= NFS_CAP_ACLS;
+		if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) {
+			if (res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
+				server->caps |= NFS_CAP_ALLOW_ACLS;
+			if (res.acl_bitmask & ACL4_SUPPORT_DENY_ACL)
+				server->caps |= NFS_CAP_DENY_ACLS;
+		}
 		if (res.has_links != 0)
 			server->caps |= NFS_CAP_HARDLINKS;
 		if (res.has_symlinks != 0)
@@ -4518,45 +4524,11 @@  static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
 	return 0;
 }
 
-static inline int nfs4_server_supports_acls(struct nfs_server *server)
-{
-	return server->caps & NFS_CAP_ACLS;
-}
-
-/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
- * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on
- * the stack.
+/* An arbitrary limit; we allocate at most DIV_ROUND_UP(NFS4ACL_SIZE_MAX,
+ * PAGE_SIZE) pages and put an array of DIV_ROUND_UP(NFS4ACL_SIZE_MAX,
+ * PAGE_SIZE) page pointers on the stack when encoding or decoding acls.
  */
-#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
-
-static int buf_to_pages_noslab(const void *buf, size_t buflen,
-		struct page **pages)
-{
-	struct page *newpage, **spages;
-	int rc = 0;
-	size_t len;
-	spages = pages;
-
-	do {
-		len = min_t(size_t, PAGE_SIZE, buflen);
-		newpage = alloc_page(GFP_KERNEL);
-
-		if (newpage == NULL)
-			goto unwind;
-		memcpy(page_address(newpage), buf, len);
-                buf += len;
-                buflen -= len;
-		*pages++ = newpage;
-		rc++;
-	} while (buflen != 0);
-
-	return rc;
-
-unwind:
-	for(; rc > 0; rc--)
-		__free_page(spages[rc-1]);
-	return -ENOMEM;
-}
+#define NFS4ACL_SIZE_MAX 65536
 
 struct nfs4_cached_acl {
 	int cached;
@@ -4564,66 +4536,9 @@  struct nfs4_cached_acl {
 	char data[0];
 };
 
-static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	spin_lock(&inode->i_lock);
-	kfree(nfsi->nfs4_acl);
-	nfsi->nfs4_acl = acl;
-	spin_unlock(&inode->i_lock);
-}
-
 static void nfs4_zap_acl_attr(struct inode *inode)
 {
-	nfs4_set_cached_acl(inode, NULL);
-}
-
-static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs4_cached_acl *acl;
-	int ret = -ENOENT;
-
-	spin_lock(&inode->i_lock);
-	acl = nfsi->nfs4_acl;
-	if (acl == NULL)
-		goto out;
-	if (buf == NULL) /* user is just asking for length */
-		goto out_len;
-	if (acl->cached == 0)
-		goto out;
-	ret = -ERANGE; /* see getxattr(2) man page */
-	if (acl->len > buflen)
-		goto out;
-	memcpy(buf, acl->data, acl->len);
-out_len:
-	ret = acl->len;
-out:
-	spin_unlock(&inode->i_lock);
-	return ret;
-}
-
-static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
-{
-	struct nfs4_cached_acl *acl;
-	size_t buflen = sizeof(*acl) + acl_len;
-
-	if (buflen <= PAGE_SIZE) {
-		acl = kmalloc(buflen, GFP_KERNEL);
-		if (acl == NULL)
-			goto out;
-		acl->cached = 1;
-		_copy_from_pages(acl->data, pages, pgbase, acl_len);
-	} else {
-		acl = kmalloc(sizeof(*acl), GFP_KERNEL);
-		if (acl == NULL)
-			goto out;
-		acl->cached = 0;
-	}
-	acl->len = acl_len;
-out:
-	nfs4_set_cached_acl(inode, acl);
+	forget_cached_richacl(inode);
 }
 
 /*
@@ -4636,121 +4551,269 @@  out:
  * length. The next getxattr call will then produce another round trip to
  * the server, this time with the input buf of the required size.
  */
-static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+static struct richacl *__nfs4_get_acl_uncached(struct inode *inode)
 {
-	struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct page *pages[DIV_ROUND_UP(NFS4ACL_SIZE_MAX, PAGE_SIZE)] = {};
 	struct nfs_getaclargs args = {
 		.fh = NFS_FH(inode),
 		.acl_pages = pages,
-		.acl_len = buflen,
+		.acl_len = ARRAY_SIZE(pages) * PAGE_SIZE,
 	};
 	struct nfs_getaclres res = {
-		.acl_len = buflen,
+		.server = server,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
-	int ret = -ENOMEM, i;
+	int err, i;
 
-	/* As long as we're doing a round trip to the server anyway,
-	 * let's be prepared for a page of acl data. */
-	if (npages == 0)
-		npages = 1;
-	if (npages > ARRAY_SIZE(pages))
-		return -ERANGE;
-
-	for (i = 0; i < npages; i++) {
-		pages[i] = alloc_page(GFP_KERNEL);
-		if (!pages[i])
+	if (ARRAY_SIZE(pages) > 1) {
+		/* for decoding across pages */
+		res.acl_scratch = alloc_page(GFP_KERNEL);
+		err = -ENOMEM;
+		if (!res.acl_scratch)
 			goto out_free;
 	}
 
-	/* for decoding across pages */
-	res.acl_scratch = alloc_page(GFP_KERNEL);
-	if (!res.acl_scratch)
-		goto out_free;
-
-	args.acl_len = npages * PAGE_SIZE;
-
-	dprintk("%s  buf %p buflen %zu npages %d args.acl_len %zu\n",
-		__func__, buf, buflen, npages, args.acl_len);
-	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
+	dprintk("%s  args.acl_len %zu\n",
+		__func__, args.acl_len);
+	err = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
 			     &msg, &args.seq_args, &res.seq_res, 0);
-	if (ret)
+	if (err)
 		goto out_free;
 
-	/* Handle the case where the passed-in buffer is too short */
-	if (res.acl_flags & NFS4_ACL_TRUNC) {
-		/* Did the user only issue a request for the acl length? */
-		if (buf == NULL)
-			goto out_ok;
-		ret = -ERANGE;
-		goto out_free;
-	}
-	nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
-	if (buf) {
-		if (res.acl_len > buflen) {
-			ret = -ERANGE;
-			goto out_free;
-		}
-		_copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
-	}
-out_ok:
-	ret = res.acl_len;
+	richacl_compute_max_masks(res.acl);
+	/* FIXME: Set inode->i_mode from res->mode?  */
+	set_cached_richacl(inode, res.acl);
+	err = 0;
+
 out_free:
-	for (i = 0; i < npages; i++)
-		if (pages[i])
-			__free_page(pages[i]);
+	if (err) {
+		richacl_put(res.acl);
+		res.acl = ERR_PTR(err);
+	}
+	for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
+		__free_page(pages[i]);
 	if (res.acl_scratch)
 		__free_page(res.acl_scratch);
-	return ret;
+	return res.acl;
 }
 
-static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+static struct richacl *nfs4_get_acl_uncached(struct inode *inode)
 {
 	struct nfs4_exception exception = { };
-	ssize_t ret;
+	struct richacl *acl;
 	do {
-		ret = __nfs4_get_acl_uncached(inode, buf, buflen);
-		trace_nfs4_get_acl(inode, ret);
-		if (ret >= 0)
+		acl = __nfs4_get_acl_uncached(inode);
+		trace_nfs4_get_acl(inode, IS_ERR(acl) ? PTR_ERR(acl) : 0);
+		if (!IS_ERR(acl))
 			break;
-		ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
+		acl = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
+			      PTR_ERR(acl), &exception));
 	} while (exception.retry);
-	return ret;
+	return acl;
 }
 
-static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
+static struct richacl *nfs4_proc_get_acl(struct inode *inode)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
+	struct richacl *acl;
 	int ret;
 
-	if (!nfs4_server_supports_acls(server))
-		return -EOPNOTSUPP;
+	if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
+		return ERR_PTR(-EOPNOTSUPP);
 	ret = nfs_revalidate_inode(server, inode);
 	if (ret < 0)
-		return ret;
+		return ERR_PTR(ret);
 	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
 		nfs_zap_acl_cache(inode);
-	ret = nfs4_read_cached_acl(inode, buf, buflen);
-	if (ret != -ENOENT)
-		/* -ENOENT is returned if there is no ACL or if there is an ACL
-		 * but no cached acl data, just the acl length */
-		return ret;
-	return nfs4_get_acl_uncached(inode, buf, buflen);
+	acl = get_cached_richacl(inode);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+	return nfs4_get_acl_uncached(inode);
+}
+
+/*
+ * Check whether @acl only uses entry types that @server advertises.
+ *
+ * Returns -EOPNOTSUPP when the server has neither NFS_CAP_ALLOW_ACLS nor
+ * NFS_CAP_DENY_ACLS set, -EINVAL when an entry is an allow (deny) entry but
+ * the matching capability is missing or when an entry is neither allow nor
+ * deny, and 0 when every entry is acceptable.
+ */
+static int
+richacl_supported(struct nfs_server *server, struct richacl *acl)
+{
+	struct richace *ace;
+
+	if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
+		return -EOPNOTSUPP;
+
+	richacl_for_each_entry(ace, acl) {
+		if (richace_is_allow(ace)) {
+			if (!(server->caps & NFS_CAP_ALLOW_ACLS))
+				return -EINVAL;
+		} else if (richace_is_deny(ace)) {
+			if (!(server->caps & NFS_CAP_DENY_ACLS))
+				return -EINVAL;
+		} else
+			return -EINVAL;
+	}
+	return 0;
 }
 
-static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+/*
+ * Map @uid to a name via nfs_map_uid_to_name() and append it to @xdr as an
+ * XDR opaque (4-byte length followed by the name).
+ *
+ * Returns 0 on success, -ENOENT when the uid cannot be mapped to a name,
+ * or -EIO when no space can be reserved in the stream.
+ */
+static int
+nfs4_encode_user(struct xdr_stream *xdr, const struct nfs_server *server,
+		 kuid_t uid)
+{
+	char name[IDMAP_NAMESZ];
+	int len;
+	__be32 *p;
+
+	len = nfs_map_uid_to_name(server, uid, name, IDMAP_NAMESZ);
+	if (len < 0) {
+		/* from_kuid() yields an unsigned uid_t, hence %u */
+		dprintk("nfs: couldn't resolve uid %u to string\n",
+				from_kuid(&init_user_ns, uid));
+		return -ENOENT;
+	}
+	p = xdr_reserve_space(xdr, 4 + len);
+	if (!p)
+		return -EIO;
+	xdr_encode_opaque(p, name, len);
+	return 0;
+}
+
+/*
+ * Map @gid to a group name via nfs_map_gid_to_group() and append it to
+ * @xdr as an XDR opaque (4-byte length followed by the name).
+ *
+ * Returns 0 on success, -ENOENT when the gid cannot be mapped to a name,
+ * or -EIO when no space can be reserved in the stream.
+ */
+static int
+nfs4_encode_group(struct xdr_stream *xdr, const struct nfs_server *server,
+		  kgid_t gid)
+{
+	char name[IDMAP_NAMESZ];
+	int len;
+	__be32 *p;
+
+	len = nfs_map_gid_to_group(server, gid, name, IDMAP_NAMESZ);
+	if (len < 0) {
+		/* from_kgid() yields an unsigned gid_t, hence %u */
+		dprintk("nfs: couldn't resolve gid %u to string\n",
+				from_kgid(&init_user_ns, gid));
+		return -ENOENT;
+	}
+	p = xdr_reserve_space(xdr, 4 + len);
+	if (!p)
+		return -EIO;
+	xdr_encode_opaque(p, name, len);
+	return 0;
+}
+
+/*
+ * Return the ACE mask bits accepted for the given NFSv4 minor version:
+ * NFS40_ACE_MASK_ALL for minor version 0, NFS4_ACE_MASK_ALL otherwise.
+ */
+static unsigned int
+nfs4_ace_mask(int minorversion)
+{
+	return minorversion == 0 ? NFS40_ACE_MASK_ALL : NFS4_ACE_MASK_ALL;
+}
+
+/*
+ * Append the "who" string of @ace to @xdr as an XDR opaque.
+ *
+ * The string is chosen in this order: the special identifier name when
+ * RICHACE_SPECIAL_WHO is set, otherwise the unmapped identifier stored in
+ * @acl if richace_unmapped_identifier() returns one, otherwise the group
+ * or user name obtained by mapping e_id.gid / e_id.uid.
+ *
+ * Returns 0 on success; -EIO when the special id is unknown or space
+ * cannot be reserved; otherwise the result of nfs4_encode_group() /
+ * nfs4_encode_user().
+ */
+static int
+nfs4_encode_ace_who(struct xdr_stream *xdr, const struct nfs_server *server,
+		    struct richace *ace, struct richacl *acl)
+{
+	const char *who;
+	__be32 *p;
+
+	if (ace->e_flags & RICHACE_SPECIAL_WHO) {
+		unsigned int special_id = ace->e_id.special;
+		const char *who;
+		unsigned int len;
+
+		/* An unknown special id should not occur; warn once */
+		if (!nfs4acl_special_id_to_who(special_id, &who, &len)) {
+			WARN_ON_ONCE(1);
+			return -EIO;
+		}
+		p = xdr_reserve_space(xdr, 4 + len);
+		if (!p)
+			return -EIO;
+		xdr_encode_opaque(p, who, len);
+		return 0;
+	} else {
+		/* Prefer the preserved on-the-wire name over a uid/gid mapping */
+		who = richace_unmapped_identifier(ace, acl);
+		if (who) {
+			unsigned int len = strlen(who);
+
+			p = xdr_reserve_space(xdr, 4 + len);
+			if (!p)
+				return -EIO;
+			xdr_encode_opaque(p, who, len);
+			return 0;
+		} else if (ace->e_flags & RICHACE_IDENTIFIER_GROUP)
+			return nfs4_encode_group(xdr, server, ace->e_id.gid);
+		else
+			return nfs4_encode_user(xdr, server, ace->e_id.uid);
+	}
+}
+
+/*
+ * Encode @acl into @pages (at most @len bytes) in NFSv4 wire format.
+ *
+ * When the server advertises FATTR4_WORD1_DACL the acl flags word is
+ * emitted first; otherwise acls with a_flags set or with entries carrying
+ * RICHACE_INHERITED_ACE are rejected.  Entries with mask bits outside
+ * nfs4_ace_mask() for the client's minor version are rejected as well.
+ * A NULL @acl is encoded as an acl with zero entries.
+ *
+ * Returns the number of bytes encoded (buf.len), -EINVAL when the acl
+ * cannot be represented for this server, or -ENOMEM when encoding fails.
+ */
+static int
+nfs4_encode_acl(struct page **pages, unsigned int len, struct richacl *acl,
+		const struct nfs_server *server)
+{
+	int minorversion = server->nfs_client->cl_minorversion;
+	unsigned int ace_mask = nfs4_ace_mask(minorversion);
+	struct xdr_stream xdr;
+	struct xdr_buf buf;
+	__be32 *p;
+	struct richace *ace;
+
+	/* Reject acls not understood by the server */
+	if (server->attr_bitmask[1] & FATTR4_WORD1_DACL) {
+		BUILD_BUG_ON(NFS4_ACE_MASK_ALL != RICHACE_VALID_MASK);
+	} else if (acl) {
+		if (acl->a_flags)
+			return -EINVAL;
+		richacl_for_each_entry(ace, acl) {
+			if (ace->e_flags & RICHACE_INHERITED_ACE)
+				return -EINVAL;
+		}
+	}
+	/* Only walk the entries when there is an acl; NULL is handled below */
+	if (acl) {
+		richacl_for_each_entry(ace, acl) {
+			if (ace->e_mask & ~ace_mask)
+				return -EINVAL;
+		}
+	}
+
+	xdr_init_encode_pages(&xdr, &buf, pages, len);
+
+	if (server->attr_bitmask[1] & FATTR4_WORD1_DACL) {
+		p = xdr_reserve_space(&xdr, 4);
+		if (!p)
+			goto fail;
+		*p = cpu_to_be32(acl ? acl->a_flags : 0);
+	}
+
+	p = xdr_reserve_space(&xdr, 4);
+	if (!p)
+		goto fail;
+	if (!acl) {
+		*p++ = cpu_to_be32(0);
+		return buf.len;
+	}
+	*p++ = cpu_to_be32(acl->a_count);
+
+	richacl_for_each_entry(ace, acl) {
+		p = xdr_reserve_space(&xdr, 4*3);
+		if (!p)
+			goto fail;
+		*p++ = cpu_to_be32(ace->e_type);
+		/* SPECIAL_WHO / UNMAPPED_WHO are local flags, not sent on the wire */
+		*p++ = cpu_to_be32(ace->e_flags &
+			~(RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO));
+		*p++ = cpu_to_be32(ace->e_mask & NFS4_ACE_MASK_ALL);
+		if (nfs4_encode_ace_who(&xdr, server, ace, acl) != 0)
+			goto fail;
+	}
+
+	return buf.len;
+
+fail:
+	return -ENOMEM;
+}
+
+static int __nfs4_proc_set_acl(struct inode *inode, struct richacl *acl)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct page *pages[NFS4ACL_MAXPAGES];
+	struct page *pages[DIV_ROUND_UP(NFS4ACL_SIZE_MAX, PAGE_SIZE) + 1 /* scratch */] = {};
 	struct nfs_setaclargs arg = {
+		.server		= server,
 		.fh		= NFS_FH(inode),
 		.acl_pages	= pages,
-		.acl_len	= buflen,
 	};
 	struct nfs_setaclres res;
 	struct rpc_message msg = {
@@ -4758,16 +4821,20 @@  static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		.rpc_argp	= &arg,
 		.rpc_resp	= &res,
 	};
-	unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
 	int ret, i;
 
-	if (!nfs4_server_supports_acls(server))
-		return -EOPNOTSUPP;
-	if (npages > ARRAY_SIZE(pages))
-		return -ERANGE;
-	i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
-	if (i < 0)
-		return i;
+	ret = richacl_supported(server, acl);
+	if (ret)
+		return ret;
+
+	ret = nfs4_encode_acl(pages, NFS4ACL_SIZE_MAX, acl, server);
+	if (ret < 0) {
+		for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
+			put_page(pages[i]);
+		return ret;
+	}
+	arg.acl_len = ret;
+
 	nfs4_inode_return_delegation(inode);
 	ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 
@@ -4775,8 +4842,8 @@  static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 	 * Free each page after tx, so the only ref left is
 	 * held by the network stack
 	 */
-	for (; i > 0; i--)
-		put_page(pages[i-1]);
+	for (i = 0; i < ARRAY_SIZE(pages) && pages[i]; i++)
+		put_page(pages[i]);
 
 	/*
 	 * Acl update can result in inode attribute update.
@@ -4790,12 +4857,12 @@  static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 	return ret;
 }
 
-static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+static int nfs4_proc_set_acl(struct inode *inode, struct richacl *acl)
 {
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = __nfs4_proc_set_acl(inode, buf, buflen);
+		err = __nfs4_proc_set_acl(inode, acl);
 		trace_nfs4_set_acl(inode, err);
 		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
@@ -6257,34 +6324,283 @@  nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
 	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
 }
 
+/*
+ * xattr "set" handler for the richacl attribute.
+ *
+ * With a value (@buf != NULL) the xattr is converted with
+ * richacl_from_xattr(), richacl_apply_masks() is applied for the inode
+ * owner, and the result is sent to the server.  Without a value the acl is
+ * replaced by one derived from the cached file mode.
+ *
+ * Returns 0 on success or a negative errno; -EINVAL when @key is not the
+ * empty string.
+ */
+static int nfs4_xattr_set_richacl(struct dentry *dentry, const char *key,
+				  const void *buf, size_t buflen,
+				  int flags, int handler_flags)
+{
+	struct inode *inode = d_inode(dentry);
+	struct richacl *acl;
+	int error;
+
+	if (strcmp(key, "") != 0)
+		return -EINVAL;
+
+	if (buf) {
+		acl = richacl_from_xattr(&init_user_ns, buf, buflen);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		error = richacl_apply_masks(&acl, inode->i_uid);
+	} else {
+		/*
+		 * "Remove the acl"; only permissions granted by the mode
+		 * remain.  We are using the cached mode here which could be
+		 * outdated; should we do a GETATTR first to narrow down the
+		 * race window?
+		 */
+		acl = richacl_from_mode(inode->i_mode);
+		/*
+		 * NOTE(review): richacl_from_mode() presumably allocates and
+		 * can fail; cover both NULL and ERR_PTR conventions so a
+		 * failed allocation is never handed to nfs4_proc_set_acl().
+		 */
+		if (IS_ERR_OR_NULL(acl))
+			return acl ? PTR_ERR(acl) : -ENOMEM;
+		error = 0;
+	}
+
+	if (!error)
+		error = nfs4_proc_set_acl(inode, acl);
+	richacl_put(acl);
+	return error;
+}
+
+/*
+ * xattr "get" handler for the richacl attribute.
+ *
+ * Fetches the acl with nfs4_proc_get_acl() and converts it with
+ * richacl_to_xattr().  Returns -ENODATA when there is no acl, and also
+ * when richacl_equiv_mode() succeeds and the resulting permission bits
+ * match the inode's mode (presumably meaning the acl adds nothing beyond
+ * the file mode -- confirm against richacl_equiv_mode()).
+ *
+ * Returns -EINVAL when @key is not the empty string, otherwise the result
+ * of richacl_to_xattr() or a negative errno.
+ */
+static int nfs4_xattr_get_richacl(struct dentry *dentry, const char *key,
+				  void *buf, size_t buflen, int handler_flags)
+{
+	struct inode *inode = d_inode(dentry);
+	struct richacl *acl;
+	int error;
+	/* Start from the file type bits only; richacl_equiv_mode() updates mode */
+	umode_t mode = inode->i_mode & S_IFMT;
+
+	if (strcmp(key, "") != 0)
+		return -EINVAL;
+
+	acl = nfs4_proc_get_acl(inode);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+	error = -ENODATA;
+	if (richacl_equiv_mode(acl, &mode) == 0 &&
+	    ((mode ^ inode->i_mode) & S_IRWXUGO) == 0)
+		goto out;
+	error = richacl_to_xattr(&init_user_ns, acl, buf, buflen);
+out:
+	richacl_put(acl);
+	return error;
+}
+
+/*
+ * xattr "list" handler for the richacl attribute.
+ *
+ * Returns 0 when the server advertises neither allow nor deny acl support.
+ * Otherwise returns sizeof(XATTR_NAME_RICHACL) and, when @list is non-NULL
+ * and large enough, copies that many bytes of the name into it (if the
+ * macro is a string literal this includes the terminating NUL).
+ */
+static size_t nfs4_xattr_list_richacl(struct dentry *dentry, char *list,
+				      size_t list_len, const char *name,
+				      size_t name_len, int handler_flags)
+{
+	struct nfs_server *server = NFS_SERVER(d_inode(dentry));
+	size_t len = sizeof(XATTR_NAME_RICHACL);
+
+	if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
+		return 0;
+
+	if (list && len <= list_len)
+		memcpy(list, XATTR_NAME_RICHACL, len);
+	return len;
+}
+
 #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
 
+/*
+ * Serialize @acl into @buf in the layout used for the "system.nfs4_acl"
+ * xattr: a big-endian entry count, then per entry the type, flags and
+ * mask words followed by the length-prefixed, zero-padded "who" string.
+ *
+ * The required size is always computed; data is only written while it
+ * still fits into @buflen.  A @buflen of 0 therefore just queries the
+ * size.
+ *
+ * Returns the number of bytes required, -ERANGE when a non-empty buffer
+ * is too small, or -EIO when an identifier cannot be turned into a name.
+ */
+static int richacl_to_nfs4_acl(struct nfs_server *server,
+			       const struct richacl *acl,
+			       void *buf, size_t buflen)
+{
+	const struct richace *ace;
+	__be32 *p = buf;
+	size_t size = 0;
+
+	size += sizeof(*p);
+	if (buflen >= size)
+		*p++ = cpu_to_be32(acl->a_count);
+
+	richacl_for_each_entry(ace, acl) {
+		char who_buf[IDMAP_NAMESZ];
+		const char *who = who_buf;
+		int who_len;
+
+		size += 3 * sizeof(*p);
+		if (buflen >= size) {
+			*p++ = cpu_to_be32(ace->e_type);
+			/* Strip flags that are not part of this xattr format */
+			*p++ = cpu_to_be32(ace->e_flags &
+					   ~(RICHACE_INHERITED_ACE |
+					     RICHACE_UNMAPPED_WHO |
+					     RICHACE_SPECIAL_WHO));
+			*p++ = cpu_to_be32(ace->e_mask);
+		}
+
+		if (richace_is_unix_user(ace)) {
+			who_len = nfs_map_uid_to_name(server, ace->e_id.uid,
+						      who_buf, sizeof(who_buf));
+			if (who_len < 0)
+				return -EIO;
+		} else if (richace_is_unix_group(ace)) {
+			who_len = nfs_map_gid_to_group(server, ace->e_id.gid,
+						       who_buf, sizeof(who_buf));
+			if (who_len < 0)
+				return -EIO;
+		} else if (ace->e_flags & RICHACE_SPECIAL_WHO) {
+			/*
+			 * nfs4acl_special_id_to_who() is called with an
+			 * unsigned int length elsewhere in this file; use a
+			 * matching temporary instead of passing an int *.
+			 */
+			unsigned int special_len;
+
+			if (!nfs4acl_special_id_to_who(ace->e_id.special,
+						       &who, &special_len))
+				return -EIO;
+			who_len = special_len;
+		} else {
+			who = richace_unmapped_identifier(ace, acl);
+			if (who)
+				who_len = strlen(who);
+			else
+				return -EIO;
+		}
+
+		size += sizeof(*p) + ALIGN(who_len, sizeof(*p));
+		if (buflen >= size) {
+			/* Bytes needed to pad who up to a 4-byte boundary */
+			unsigned int padding = -who_len & (sizeof(*p) - 1);
+
+			*p++ = cpu_to_be32(who_len);
+			memcpy(p, who, who_len);
+			memset((char *)p + who_len, 0, padding);
+			p += DIV_ROUND_UP(who_len, sizeof(*p));
+		}
+	}
+	if (buflen && buflen < size)
+		return -ERANGE;
+	return size;
+}
+
+/*
+ * Parse a "system.nfs4_acl" xattr value (@buf, @buflen) into a richacl.
+ *
+ * The buffer holds a big-endian entry count followed, per entry, by the
+ * type, flags, mask and who-length words and the who string padded to a
+ * multiple of four bytes.  Special who strings become RICHACE_SPECIAL_WHO
+ * entries; all others are mapped to a uid or gid, and the original name
+ * is additionally preserved when the mapping yields id 99.
+ *
+ * Returns the new acl, or ERR_PTR(-EINVAL) for malformed input (including
+ * trailing bytes), ERR_PTR(-ENOMEM) on allocation failure, or the error
+ * from the id mapper.
+ */
+static struct richacl *richacl_from_nfs4_acl(struct nfs_server *server,
+					     const void *buf, size_t buflen)
+{
+	struct richacl *acl = NULL;
+	struct richace *ace;
+	const __be32 *p = buf;
+	/* 32-bit wire value: keep it unsigned so it cannot go negative */
+	u32 count;
+	int err;
+
+	if (buflen < sizeof(*p))
+		return ERR_PTR(-EINVAL);
+	count = be32_to_cpu(*p++);
+	if (count > RICHACL_XATTR_MAX_COUNT)
+		return ERR_PTR(-EINVAL);
+	buflen -= sizeof(*p);
+	acl = richacl_alloc(count, GFP_NOFS);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+	richacl_for_each_entry(ace, acl) {
+		u32 who_len, size;
+		int special_id;
+		char *who;
+
+		err = -EINVAL;
+		if (buflen < 4 * sizeof(*p))
+			goto out;
+		ace->e_type = be32_to_cpu(*p++);
+		ace->e_flags = be32_to_cpu(*p++);
+		/* These flags are local only and must not come from userspace */
+		if (ace->e_flags & (RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO))
+			goto out;
+		ace->e_mask = be32_to_cpu(*p++);
+		who_len = be32_to_cpu(*p++);
+		buflen -= 4 * sizeof(*p);
+		size = ALIGN(who_len, 4);
+		/* size == 0 also catches who_len values that wrap in ALIGN() */
+		if (buflen < size || size == 0)
+			goto out;
+		who = (char *)p;
+		special_id = nfs4acl_who_to_special_id(who, who_len);
+		if (special_id >= 0) {
+			/*
+			 * NOTE(review): the XDR decoder in this patch also
+			 * clears RICHACE_IDENTIFIER_GROUP for special
+			 * identifiers; confirm whether that is wanted here.
+			 */
+			ace->e_flags |= RICHACE_SPECIAL_WHO;
+			ace->e_id.special = special_id;
+		} else {
+			bool unmappable;
+
+			if (ace->e_flags & RICHACE_IDENTIFIER_GROUP) {
+				err = nfs_map_group_to_gid(server, who, who_len,
+							   &ace->e_id.gid);
+				if (err) {
+					dprintk("%s: nfs_map_group_to_gid "
+						"failed!\n", __func__);
+					goto out;
+				}
+				/* FIXME: nfsidmap doesn't distinguish between
+					  group nobody and unmappable groups! */
+				unmappable = gid_eq(ace->e_id.gid,
+					make_kgid(&init_user_ns, 99));
+			} else {
+				err = nfs_map_name_to_uid(server, who, who_len,
+							  &ace->e_id.uid);
+				if (err) {
+					dprintk("%s: nfs_map_name_to_uid "
+						"failed!\n", __func__);
+					goto out;
+				}
+				/* FIXME: nfsidmap doesn't distinguish between
+					  user nobody and unmappable users! */
+				unmappable = uid_eq(ace->e_id.uid,
+					make_kuid(&init_user_ns, 99));
+			}
+			if (unmappable) {
+				err = -ENOMEM;
+				if (richacl_add_unmapped_identifier(&acl, &ace,
+					who, who_len, GFP_NOFS))
+					goto out;
+			}
+		}
+		p += size / sizeof(*p);
+		buflen -= size;
+	}
+	/* Trailing bytes after the last entry make the value invalid */
+	err = -EINVAL;
+	if (buflen != 0)
+		goto out;
+	err = 0;
+
+out:
+	if (err) {
+		richacl_put(acl);
+		acl = ERR_PTR(err);
+	}
+	return acl;
+}
+
 static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
 				   const void *buf, size_t buflen,
 				   int flags, int type)
 {
-	if (strcmp(key, "") != 0)
+	struct inode *inode = d_inode(dentry);
+	struct richacl *acl;
+	int error;
+
+	if (!buf || strcmp(key, "") != 0)
 		return -EINVAL;
 
-	return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
+	acl = richacl_from_nfs4_acl(NFS_SERVER(inode), (void *)buf, buflen);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	error = nfs4_proc_set_acl(inode, acl);
+	richacl_put(acl);
+	return error;
 }
 
 static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
 				   void *buf, size_t buflen, int type)
 {
+	struct inode *inode = d_inode(dentry);
+	struct richacl *acl;
+	int error;
+
 	if (strcmp(key, "") != 0)
 		return -EINVAL;
-
-	return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
+	acl = nfs4_proc_get_acl(inode);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+	error = richacl_to_nfs4_acl(NFS_SERVER(inode), acl, buf, buflen);
+	richacl_put(acl);
+	return error;
 }
 
 static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
 				       size_t list_len, const char *name,
 				       size_t name_len, int type)
 {
+	struct nfs_server *server = NFS_SERVER(d_inode(dentry));
 	size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
 
-	if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
+	if (!(server->caps & (NFS_CAP_ALLOW_ACLS | NFS_CAP_DENY_ACLS)))
 		return 0;
 
 	if (list && len <= list_len)
@@ -8837,6 +9153,13 @@  const struct nfs_rpc_ops nfs_v4_clientops = {
 	.clone_server	= nfs_clone_server,
 };
 
+/* xattr handler wiring the richacl attribute to the NFSv4 list/get/set helpers */
+static const struct xattr_handler nfs4_xattr_richacl_handler = {
+	.prefix	= XATTR_NAME_RICHACL,
+	.list	= nfs4_xattr_list_richacl,
+	.get	= nfs4_xattr_get_richacl,
+	.set	= nfs4_xattr_set_richacl,
+};
+
 static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
 	.prefix	= XATTR_NAME_NFSV4_ACL,
 	.list	= nfs4_xattr_list_nfs4_acl,
@@ -8845,6 +9168,7 @@  static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
 };
 
 const struct xattr_handler *nfs4_xattr_handlers[] = {
+	&nfs4_xattr_richacl_handler,
 	&nfs4_xattr_nfs4_acl_handler,
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 	&nfs4_xattr_nfs4_label_handler,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index eefed15..f2507d7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,10 @@ 
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_idmap.h>
+#include <linux/richacl.h>
+#include <linux/richacl_xattr.h>  /* for RICHACL_XATTR_MAX_COUNT */
+#include <linux/nfs4acl.h>
 
 #include "nfs4_fs.h"
 #include "internal.h"
@@ -1650,16 +1654,24 @@  encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 static void
 encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
 {
-	__be32 *p;
+	int attrlen_offset;
+	__be32 attrlen, *p;
 
 	encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr);
 	encode_nfs4_stateid(xdr, &zero_stateid);
+
+	/* Encode attribute bitmap. */
 	p = reserve_space(xdr, 2*4);
 	*p++ = cpu_to_be32(1);
 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
-	p = reserve_space(xdr, 4);
-	*p = cpu_to_be32(arg->acl_len);
+
+	attrlen_offset = xdr->buf->len;
+	xdr_reserve_space(xdr, 4);  /* to be backfilled later */
+
 	xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len);
+
+	attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
+	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
 }
 
 static void
@@ -2488,7 +2500,7 @@  static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
 	replen = hdr.replen + op_decode_hdr_maxsz + 1;
-	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
+	encode_getattr_two(xdr, FATTR4_WORD0_ACL, FATTR4_WORD1_MODE, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, 0, args->acl_len);
@@ -5260,24 +5272,135 @@  decode_restorefh(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_RESTOREFH);
 }
 
+static int
+nfs4_decode_ace_who(struct richace *ace,
+		    const char **unmapped, unsigned int *unmapped_len,
+		    const struct nfs_server *server,
+		    struct xdr_stream *xdr)
+{
+	char *who;
+	u32 len;
+	int special_id;
+	__be32 *p;
+	int error;
+	/* Decode one ACE "who" string from @xdr and resolve it into @ace. */
+	p = xdr_inline_decode(xdr, 4);
+	if (!p)
+		return -ENOMEM;  /* acl truncated */
+	len = be32_to_cpup(p++);
+	if (len >= XDR_MAX_NETOBJ) {
+		dprintk("%s: name too long (%u)!\n",
+			__func__, len);
+		return -EIO;
+	}
+	who = (char *)xdr_inline_decode(xdr, len);
+	if (!who)
+		return -ENOMEM;  /* acl truncated */
+	/* Special identifiers are matched by name; no uid/gid lookup is done. */
+	special_id = nfs4acl_who_to_special_id(who, len);
+	if (special_id >= 0) {
+		ace->e_flags |= RICHACE_SPECIAL_WHO;
+		ace->e_flags &= ~RICHACE_IDENTIFIER_GROUP;
+		ace->e_id.special = special_id;
+		return 0;
+	}
+	if (ace->e_flags & RICHACE_IDENTIFIER_GROUP) {
+		error = nfs_map_group_to_gid(server, who, len, &ace->e_id.gid);
+		if (error) {
+			dprintk("%s: nfs_map_group_to_gid failed!\n",
+					__func__);
+			return error;
+		}
+		/* FIXME: nfsidmap doesn't distinguish between group nobody and
+		 * unmappable groups, so gid 99 is treated as "unmapped" here. */
+		if (gid_eq(ace->e_id.gid, make_kgid(&init_user_ns, 99))) {
+			*unmapped = who;  /* points into the xdr data, not a copy */
+			*unmapped_len = len;
+		}
+	} else {
+		error = nfs_map_name_to_uid(server, who, len, &ace->e_id.uid);
+		if (error) {
+			dprintk("%s: nfs_map_name_to_uid failed!\n",
+					__func__);
+			return error;
+		}
+		/* FIXME: nfsidmap doesn't distinguish between user nobody and
+		 * unmappable users, so uid 99 is treated as "unmapped" here. */
+		if (uid_eq(ace->e_id.uid, make_kuid(&init_user_ns, 99))) {
+			*unmapped = who;  /* points into the xdr data, not a copy */
+			*unmapped_len = len;
+		}
+	}
+	return 0;
+}
+
+static struct richacl *
+decode_acl_entries(struct xdr_stream *xdr, const struct nfs_server *server)
+{
+	struct richacl *acl;
+	struct richace *ace;
+	uint32_t count;
+	__be32 *p;
+	int status;
+	/* Decode an ACE count plus entries; returns a richacl or an ERR_PTR. */
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		return ERR_PTR(-ENOMEM);  /* acl truncated */
+	count = be32_to_cpup(p);
+	if (count > RICHACL_XATTR_MAX_COUNT)  /* bounds the allocation below */
+		return ERR_PTR(-EIO);
+	acl = richacl_alloc(count, GFP_NOFS);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+	richacl_for_each_entry(ace, acl) {
+		const char *unmapped = NULL;
+		unsigned int unmapped_len;
+
+		p = xdr_inline_decode(xdr, 4*3);  /* type, flags, mask words */
+		status = -ENOMEM;
+		if (unlikely(!p))
+			goto out;  /* acl truncated */
+		ace->e_type = be32_to_cpup(p++);
+		ace->e_flags = be32_to_cpup(p++);
+		status = -EIO;
+		if (ace->e_flags &
+		    (RICHACE_SPECIAL_WHO | RICHACE_UNMAPPED_WHO))
+			goto out;  /* these flag bits are rejected from the wire */
+		ace->e_mask = be32_to_cpup(p++);
+		status = nfs4_decode_ace_who(ace, &unmapped,
+					     &unmapped_len, server,
+					     xdr);
+		if (status)
+			goto out;
+		if (unmapped) {
+			status = -ENOMEM;  /* NOTE(review): callee gets &acl/&ace; may replace them */
+			if (richacl_add_unmapped_identifier(&acl, &ace,
+					unmapped, unmapped_len,
+					GFP_NOFS))
+				goto out;
+		}
+	}
+	status = 0;
+
+out:
+	if (status) {
+		richacl_put(acl);  /* drop the partially decoded acl */
+		acl = ERR_PTR(status);
+	}
+	return acl;
+}
+
 static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 			 struct nfs_getaclres *res)
 {
 	unsigned int savep;
 	uint32_t attrlen,
 		 bitmap[3] = {0};
+	struct richacl *acl = NULL;
 	int status;
-	unsigned int pg_offset;
 
-	res->acl_len = 0;
 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
 		goto out;
-
-	xdr_enter_page(xdr, xdr->buf->page_len);
-
-	/* Calculate the offset of the page data */
-	pg_offset = xdr->buf->head[0].iov_len;
-
 	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
 		goto out;
 	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -5286,24 +5409,35 @@  static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
 		return -EIO;
 	if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
-
-		/* The bitmap (xdr len + bitmaps) and the attr xdr len words
-		 * are stored with the acl data to handle the problem of
-		 * variable length bitmaps.*/
-		res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
-		res->acl_len = attrlen;
-
-		/* Check for receive buffer overflow */
-		if (res->acl_len > (xdr->nwords << 2) ||
-		    res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
-			res->acl_flags |= NFS4_ACL_TRUNC;
-			dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
-					attrlen, xdr->nwords << 2);
-		}
+		acl = decode_acl_entries(xdr, res->server);
+		if (IS_ERR(acl)) {
+			status = PTR_ERR(acl);
+			/* "out" calls richacl_put(); it must never see an ERR_PTR. */
+			acl = NULL;
+			goto out;
+		}
+		bitmap[0] &= ~FATTR4_WORD0_ACL;
-	} else
-		status = -EOPNOTSUPP;
+	} else {
+		/* No ACL in the reply: report that instead of falling through. */
+		status = -EOPNOTSUPP;
+		goto out;
+	}
 
+	/* Only ACL (word 0) and MODE (word 1) were requested. */
+	status = -EIO;
+	if (unlikely(bitmap[0]))
+		goto out;
+
+	status = decode_attr_mode(xdr, bitmap, &res->mode);
+	if (status < 0)
+		goto out;
+	status = 0;  /* presumably success can be reported as > 0; normalize */
+
 out:
+	if (status == 0)
+		res->acl = acl;
+	else
+		richacl_put(acl);
 	return status;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 383a027..8ced33d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2319,7 +2319,7 @@  void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
 		/* The VFS shouldn't apply the umask to mode bits. We will do
 		 * so ourselves when necessary.
 		 */
-		sb->s_flags |= MS_POSIXACL;
+		sb->s_flags |= MS_RICHACL;
 		sb->s_time_gran = 1;
 	}
 
@@ -2346,7 +2346,7 @@  void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
 		/* The VFS shouldn't apply the umask to mode bits. We will do
 		 * so ourselves when necessary.
 		 */
-		sb->s_flags |= MS_POSIXACL;
+		sb->s_flags |= MS_RICHACL;
 	}
 
  	nfs_initialise_sb(sb);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c0e9614..b84e194 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -176,7 +176,6 @@  struct nfs_inode {
 	wait_queue_head_t	waitqueue;
 
 #if IS_ENABLED(CONFIG_NFS_V4)
-	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */
 	struct list_head	open_states;
 	struct nfs_delegation __rcu *delegation;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 570a7df..6c41668 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -243,5 +243,7 @@  struct nfs_server {
 #define NFS_CAP_ALLOCATE	(1U << 20)
 #define NFS_CAP_DEALLOCATE	(1U << 21)
 #define NFS_CAP_LAYOUTSTATS	(1U << 22)
+#define NFS_CAP_ALLOW_ACLS	(1U << 23)
+#define NFS_CAP_DENY_ACLS	(1U << 24)
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 090ade4..337c341 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -683,9 +683,10 @@  struct nfs_setattrargs {
 
 struct nfs_setaclargs {
 	struct nfs4_sequence_args	seq_args;
+	const struct nfs_server *	server;		/* NOTE(review): not read by encode_setacl(); confirm user */
 	struct nfs_fh *			fh;
-	size_t				acl_len;
 	struct page **			acl_pages;
+	size_t				acl_len;	/* bytes of acl_pages given to xdr_write_pages() */
 };
 
 struct nfs_setaclres {
@@ -703,9 +704,9 @@  struct nfs_getaclargs {
 #define NFS4_ACL_TRUNC		0x0001	/* ACL was truncated */
 struct nfs_getaclres {
 	struct nfs4_sequence_res	seq_res;
-	size_t				acl_len;
-	size_t				acl_data_offset;
-	int				acl_flags;
+	const struct nfs_server *	server;		/* passed to decode_acl_entries() for id mapping */
+	struct richacl *		acl;		/* set by decode_getacl() only on success */
+	umode_t				mode;		/* filled in by decode_attr_mode() */
 	struct page *			acl_scratch;
 };