@@ -1311,7 +1311,8 @@ const struct inode_operations ovl_dir_inode_operations = {
.permission = ovl_permission,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
- .get_inode_acl = ovl_get_acl,
+ .get_inode_acl = ovl_get_inode_acl,
+ .get_acl = ovl_get_acl,
.update_time = ovl_update_time,
.fileattr_get = ovl_fileattr_get,
.fileattr_set = ovl_fileattr_set,
@@ -14,6 +14,8 @@
#include <linux/fileattr.h>
#include <linux/security.h>
#include <linux/namei.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
#include "overlayfs.h"
@@ -460,9 +462,9 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
* of the POSIX ACLs retrieved from the lower layer to this function to not
* alter the POSIX ACLs for the underlying filesystem.
*/
-static void ovl_idmap_posix_acl(struct inode *realinode,
- struct user_namespace *mnt_userns,
- struct posix_acl *acl)
+void ovl_idmap_posix_acl(struct inode *realinode,
+ struct user_namespace *mnt_userns,
+ struct posix_acl *acl)
{
struct user_namespace *fs_userns = i_user_ns(realinode);
@@ -495,7 +497,7 @@ static void ovl_idmap_posix_acl(struct inode *realinode,
*
* This is obviously only relevant when idmapped layers are used.
*/
-struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
+struct posix_acl *ovl_get_inode_acl(struct inode *inode, int type, bool rcu)
{
struct inode *realinode = ovl_inode_real(inode);
struct posix_acl *acl, *clone;
@@ -547,6 +549,51 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
posix_acl_release(acl);
return clone;
}
+/* Fetch ACL @acl_name for @path from its layer; an idmapped layer mount yields a translated clone. */
+static struct posix_acl *ovl_get_acl_path(const struct path *path,
+ const char *acl_name)
+{
+ struct posix_acl *real_acl, *clone;
+ struct user_namespace *mnt_userns;
+
+ mnt_userns = mnt_user_ns(path->mnt);
+
+ real_acl = vfs_get_acl(mnt_userns, path->dentry, acl_name);
+ if (IS_ERR_OR_NULL(real_acl))
+ return real_acl; /* error pointer or NULL is handed back untouched */
+
+ if (!is_idmapped_mnt(path->mnt))
+ return real_acl; /* not idmapped: return the layer's ACL as-is */
+
+ /*
+ * We cannot alter the ACLs returned from the relevant layer as that
+ * would alter the cached values filesystem wide for the lower
+ * filesystem. Instead we can clone the ACLs and then apply the
+ * relevant idmapping of the layer.
+ */
+ clone = posix_acl_clone(real_acl, GFP_KERNEL);
+ if (clone)
+ ovl_idmap_posix_acl(d_inode(path->dentry), mnt_userns, clone);
+ else
+ clone = ERR_PTR(-ENOMEM); /* posix_acl_clone() returned NULL */
+ /* Drop our reference to the original ACL on both paths; only the clone or the error is returned. */
+ posix_acl_release(real_acl);
+ return clone;
+}
+/* ->get_acl() inode operation: return the ACL of @type found on the real path behind @dentry. */
+struct posix_acl *ovl_get_acl(struct user_namespace *mnt_userns, /* not referenced in the body */
+ struct dentry *dentry, int type)
+{
+ struct posix_acl *acl = NULL;
+ const struct cred *old_cred;
+ struct path realpath;
+
+ ovl_path_real(dentry, &realpath); /* fills realpath; presumably the upper or lower layer path - confirm */
+ old_cred = ovl_override_creds(dentry->d_sb); /* presumably switches to the overlay mounter's creds - confirm */
+ acl = ovl_get_acl_path(&realpath, posix_acl_xattr_name(type));
+ revert_creds(old_cred); /* reinstate the creds saved in old_cred */
+ return acl; /* may be NULL or an ERR_PTR, exactly as ovl_get_acl_path() returned it */
+}
#endif
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
@@ -721,7 +768,8 @@ static const struct inode_operations ovl_file_inode_operations = {
.permission = ovl_permission,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
- .get_inode_acl = ovl_get_acl,
+ .get_inode_acl = ovl_get_inode_acl,
+ .get_acl = ovl_get_acl,
.update_time = ovl_update_time,
.fiemap = ovl_fiemap,
.fileattr_get = ovl_fileattr_get,
@@ -741,7 +789,8 @@ static const struct inode_operations ovl_special_inode_operations = {
.permission = ovl_permission,
.getattr = ovl_getattr,
.listxattr = ovl_listxattr,
- .get_inode_acl = ovl_get_acl,
+ .get_inode_acl = ovl_get_inode_acl,
+ .get_acl = ovl_get_acl,
.update_time = ovl_update_time,
};
@@ -592,9 +592,15 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
#ifdef CONFIG_FS_POSIX_ACL
-struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu);
+struct posix_acl *ovl_get_inode_acl(struct inode *inode, int type, bool rcu);
+struct posix_acl *ovl_get_acl(struct user_namespace *mnt_userns,
+ struct dentry *dentry, int type);
+void ovl_idmap_posix_acl(struct inode *realinode,
+ struct user_namespace *mnt_userns,
+ struct posix_acl *acl);
#else
-#define ovl_get_acl NULL
+#define ovl_get_inode_acl NULL
+#define ovl_get_acl NULL
#endif
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
The current way of setting and getting posix acls through the generic xattr interface is error-prone and type-unsafe. The vfs needs to interpret and fix up posix acls before storing them or reporting them to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in its current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1].

In order to build a type-safe posix acl api around get and set acl we need all filesystems to implement get and set acl.

Now that we have added get and set acl inode operations that allow easy access to the dentry, we give overlayfs its own get and set acl inode operations. Since overlayfs is a stacking filesystem it will use the newly added posix acl api when retrieving posix acls from the relevant layer. Overlayfs can also be mounted on top of idmapped layers. If idmapped layers are used, overlayfs must take the layer's idmapping into account after it has retrieved the posix acls from the relevant layer.

Note, until the vfs has been switched to the new posix acl api this patch is a non-functional change.

Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1]
Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
---
Notes:
    /* v2 */
    Miklos Szeredi <mszeredi@redhat.com>
    - Use IS_ERR_OR_NULL() macro.

    /* v3 */
    unchanged

    /* v4 */
    unchanged

 fs/overlayfs/dir.c       |  3 +-
 fs/overlayfs/inode.c     | 61 ++++++++++++++++++++++++++++++++++++----
 fs/overlayfs/overlayfs.h | 10 +++++--
 3 files changed, 65 insertions(+), 9 deletions(-)