Message ID | 87wpadpb3m.fsf@xmission.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Eric W. Biederman (ebiederm@xmission.com): > > "Serge E. Hallyn" <serge@hallyn.com> writes: > > Overall this looks quite reasonable. > > My only big concern was the lack of verifying of magic_etc. As without Yes, I was relying too much on the size check. > that the code might not be future compatible with new versions of the > capability xattrs. It it tends to be nice to be able to boot old > kernels when regression testing etc. Even if they can't make use of > all of the features of a new filesystem. That certainly was my intent. > > diff --git a/fs/xattr.c b/fs/xattr.c > > index 7e3317c..75cc65a 100644 > > --- a/fs/xattr.c > > +++ b/fs/xattr.c > > @@ -170,12 +170,29 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, > > const void *value, size_t size, int flags) > > { > > struct inode *inode = dentry->d_inode; > > - int error = -EAGAIN; > > + int error; > > + void *wvalue = NULL; > > + size_t wsize = 0; > > int issec = !strncmp(name, XATTR_SECURITY_PREFIX, > > XATTR_SECURITY_PREFIX_LEN); > > > > - if (issec) > > + if (issec) { > > inode->i_flags &= ~S_NOSEC; > > + > > + if (!strcmp(name, "security.capability")) { > > + error = cap_setxattr_convert_nscap(dentry, value, size, > > + &wvalue, &wsize); > > + if (error < 0) > > + return error; > > + if (wvalue) { > > + value = wvalue; > > + size = wsize; > > + } > > + } > > + } > > + > > + error = -EAGAIN; > > + > > Why is the conversion in __vfs_setxattr_noperm and not in setattr as > was done for posix_acl_fix_xattr_from_user? I think I was thinking I wanted to catch all the vfs_setxattr operations, but I don't think that's right. Moving to setxattr seems right. I'll look around a bit more. > > if (inode->i_opflags & IOP_XATTR) { > > error = __vfs_setxattr(dentry, inode, name, value, size, flags); > > if (!error) { > > @@ -184,8 +201,10 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, > > size, flags); > > } > > } else { > > - if (unlikely(is_bad_inode(inode))) > > - return -EIO; > > + if (unlikely(is_bad_inode(inode))) { > > + error = -EIO; > > + goto out; > > + } > > } > > if (error == -EAGAIN) { > > error = -EOPNOTSUPP; > > @@ -200,10 +219,11 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, > > } > > } > > > > +out: > > + kfree(wvalue); > > return error; > > } > > > > - > > int > > vfs_setxattr(struct dentry *dentry, const char *name, const void *value, > > size_t size, int flags) > > The rest of my comments I am going to express as an incremental diff. > Using "security.capability" directly looks like a typo waiting to > happen. > > You have an unnecessary include of linux/uidgid.h > > Missing version checks on the magic_etc field. > And the wrong error code when the code deliberately refuses to return a > capability. Thanks, all looks good. Did you want to just squash these yourself and move on, keep them as separate patches, or have me incorporate into mine and resend? > Eric > > diff --git a/fs/xattr.c b/fs/xattr.c > index 75cc65ac7ea9..f6d5ce3e1030 100644 > --- a/fs/xattr.c > +++ b/fs/xattr.c > @@ -179,7 +179,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, > if (issec) { > inode->i_flags &= ~S_NOSEC; > > - if (!strcmp(name, "security.capability")) { > + if (!strcmp(name, XATTR_NAME_CAPS)) { > error = cap_setxattr_convert_nscap(dentry, value, size, > &wvalue, &wsize); > if (error < 0) > diff --git a/include/linux/capability.h b/include/linux/capability.h > index b97343353a11..c47febf8448b 100644 > --- a/include/linux/capability.h > +++ b/include/linux/capability.h > @@ -13,7 +13,6 @@ > #define _LINUX_CAPABILITY_H > > #include <uapi/linux/capability.h> > -#include <linux/uidgid.h> > > #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 > #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 > diff --git a/security/commoncap.c b/security/commoncap.c > index 8abb9bf4ec17..32e32f437ef5 100644 > --- a/security/commoncap.c > +++ b/security/commoncap.c > @@ -367,6 +367,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, > kuid_t kroot; > uid_t root, mappedroot; > char *tmpbuf = NULL; > + struct vfs_cap_data *cap; > struct vfs_ns_cap_data *nscap; > struct dentry *dentry; > struct user_namespace *fs_ns; > @@ -379,14 +380,16 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, > return -EINVAL; > > size = sizeof(struct vfs_ns_cap_data); > - ret = vfs_getxattr_alloc(dentry, "security.capability", > + ret = vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS, > &tmpbuf, size, GFP_NOFS); > > if (ret < 0) > return ret; > > fs_ns = inode->i_sb->s_user_ns; > - if (ret == sizeof(struct vfs_cap_data)) { > + cap = (struct vfs_cap_data *) tmpbuf; > + if ((ret == sizeof(struct vfs_cap_data)) && > + (cap->magic_etc == cpu_to_le32(VFS_CAP_REVISION_2))) { > /* If this is sizeof(vfs_cap_data) then we're ok with the > * on-disk value, so return that. */ > if (alloc) > @@ -394,7 +397,8 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, > else > kfree(tmpbuf); > return ret; > - } else if (ret != sizeof(struct vfs_ns_cap_data)) { > + } else if ((ret != sizeof(struct vfs_ns_cap_data)) || > + (cap->magic_etc != cpu_to_le32(VFS_CAP_REVISION_3))) { > kfree(tmpbuf); > return -EINVAL; > } > @@ -417,7 +421,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, > > if (!rootid_owns_currentns(kroot)) { > kfree(tmpbuf); > - return -EOPNOTSUPP; > + return -ENODATA; > } > > /* This comes from a parent namespace. Return as a v2 capability */ > @@ -474,11 +478,17 @@ int cap_setxattr_convert_nscap(struct dentry *dentry, const void *value, size_t > return -EINVAL; > if (size != XATTR_CAPS_SZ_2 && size != XATTR_CAPS_SZ_3) > return -EINVAL; > + if ((size == XATTR_CAPS_SZ_2) && > + (cap->magic_etc != cpu_to_le32(VFS_CAP_REVISION_2))) > + return -EINVAL; > + if ((size == XATTR_CAPS_SZ_3) && > + (cap->magic_etc != cpu_to_le32(VFS_CAP_REVISION_3))) > + return -EINVAL; > if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) > return -EPERM; > if (size == XATTR_CAPS_SZ_2) > if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) > - // user is privileged, just write the v2 > + /* user is privileged, just write the v2 */ > return 0; > > rootid = rootid_from_xattr(value, size, task_ns); > @@ -855,7 +865,10 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name, > sizeof(XATTR_SECURITY_PREFIX) - 1) != 0) > return 0; > > - // For XATTR_NAME_CAPS the check will be done in __vfs_setxattr_noperm() > + /* > + * For XATTR_NAME_CAPS the check will be done in > + * __vfs_setxattr_noperm() > + */ > if (strcmp(name, XATTR_NAME_CAPS) == 0) > return 0; > > -- To unsubscribe from this list: send the line "unsubscribe linux-security-module" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
"Serge E. Hallyn" <serge@hallyn.com> writes: > Quoting Eric W. Biederman (ebiederm@xmission.com): >> >> "Serge E. Hallyn" <serge@hallyn.com> writes: >> >> Overall this looks quite reasonable. >> >> My only big concern was the lack of verifying of magic_etc. As without > > Yes, I was relying too much on the size check. > >> that the code might not be future compatible with new versions of the >> capability xattrs. It it tends to be nice to be able to boot old >> kernels when regression testing etc. Even if they can't make use of >> all of the features of a new filesystem. > > That certainly was my intent. > >> > diff --git a/fs/xattr.c b/fs/xattr.c >> > index 7e3317c..75cc65a 100644 >> > --- a/fs/xattr.c >> > +++ b/fs/xattr.c >> > @@ -170,12 +170,29 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, >> > const void *value, size_t size, int flags) >> > { >> > struct inode *inode = dentry->d_inode; >> > - int error = -EAGAIN; >> > + int error; >> > + void *wvalue = NULL; >> > + size_t wsize = 0; >> > int issec = !strncmp(name, XATTR_SECURITY_PREFIX, >> > XATTR_SECURITY_PREFIX_LEN); >> > >> > - if (issec) >> > + if (issec) { >> > inode->i_flags &= ~S_NOSEC; >> > + >> > + if (!strcmp(name, "security.capability")) { >> > + error = cap_setxattr_convert_nscap(dentry, value, size, >> > + &wvalue, &wsize); >> > + if (error < 0) >> > + return error; >> > + if (wvalue) { >> > + value = wvalue; >> > + size = wsize; >> > + } >> > + } >> > + } >> > + >> > + error = -EAGAIN; >> > + >> >> Why is the conversion in __vfs_setxattr_noperm and not in setattr as >> was done for posix_acl_fix_xattr_from_user? > > I think I was thinking I wanted to catch all the vfs_setxattr operations, > but I don't think that's right. Moving to setxattr seems right. I'll > look around a bit more. Thanks. This is one of these little details that we want a good answer to why there. If you can document that in your patch description when you resend I would appreciate it. >> Missing version checks on the magic_etc field. >> And the wrong error code when the code deliberately refuses to return a >> capability. > > Thanks, all looks good. Did you want to just squash these yourself and > move on, keep them as separate patches, or have me incorporate into mine > and resend? Given that there is an outstanding question I would appreciate a resend with an updated patch description, the changes squashed and possibly a change in where cap_setxattr_convert_nscap is called. I have the untested version on my for-testing branch and except for a small increase in the binary size of the kernel all seems well. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-security-module" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
ebiederm@xmission.com (Eric W. Biederman) writes: > "Serge E. Hallyn" <serge@hallyn.com> writes: > >> Quoting Eric W. Biederman (ebiederm@xmission.com): >>> >>> "Serge E. Hallyn" <serge@hallyn.com> writes: >>> >>> > diff --git a/fs/xattr.c b/fs/xattr.c >>> > index 7e3317c..75cc65a 100644 >>> > --- a/fs/xattr.c >>> > +++ b/fs/xattr.c >>> > @@ -170,12 +170,29 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, >>> > const void *value, size_t size, int flags) >>> > { >>> > struct inode *inode = dentry->d_inode; >>> > - int error = -EAGAIN; >>> > + int error; >>> > + void *wvalue = NULL; >>> > + size_t wsize = 0; >>> > int issec = !strncmp(name, XATTR_SECURITY_PREFIX, >>> > XATTR_SECURITY_PREFIX_LEN); >>> > >>> > - if (issec) >>> > + if (issec) { >>> > inode->i_flags &= ~S_NOSEC; >>> > + >>> > + if (!strcmp(name, "security.capability")) { >>> > + error = cap_setxattr_convert_nscap(dentry, value, size, >>> > + &wvalue, &wsize); >>> > + if (error < 0) >>> > + return error; >>> > + if (wvalue) { >>> > + value = wvalue; >>> > + size = wsize; >>> > + } >>> > + } >>> > + } >>> > + >>> > + error = -EAGAIN; >>> > + >>> >>> Why is the conversion in __vfs_setxattr_noperm and not in setattr as >>> was done for posix_acl_fix_xattr_from_user? >> >> I think I was thinking I wanted to catch all the vfs_setxattr operations, >> but I don't think that's right. Moving to setxattr seems right. I'll >> look around a bit more. > > Thanks. This is one of these little details that we want a good answer > to why there. If you can document that in your patch description when > you resend I would appreciate it. Ok. Grrr. Looking at this a little more getting it correct where we call the conversion operation is critical. I believe the current placement of cap_setxattr_convert_nscap is actively wrong. In particular unless I am misleading something this will trigger multiple conversions when setting one of these attributes on overlayfs. The stragey I adopted for for posix acls is: On a write from userspace convert from current_user_ns() to &init_user_ns. On a write to the filesystem convert from &init_user_ns to fs_user_ns. On a read from the filesystem convert from fs_user_ns to &init_user_ns On a read from the kernel to userspace convert from &init_user_ns to current_user_ns(). Overall a good strategy but no one we can trivially adopt for the capability xattr as the second write to filesystem method does not appear to actually exist for anything except for posix acls. I need to think a little more about how we want to accomplish this for the capability xattr. My apoligies for leading you down a path that has all of these bumps and then being sufficiently distracted not to help you through this maze. The only easy solution I can see is to just always keep things in &init_user_ns inside the kernel. That works until we bring fuse or other unprivileged mounts onboard that have storage outside of the kernel. Seth and I will have to rework that for fuse support but that sounds better than not letting such an issue prevent us from merging the code. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-security-module" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Quoting Eric W. Biederman (ebiederm@xmission.com): > ebiederm@xmission.com (Eric W. Biederman) writes: > > > "Serge E. Hallyn" <serge@hallyn.com> writes: > > > >> Quoting Eric W. Biederman (ebiederm@xmission.com): > >>> > >>> "Serge E. Hallyn" <serge@hallyn.com> writes: > >>> > >>> > diff --git a/fs/xattr.c b/fs/xattr.c > >>> > index 7e3317c..75cc65a 100644 > >>> > --- a/fs/xattr.c > >>> > +++ b/fs/xattr.c > >>> > @@ -170,12 +170,29 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, > >>> > const void *value, size_t size, int flags) > >>> > { > >>> > struct inode *inode = dentry->d_inode; > >>> > - int error = -EAGAIN; > >>> > + int error; > >>> > + void *wvalue = NULL; > >>> > + size_t wsize = 0; > >>> > int issec = !strncmp(name, XATTR_SECURITY_PREFIX, > >>> > XATTR_SECURITY_PREFIX_LEN); > >>> > > >>> > - if (issec) > >>> > + if (issec) { > >>> > inode->i_flags &= ~S_NOSEC; > >>> > + > >>> > + if (!strcmp(name, "security.capability")) { > >>> > + error = cap_setxattr_convert_nscap(dentry, value, size, > >>> > + &wvalue, &wsize); > >>> > + if (error < 0) > >>> > + return error; > >>> > + if (wvalue) { > >>> > + value = wvalue; > >>> > + size = wsize; > >>> > + } > >>> > + } > >>> > + } > >>> > + > >>> > + error = -EAGAIN; > >>> > + > >>> > >>> Why is the conversion in __vfs_setxattr_noperm and not in setattr as > >>> was done for posix_acl_fix_xattr_from_user? > >> > >> I think I was thinking I wanted to catch all the vfs_setxattr operations, > >> but I don't think that's right. Moving to setxattr seems right. I'll > >> look around a bit more. > > > > Thanks. This is one of these little details that we want a good answer > > to why there. If you can document that in your patch description when > > you resend I would appreciate it. > > Ok. Grrr. > > Looking at this a little more getting it correct where we call the > conversion operation is critical. > > I believe the current placement of cap_setxattr_convert_nscap is > actively wrong. In particular unless I am misleading something this > will trigger multiple conversions when setting one of these attributes > on overlayfs. > > The stragey I adopted for for posix acls is: > > On a write from userspace convert from current_user_ns() to &init_user_ns. > On a write to the filesystem convert from &init_user_ns to fs_user_ns. > > On a read from the filesystem convert from fs_user_ns to &init_user_ns > On a read from the kernel to userspace convert from &init_user_ns > to current_user_ns(). > > Overall a good strategy but no one we can trivially adopt for the > capability xattr as the second write to filesystem method does not > appear to actually exist for anything except for posix acls. > > I need to think a little more about how we want to accomplish this for > the capability xattr. My apoligies for leading you down a path that has > all of these bumps and then being sufficiently distracted not to help > you through this maze. > > The only easy solution I can see is to just always keep things in > &init_user_ns inside the kernel. That works until we bring fuse or > other unprivileged mounts onboard that have storage outside of the > kernel. > > Seth and I will have to rework that for fuse support but that sounds > better than not letting such an issue prevent us from merging the code. Ok, in the meantime I've made a few updates in my tree which I think make the code a lot nicer (and do move the conversion to setxattr()), but there's a bug in that which I'm still trying to nail down. I'll send a new version when I get that figured, and we can see how close to ok that is. Note that upstream cap_inode_removexattr and cap_inode_setxattr() upstream still don't respect the fs_user_ns properly either (the proper code is in the Ubuntu kernel, maybe it's in your -next tree, I don't know how you and Seth are coordinating that) -serge -- To unsubscribe from this list: send the line "unsubscribe linux-security-module" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
"Serge E. Hallyn" <serge@hallyn.com> writes: > Quoting Eric W. Biederman (ebiederm@xmission.com): >> ebiederm@xmission.com (Eric W. Biederman) writes: >> >> > "Serge E. Hallyn" <serge@hallyn.com> writes: >> > >> >> Quoting Eric W. Biederman (ebiederm@xmission.com): >> >>> >> >>> "Serge E. Hallyn" <serge@hallyn.com> writes: >> >>> >> >>> > diff --git a/fs/xattr.c b/fs/xattr.c >> >>> > index 7e3317c..75cc65a 100644 >> >>> > --- a/fs/xattr.c >> >>> > +++ b/fs/xattr.c >> >>> > @@ -170,12 +170,29 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, >> >>> > const void *value, size_t size, int flags) >> >>> > { >> >>> > struct inode *inode = dentry->d_inode; >> >>> > - int error = -EAGAIN; >> >>> > + int error; >> >>> > + void *wvalue = NULL; >> >>> > + size_t wsize = 0; >> >>> > int issec = !strncmp(name, XATTR_SECURITY_PREFIX, >> >>> > XATTR_SECURITY_PREFIX_LEN); >> >>> > >> >>> > - if (issec) >> >>> > + if (issec) { >> >>> > inode->i_flags &= ~S_NOSEC; >> >>> > + >> >>> > + if (!strcmp(name, "security.capability")) { >> >>> > + error = cap_setxattr_convert_nscap(dentry, value, size, >> >>> > + &wvalue, &wsize); >> >>> > + if (error < 0) >> >>> > + return error; >> >>> > + if (wvalue) { >> >>> > + value = wvalue; >> >>> > + size = wsize; >> >>> > + } >> >>> > + } >> >>> > + } >> >>> > + >> >>> > + error = -EAGAIN; >> >>> > + >> >>> >> >>> Why is the conversion in __vfs_setxattr_noperm and not in setattr as >> >>> was done for posix_acl_fix_xattr_from_user? >> >> >> >> I think I was thinking I wanted to catch all the vfs_setxattr operations, >> >> but I don't think that's right. Moving to setxattr seems right. I'll >> >> look around a bit more. >> > >> > Thanks. This is one of these little details that we want a good answer >> > to why there. If you can document that in your patch description when >> > you resend I would appreciate it. >> >> Ok. Grrr. >> >> Looking at this a little more getting it correct where we call the >> conversion operation is critical. >> >> I believe the current placement of cap_setxattr_convert_nscap is >> actively wrong. In particular unless I am misleading something this >> will trigger multiple conversions when setting one of these attributes >> on overlayfs. >> >> The stragey I adopted for for posix acls is: >> >> On a write from userspace convert from current_user_ns() to &init_user_ns. >> On a write to the filesystem convert from &init_user_ns to fs_user_ns. >> >> On a read from the filesystem convert from fs_user_ns to &init_user_ns >> On a read from the kernel to userspace convert from &init_user_ns >> to current_user_ns(). >> >> Overall a good strategy but no one we can trivially adopt for the >> capability xattr as the second write to filesystem method does not >> appear to actually exist for anything except for posix acls. >> >> I need to think a little more about how we want to accomplish this for >> the capability xattr. My apoligies for leading you down a path that has >> all of these bumps and then being sufficiently distracted not to help >> you through this maze. >> >> The only easy solution I can see is to just always keep things in >> &init_user_ns inside the kernel. That works until we bring fuse or >> other unprivileged mounts onboard that have storage outside of the >> kernel. >> >> Seth and I will have to rework that for fuse support but that sounds >> better than not letting such an issue prevent us from merging the code. > > Ok, in the meantime I've made a few updates in my tree which I think > make the code a lot nicer (and do move the conversion to setxattr()), > but there's a bug in that which I'm still trying to nail down. I'll > send a new version when I get that figured, and we can see how close > to ok that is. > > Note that upstream cap_inode_removexattr and cap_inode_setxattr() > upstream still don't respect the fs_user_ns properly either (the > proper code is in the Ubuntu kernel, maybe it's in your -next > tree, I don't know how you and Seth are coordinating that) Oh yes. The relaxation of permissions. I remember holding off on that until we knew the core vfs work was done. At this point I don't think it is necessary to keep holding off. It seemed prudent before we got all of the s_user_ns bits used in all of the proper places. At this point I think it was just worry about the last little vfs bits has been challenging enough that we just haven't gotten too it. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-security-module" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/xattr.c b/fs/xattr.c index 75cc65ac7ea9..f6d5ce3e1030 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -179,7 +179,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, if (issec) { inode->i_flags &= ~S_NOSEC; - if (!strcmp(name, "security.capability")) { + if (!strcmp(name, XATTR_NAME_CAPS)) { error = cap_setxattr_convert_nscap(dentry, value, size, &wvalue, &wsize); if (error < 0) diff --git a/include/linux/capability.h b/include/linux/capability.h index b97343353a11..c47febf8448b 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -13,7 +13,6 @@ #define _LINUX_CAPABILITY_H #include <uapi/linux/capability.h> -#include <linux/uidgid.h> #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 diff --git a/security/commoncap.c b/security/commoncap.c index 8abb9bf4ec17..32e32f437ef5 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -367,6 +367,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, kuid_t kroot; uid_t root, mappedroot; char *tmpbuf = NULL; + struct vfs_cap_data *cap; struct vfs_ns_cap_data *nscap; struct dentry *dentry; struct user_namespace *fs_ns; @@ -379,14 +380,16 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, return -EINVAL; size = sizeof(struct vfs_ns_cap_data); - ret = vfs_getxattr_alloc(dentry, "security.capability", + ret = vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS, &tmpbuf, size, GFP_NOFS); if (ret < 0) return ret; fs_ns = inode->i_sb->s_user_ns; - if (ret == sizeof(struct vfs_cap_data)) { + cap = (struct vfs_cap_data *) tmpbuf; + if ((ret == sizeof(struct vfs_cap_data)) && + (cap->magic_etc == cpu_to_le32(VFS_CAP_REVISION_2))) { /* If this is sizeof(vfs_cap_data) then we're ok with the * on-disk value, so return that. */ if (alloc) @@ -394,7 +397,8 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, else kfree(tmpbuf); return ret; - } else if (ret != sizeof(struct vfs_ns_cap_data)) { + } else if ((ret != sizeof(struct vfs_ns_cap_data)) || + (cap->magic_etc != cpu_to_le32(VFS_CAP_REVISION_3))) { kfree(tmpbuf); return -EINVAL; } @@ -417,7 +421,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, if (!rootid_owns_currentns(kroot)) { kfree(tmpbuf); - return -EOPNOTSUPP; + return -ENODATA; } /* This comes from a parent namespace. Return as a v2 capability */ @@ -474,11 +478,17 @@ int cap_setxattr_convert_nscap(struct dentry *dentry, const void *value, size_t return -EINVAL; if (size != XATTR_CAPS_SZ_2 && size != XATTR_CAPS_SZ_3) return -EINVAL; + if ((size == XATTR_CAPS_SZ_2) && + (cap->magic_etc != cpu_to_le32(VFS_CAP_REVISION_2))) + return -EINVAL; + if ((size == XATTR_CAPS_SZ_3) && + (cap->magic_etc != cpu_to_le32(VFS_CAP_REVISION_3))) + return -EINVAL; if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) return -EPERM; if (size == XATTR_CAPS_SZ_2) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) - // user is privileged, just write the v2 + /* user is privileged, just write the v2 */ return 0; rootid = rootid_from_xattr(value, size, task_ns); @@ -855,7 +865,10 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name, sizeof(XATTR_SECURITY_PREFIX) - 1) != 0) return 0; - // For XATTR_NAME_CAPS the check will be done in __vfs_setxattr_noperm() + /* + * For XATTR_NAME_CAPS the check will be done in + * __vfs_setxattr_noperm() + */ if (strcmp(name, XATTR_NAME_CAPS) == 0) return 0;