Message ID | 0d43234f1b2c1d35a06e8259ca94c7d976e0a604.1428471096.git.osandov@osandov.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
-------- Original Message -------- Subject: [PATCH 2/3] Btrfs: unify subvol= and subvolid= mounting From: Omar Sandoval <osandov@osandov.com> To: Chris Mason <clm@fb.com>, Josef Bacik <jbacik@fb.com>, David Sterba <dsterba@suse.cz>, <linux-btrfs@vger.kernel.org> Date: 2015?04?08? 13:34 > Currently, mounting a subvolume with subvolid= takes a different code > path than mounting with subvol=. This isn't really a big deal except for > the fact that mounts done with subvolid= or the default subvolume don't > have a dentry that's connected to the dentry tree like in the subvol= > case. To unify the code paths, when given subvolid= or using the default > subvolume ID, translate it into a subvolume name by walking > ROOT_BACKREFs in the root tree and INODE_REFs in the filesystem trees. Oh, this patch is what I have tried long long ago, and want to do the same thing, to show subvolume mount for btrfs. But it came to me that, superblock->show_path() is a better method to do it. You can implement btrfs_show_path() to allow mountinfo to get the subvolume name from subvolid, and don't change the mount routine much. Thanks, Qu > > Signed-off-by: Omar Sandoval <osandov@osandov.com> > --- > fs/btrfs/super.c | 347 ++++++++++++++++++++++++++++++++++++------------------- > 1 file changed, 225 insertions(+), 122 deletions(-) > > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c > index d38be09..5ab9801 100644 > --- a/fs/btrfs/super.c > +++ b/fs/btrfs/super.c > @@ -841,33 +841,153 @@ out: > return error; > } > > -static struct dentry *get_default_root(struct super_block *sb, > - u64 subvol_objectid) > +static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, > + u64 subvol_objectid) > { > - struct btrfs_fs_info *fs_info = btrfs_sb(sb); > struct btrfs_root *root = fs_info->tree_root; > - struct btrfs_root *new_root; > - struct btrfs_dir_item *di; > - struct btrfs_path *path; > - struct btrfs_key location; > - struct inode *inode; > - u64 dir_id; > - int new = 0; > + struct btrfs_root *fs_root; > + struct btrfs_root_ref *root_ref; > + struct btrfs_inode_ref *inode_ref; > + struct btrfs_key key; > + struct btrfs_path *path = NULL; > + char *name = NULL, *ptr; > + u64 dirid; > + int len; > + int ret; > + > + path = btrfs_alloc_path(); > + if (!path) { > + ret = -ENOMEM; > + goto err; > + } > + path->leave_spinning = 1; > + > + name = kmalloc(PATH_MAX, GFP_NOFS); > + if (!name) { > + ret = -ENOMEM; > + goto err; > + } > + ptr = name + PATH_MAX - 1; > + ptr[0] = '\0'; > > /* > - * We have a specific subvol we want to mount, just setup location and > - * go look up the root. > + * Walk up the subvolume trees in the tree of tree roots by root > + * backrefs until we hit the top-level subvolume. > */ > - if (subvol_objectid) { > - location.objectid = subvol_objectid; > - location.type = BTRFS_ROOT_ITEM_KEY; > - location.offset = (u64)-1; > - goto find_root; > + while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) { > + key.objectid = subvol_objectid; > + key.type = BTRFS_ROOT_BACKREF_KEY; > + key.offset = (u64)-1; > + > + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); > + if (ret < 0) { > + goto err; > + } else if (ret > 0) { > + ret = btrfs_previous_item(root, path, subvol_objectid, > + BTRFS_ROOT_BACKREF_KEY); > + if (ret < 0) { > + goto err; > + } else if (ret > 0) { > + ret = -ENOENT; > + goto err; > + } > + } > + > + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); > + subvol_objectid = key.offset; > + > + root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], > + struct btrfs_root_ref); > + len = btrfs_root_ref_name_len(path->nodes[0], root_ref); > + ptr -= len + 1; > + if (ptr < name) { > + ret = -ENAMETOOLONG; > + goto err; > + } > + read_extent_buffer(path->nodes[0], ptr + 1, > + (unsigned long)(root_ref + 1), len); > + ptr[0] = '/'; > + dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref); > + btrfs_release_path(path); > + > + key.objectid = subvol_objectid; > + key.type = BTRFS_ROOT_ITEM_KEY; > + key.offset = (u64)-1; > + fs_root = btrfs_read_fs_root_no_name(fs_info, &key); > + if (IS_ERR(fs_root)) { > + ret = PTR_ERR(fs_root); > + goto err; > + } > + > + /* > + * Walk up the filesystem tree by inode refs until we hit the > + * root directory. > + */ > + while (dirid != BTRFS_FIRST_FREE_OBJECTID) { > + key.objectid = dirid; > + key.type = BTRFS_INODE_REF_KEY; > + key.offset = (u64)-1; > + > + ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); > + if (ret < 0) { > + goto err; > + } else if (ret > 0) { > + ret = btrfs_previous_item(fs_root, path, dirid, > + BTRFS_INODE_REF_KEY); > + if (ret < 0) { > + goto err; > + } else if (ret > 0) { > + ret = -ENOENT; > + goto err; > + } > + } > + > + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); > + dirid = key.offset; > + > + inode_ref = btrfs_item_ptr(path->nodes[0], > + path->slots[0], > + struct btrfs_inode_ref); > + len = btrfs_inode_ref_name_len(path->nodes[0], > + inode_ref); > + ptr -= len + 1; > + if (ptr < name) { > + ret = -ENAMETOOLONG; > + goto err; > + } > + read_extent_buffer(path->nodes[0], ptr + 1, > + (unsigned long)(inode_ref + 1), len); > + ptr[0] = '/'; > + btrfs_release_path(path); > + } > + } > + > + btrfs_free_path(path); > + if (ptr == name + PATH_MAX - 1) { > + name[0] = '/'; > + name[1] = '\0'; > + } else { > + memmove(name, ptr, name + PATH_MAX - ptr); > } > + return name; > + > +err: > + btrfs_free_path(path); > + kfree(name); > + return ERR_PTR(ret); > +} > + > +static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid) > +{ > + struct btrfs_root *root = fs_info->tree_root; > + struct btrfs_dir_item *di; > + struct btrfs_path *path; > + struct btrfs_key location; > + u64 dir_id; > > path = btrfs_alloc_path(); > if (!path) > - return ERR_PTR(-ENOMEM); > + return -ENOMEM; > path->leave_spinning = 1; > > /* > @@ -879,49 +999,23 @@ static struct dentry *get_default_root(struct super_block *sb, > di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); > if (IS_ERR(di)) { > btrfs_free_path(path); > - return ERR_CAST(di); > + return PTR_ERR(di); > } > if (!di) { > /* > * Ok the default dir item isn't there. This is weird since > * it's always been there, but don't freak out, just try and > - * mount to root most subvolume. > + * mount the top-level subvolume. > */ > btrfs_free_path(path); > - dir_id = BTRFS_FIRST_FREE_OBJECTID; > - new_root = fs_info->fs_root; > - goto setup_root; > + *objectid = BTRFS_FS_TREE_OBJECTID; > + return 0; > } > > btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); > btrfs_free_path(path); > - > -find_root: > - new_root = btrfs_read_fs_root_no_name(fs_info, &location); > - if (IS_ERR(new_root)) > - return ERR_CAST(new_root); > - > - dir_id = btrfs_root_dirid(&new_root->root_item); > -setup_root: > - location.objectid = dir_id; > - location.type = BTRFS_INODE_ITEM_KEY; > - location.offset = 0; > - > - inode = btrfs_iget(sb, &location, new_root, &new); > - if (IS_ERR(inode)) > - return ERR_CAST(inode); > - > - /* > - * If we're just mounting the root most subvol put the inode and return > - * a reference to the dentry. We will have already gotten a reference > - * to the inode in btrfs_fill_super so we're good to go. > - */ > - if (!new && sb->s_root->d_inode == inode) { > - iput(inode); > - return dget(sb->s_root); > - } > - > - return d_obtain_root(inode); > + *objectid = location.objectid; > + return 0; > } > > static int btrfs_fill_super(struct super_block *sb, > @@ -1129,109 +1223,123 @@ static inline int is_subvolume_inode(struct inode *inode) > } > > /* > - * This will strip out the subvol=%s argument for an argument string and add > - * subvolid=0 to make sure we get the actual tree root for path walking to the > - * subvol we want. > + * This will add subvolid=0 to the argument string while removing any subvol= > + * and subvolid= arguments to make sure we get the top-level root for path > + * walking to the subvol we want. > */ > static char *setup_root_args(char *args) > { > - unsigned len = strlen(args) + 2 + 1; > - char *src, *dst, *buf; > - > - /* > - * We need the same args as before, but with this substitution: > - * s!subvol=[^,]+!subvolid=0! > - * > - * Since the replacement string is up to 2 bytes longer than the > - * original, allocate strlen(args) + 2 + 1 bytes. > - */ > + char *p, *dst, *buf; > > - src = strstr(args, "subvol="); > - /* This shouldn't happen, but just in case.. */ > - if (!src) > - return NULL; > + if (!args) > + return kstrdup("subvolid=0", GFP_NOFS); > > - buf = dst = kmalloc(len, GFP_NOFS); > + /* The worst case is that we add ",subvolid=0" to the end. */ > + buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS); > if (!buf) > return NULL; > > - /* > - * If the subvol= arg is not at the start of the string, > - * copy whatever precedes it into buf. > - */ > - if (src != args) { > - *src++ = '\0'; > - strcpy(buf, args); > - dst += strlen(args); > + while (1) { > + p = strchrnul(args, ','); > + if (strncmp(args, "subvol=", strlen("subvol=")) != 0 && > + strncmp(args, "subvolid=", strlen("subvolid=")) != 0) { > + memcpy(dst, args, p - args); > + dst += p - args; > + *dst++ = ','; > + } > + if (*p) > + args = p + 1; > + else > + break; > } > - > strcpy(dst, "subvolid=0"); > - dst += strlen("subvolid=0"); > - > - /* > - * If there is a "," after the original subvol=... string, > - * copy that suffix into our buffer. Otherwise, we're done. > - */ > - src = strchr(src, ','); > - if (src) > - strcpy(dst, src); > > return buf; > } > > -static struct dentry *mount_subvol(const char *subvol_name, int flags, > - const char *device_name, char *data) > +static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, > + int flags, const char *device_name, > + char *data) > { > struct dentry *root; > - struct vfsmount *mnt; > + struct vfsmount *mnt = NULL; > char *newargs; > + int ret; > > newargs = setup_root_args(data); > - if (!newargs) > - return ERR_PTR(-ENOMEM); > - mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, > - newargs); > + if (!newargs) { > + root = ERR_PTR(-ENOMEM); > + goto out; > + } > > - if (PTR_RET(mnt) == -EBUSY) { > + mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); > + if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { > if (flags & MS_RDONLY) { > - mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name, > - newargs); > + mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, > + device_name, newargs); > } else { > - int r; > - mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, > - newargs); > + mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, > + device_name, newargs); > if (IS_ERR(mnt)) { > - kfree(newargs); > - return ERR_CAST(mnt); > + root = ERR_CAST(mnt); > + mnt = NULL; > + goto out; > } > > down_write(&mnt->mnt_sb->s_umount); > - r = btrfs_remount(mnt->mnt_sb, &flags, NULL); > + ret = btrfs_remount(mnt->mnt_sb, &flags, NULL); > up_write(&mnt->mnt_sb->s_umount); > - if (r < 0) { > - /* FIXME: release vfsmount mnt ??*/ > - kfree(newargs); > - return ERR_PTR(r); > + if (ret < 0) { > + root = ERR_PTR(ret); > + goto out; > } > } > } > + if (IS_ERR(mnt)) { > + root = ERR_CAST(mnt); > + mnt = NULL; > + goto out; > + } > > - kfree(newargs); > + if (!subvol_name) { > + if (!subvol_objectid) { > + ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), > + &subvol_objectid); > + if (ret) { > + root = ERR_PTR(ret); > + goto out; > + } > + } > + subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb), > + subvol_objectid); > + if (IS_ERR(subvol_name)) { > + root = ERR_CAST(subvol_name); > + subvol_name = NULL; > + goto out; > + } > > - if (IS_ERR(mnt)) > - return ERR_CAST(mnt); > + } > > root = mount_subtree(mnt, subvol_name); > + mnt = NULL; /* mount_subtree drops our reference on the vfsmount. */ > > if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) { > struct super_block *s = root->d_sb; > dput(root); > root = ERR_PTR(-EINVAL); > deactivate_locked_super(s); > - printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n", > - subvol_name); > + pr_err("BTRFS: '%s' is not a valid subvolume\n", subvol_name); > + } > + if (!IS_ERR(root) && subvol_objectid && > + BTRFS_I(root->d_inode)->root->root_key.objectid != subvol_objectid) { > + pr_warn("BTRFS: subvol '%s' does not match subvolid %llu\n", > + subvol_name, subvol_objectid); > } > > +out: > + mntput(mnt); > + kfree(newargs); > + kfree(subvol_name); > return root; > } > > @@ -1296,7 +1404,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, > { > struct block_device *bdev = NULL; > struct super_block *s; > - struct dentry *root; > struct btrfs_fs_devices *fs_devices = NULL; > struct btrfs_fs_info *fs_info = NULL; > struct security_mnt_opts new_sec_opts; > @@ -1316,10 +1423,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, > return ERR_PTR(error); > } > > - if (subvol_name) { > - root = mount_subvol(subvol_name, flags, device_name, data); > - kfree(subvol_name); > - return root; > + if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) { > + /* mount_subvol() will free subvol_name. */ > + return mount_subvol(subvol_name, subvol_objectid, flags, > + device_name, data); > } > > security_init_mnt_opts(&new_sec_opts); > @@ -1385,23 +1492,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, > error = btrfs_fill_super(s, fs_devices, data, > flags & MS_SILENT ? 1 : 0); > } > - > - root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); > - if (IS_ERR(root)) { > + if (error) { > deactivate_locked_super(s); > - error = PTR_ERR(root); > goto error_sec_opts; > } > > fs_info = btrfs_sb(s); > error = setup_security_options(fs_info, s, &new_sec_opts); > if (error) { > - dput(root); > deactivate_locked_super(s); > goto error_sec_opts; > } > > - return root; > + return dget(s->s_root); > > error_close_devices: > btrfs_close_devices(fs_devices); > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Apr 08, 2015 at 02:06:14PM +0800, Qu Wenruo wrote: > > > -------- Original Message -------- > Subject: [PATCH 2/3] Btrfs: unify subvol= and subvolid= mounting > From: Omar Sandoval <osandov@osandov.com> > To: Chris Mason <clm@fb.com>, Josef Bacik <jbacik@fb.com>, David Sterba > <dsterba@suse.cz>, <linux-btrfs@vger.kernel.org> > Date: 2015?04?08? 13:34 > > >Currently, mounting a subvolume with subvolid= takes a different code > >path than mounting with subvol=. This isn't really a big deal except for > >the fact that mounts done with subvolid= or the default subvolume don't > >have a dentry that's connected to the dentry tree like in the subvol= > >case. To unify the code paths, when given subvolid= or using the default > >subvolume ID, translate it into a subvolume name by walking > >ROOT_BACKREFs in the root tree and INODE_REFs in the filesystem trees. Hi, Qu, > Oh, this patch is what I have tried long long ago, and want to do the same > thing, to show subvolume mount for btrfs. Thanks for pointing that out, I didn't come across your post when I was looking around. I figured that someone must have thought of it first :) > But it came to me that, superblock->show_path() is a better method to do it. > > You can implement btrfs_show_path() to allow mountinfo to get the subvolume > name from subvolid, and don't change the mount routine much. Hm, I don't think that the changes to the mount code would be unwarranted. Having one code path makes it more obvious what's going on. Do you mind elaborating on why you preferred doing it in ->show_path()? Thanks! > Thanks, > Qu
-------- Original Message -------- Subject: Re: [PATCH 2/3] Btrfs: unify subvol= and subvolid= mounting From: Omar Sandoval <osandov@osandov.com> To: Qu Wenruo <quwenruo@cn.fujitsu.com> Date: 2015?04?08? 15:17 > On Wed, Apr 08, 2015 at 02:06:14PM +0800, Qu Wenruo wrote: >> >> >> -------- Original Message -------- >> Subject: [PATCH 2/3] Btrfs: unify subvol= and subvolid= mounting >> From: Omar Sandoval <osandov@osandov.com> >> To: Chris Mason <clm@fb.com>, Josef Bacik <jbacik@fb.com>, David Sterba >> <dsterba@suse.cz>, <linux-btrfs@vger.kernel.org> >> Date: 2015?04?08? 13:34 >> >>> Currently, mounting a subvolume with subvolid= takes a different code >>> path than mounting with subvol=. This isn't really a big deal except for >>> the fact that mounts done with subvolid= or the default subvolume don't >>> have a dentry that's connected to the dentry tree like in the subvol= >>> case. To unify the code paths, when given subvolid= or using the default >>> subvolume ID, translate it into a subvolume name by walking >>> ROOT_BACKREFs in the root tree and INODE_REFs in the filesystem trees. > > Hi, Qu, > >> Oh, this patch is what I have tried long long ago, and want to do the same >> thing, to show subvolume mount for btrfs. > > Thanks for pointing that out, I didn't come across your post when I was > looking around. I figured that someone must have thought of it first :) > >> But it came to me that, superblock->show_path() is a better method to do it. >> >> You can implement btrfs_show_path() to allow mountinfo to get the subvolume >> name from subvolid, and don't change the mount routine much. > > Hm, I don't think that the changes to the mount code would be > unwarranted. Having one code path makes it more obvious what's going on. > Do you mind elaborating on why you preferred doing it in ->show_path()? The story seems to be long. At that time, I also tried to do the subvolid->path convert and it seems works. But another problem, IIRC, btrfs losing its security label bug, will be triggered more easy if we all go through the "subvol=" routine, as that routine will use vfs_mount twice. The second time it will definitely lost the security label. Although the problem is later resolved by handling security label internally, but it drove me not touching the mount routine. Also another problem is, "subvolid=" routine can also happen when the fs is already mounted, so there may be some operations ,like deleting files and dirs, interfere your subvolid->path search codes. (During your while loop, there is a race windows between your release_path() and search_slot()) Resulting a mount failure even nothing goes wrong. ->show_path() method can't avoid above race problem, but the good thing is, even race happens, it won't disturb our mount. Just a -EBUSY when showing /proc/self/mountinfo, not a mount failure. Thanks, Qu > > Thanks! > >> Thanks, >> Qu > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Apr 08, 2015 at 02:06:14PM +0800, Qu Wenruo wrote: > > > -------- Original Message -------- > Subject: [PATCH 2/3] Btrfs: unify subvol= and subvolid= mounting > From: Omar Sandoval <osandov@osandov.com> > To: Chris Mason <clm@fb.com>, Josef Bacik <jbacik@fb.com>, David Sterba > <dsterba@suse.cz>, <linux-btrfs@vger.kernel.org> > Date: 2015?04?08? 13:34 > > > Currently, mounting a subvolume with subvolid= takes a different code > > path than mounting with subvol=. This isn't really a big deal except for > > the fact that mounts done with subvolid= or the default subvolume don't > > have a dentry that's connected to the dentry tree like in the subvol= > > case. To unify the code paths, when given subvolid= or using the default > > subvolume ID, translate it into a subvolume name by walking > > ROOT_BACKREFs in the root tree and INODE_REFs in the filesystem trees. > Oh, this patch is what I have tried long long ago, and want to do the > same thing, to show subvolume mount for btrfs. > > But it came to me that, superblock->show_path() is a better method to do it. > > You can implement btrfs_show_path() to allow mountinfo to get the > subvolume name from subvolid, and don't change the mount routine much. The problem I see with the show_mount approach is related to the additional path lookup, memory allocation and locking. If the mountpoint dentry is the right on ,it's just a simple seq_dentry in show_options. OTOH, your patch takes subvol_sem that will block the callback if there's eg. a subvolume being deleted (that takes the write lock). This is not a lightweight operation nor an infrequent one. There are more write locks to subvol_sem. I'm not sure if I've ever sent this comment back to you, sorry if not. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 07, 2015 at 10:34:01PM -0700, Omar Sandoval wrote: > Currently, mounting a subvolume with subvolid= takes a different code > path than mounting with subvol=. This isn't really a big deal except for > the fact that mounts done with subvolid= or the default subvolume don't > have a dentry that's connected to the dentry tree like in the subvol= > case. To unify the code paths, when given subvolid= or using the default > subvolume ID, translate it into a subvolume name by walking > ROOT_BACKREFs in the root tree and INODE_REFs in the filesystem trees. Can you please split this patches? It's doing several things, but the core change will probably be a big one. The mount path is not trivial, all the recursions and argument replacements. Otherwise, I'm ok with this approach, ie. to set up the dentry at mount time. A few comments below. > /* > - * This will strip out the subvol=%s argument for an argument string and add > - * subvolid=0 to make sure we get the actual tree root for path walking to the > - * subvol we want. > + * This will add subvolid=0 to the argument string while removing any subvol= > + * and subvolid= arguments to make sure we get the top-level root for path > + * walking to the subvol we want. > */ > static char *setup_root_args(char *args) > { > - unsigned len = strlen(args) + 2 + 1; > - char *src, *dst, *buf; > - > - /* > - * We need the same args as before, but with this substitution: > - * s!subvol=[^,]+!subvolid=0! > - * > - * Since the replacement string is up to 2 bytes longer than the > - * original, allocate strlen(args) + 2 + 1 bytes. > - */ > + char *p, *dst, *buf; Fix the coding style. > root = mount_subtree(mnt, subvol_name); > + mnt = NULL; /* mount_subtree drops our reference on the vfsmount. */ Put the comment on a separate line. > + if (!IS_ERR(root) && subvol_objectid && > + BTRFS_I(root->d_inode)->root->root_key.objectid != subvol_objectid) { > + pr_warn("BTRFS: subvol '%s' does not match subvolid %llu\n", > + subvol_name, subvol_objectid); We should define the precedence of subvolid and subvol if both are set. A warning might not be enough. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Apr 09, 2015 at 06:28:48PM +0200, David Sterba wrote: > On Tue, Apr 07, 2015 at 10:34:01PM -0700, Omar Sandoval wrote: > > Currently, mounting a subvolume with subvolid= takes a different code > > path than mounting with subvol=. This isn't really a big deal except for > > the fact that mounts done with subvolid= or the default subvolume don't > > have a dentry that's connected to the dentry tree like in the subvol= > > case. To unify the code paths, when given subvolid= or using the default > > subvolume ID, translate it into a subvolume name by walking > > ROOT_BACKREFs in the root tree and INODE_REFs in the filesystem trees. > > Can you please split this patches? It's doing several things, but the > core change will probably be a big one. The mount path is not trivial, > all the recursions and argument replacements. Will do. > Otherwise, I'm ok with this approach, ie. to set up the dentry at mount > time. > > A few comments below. > > > /* > > - * This will strip out the subvol=%s argument for an argument string and add > > - * subvolid=0 to make sure we get the actual tree root for path walking to the > > - * subvol we want. > > + * This will add subvolid=0 to the argument string while removing any subvol= > > + * and subvolid= arguments to make sure we get the top-level root for path > > + * walking to the subvol we want. > > */ > > static char *setup_root_args(char *args) > > { > > - unsigned len = strlen(args) + 2 + 1; > > - char *src, *dst, *buf; > > - > > - /* > > - * We need the same args as before, but with this substitution: > > - * s!subvol=[^,]+!subvolid=0! > > - * > > - * Since the replacement string is up to 2 bytes longer than the > > - * original, allocate strlen(args) + 2 + 1 bytes. > > - */ > > + char *p, *dst, *buf; > > Fix the coding style. Ok. > > root = mount_subtree(mnt, subvol_name); > > + mnt = NULL; /* mount_subtree drops our reference on the vfsmount. */ > > Put the comment on a separate line. Ok. > > + if (!IS_ERR(root) && subvol_objectid && > > + BTRFS_I(root->d_inode)->root->root_key.objectid != subvol_objectid) { > > + pr_warn("BTRFS: subvol '%s' does not match subvolid %llu\n", > > + subvol_name, subvol_objectid); > > We should define the precedence of subvolid and subvol if both are set. > A warning might not be enough. Ah, that probably deserves some more explanation. My original intent was to alert the user if there was a race where the subvolume passed by ID was renamed and another subvolume was renamed over the old location. Then I figured that users should probably be warned if they are passing bogus mount options, too. However, I just now realized that the current behavior will error out in that case anyways because before this patch, setup_root_args() only replaces the first subvol= and ignores anything that comes after it. So subvol=/foovol,subvolid=258 becomes subvolid=0,subvolid=258 and the last one takes precedence, so the lookup of /foovol happens inside of subvol 258 instead of the top-level and fails. So I think reasonable behavior would be to change that warning into a hard error for both cases (the race and the misguided user). Just in case a user copies the mount options straight out of /proc/mounts or something, we can allow both subvol= and subvolid= to be passed, but only if they match. Thanks for the review!
-------- Original Message -------- Subject: Re: [PATCH 2/3] Btrfs: unify subvol= and subvolid= mounting From: David Sterba <dsterba@suse.cz> To: Qu Wenruo <quwenruo@cn.fujitsu.com> Date: 2015?04?10? 00:10 > On Wed, Apr 08, 2015 at 02:06:14PM +0800, Qu Wenruo wrote: >> >> >> -------- Original Message -------- >> Subject: [PATCH 2/3] Btrfs: unify subvol= and subvolid= mounting >> From: Omar Sandoval <osandov@osandov.com> >> To: Chris Mason <clm@fb.com>, Josef Bacik <jbacik@fb.com>, David Sterba >> <dsterba@suse.cz>, <linux-btrfs@vger.kernel.org> >> Date: 2015?04?08? 13:34 >> >>> Currently, mounting a subvolume with subvolid= takes a different code >>> path than mounting with subvol=. This isn't really a big deal except for >>> the fact that mounts done with subvolid= or the default subvolume don't >>> have a dentry that's connected to the dentry tree like in the subvol= >>> case. To unify the code paths, when given subvolid= or using the default >>> subvolume ID, translate it into a subvolume name by walking >>> ROOT_BACKREFs in the root tree and INODE_REFs in the filesystem trees. >> Oh, this patch is what I have tried long long ago, and want to do the >> same thing, to show subvolume mount for btrfs. >> >> But it came to me that, superblock->show_path() is a better method to do it. >> >> You can implement btrfs_show_path() to allow mountinfo to get the >> subvolume name from subvolid, and don't change the mount routine much. > > The problem I see with the show_mount approach is related to the > additional path lookup, memory allocation and locking. > > If the mountpoint dentry is the right on ,it's just a simple seq_dentry > in show_options. > > OTOH, your patch takes subvol_sem that will block the callback if > there's eg. a subvolume being deleted (that takes the write lock). This > is not a lightweight operation nor an infrequent one. There are more > write locks to subvol_sem. Thanks for pointing out this problem. That's right. But I found that, since in show_path() function, we can just return -EAGAIN without breaking anything, locking in btrfs_path should be enough. So I can remove all the unneeded lock/sem. Thanks, Qu > > I'm not sure if I've ever sent this comment back to you, sorry if not. > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d38be09..5ab9801 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -841,33 +841,153 @@ out: return error; } -static struct dentry *get_default_root(struct super_block *sb, - u64 subvol_objectid) +static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, + u64 subvol_objectid) { - struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; - struct btrfs_root *new_root; - struct btrfs_dir_item *di; - struct btrfs_path *path; - struct btrfs_key location; - struct inode *inode; - u64 dir_id; - int new = 0; + struct btrfs_root *fs_root; + struct btrfs_root_ref *root_ref; + struct btrfs_inode_ref *inode_ref; + struct btrfs_key key; + struct btrfs_path *path = NULL; + char *name = NULL, *ptr; + u64 dirid; + int len; + int ret; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto err; + } + path->leave_spinning = 1; + + name = kmalloc(PATH_MAX, GFP_NOFS); + if (!name) { + ret = -ENOMEM; + goto err; + } + ptr = name + PATH_MAX - 1; + ptr[0] = '\0'; /* - * We have a specific subvol we want to mount, just setup location and - * go look up the root. + * Walk up the subvolume trees in the tree of tree roots by root + * backrefs until we hit the top-level subvolume. */ - if (subvol_objectid) { - location.objectid = subvol_objectid; - location.type = BTRFS_ROOT_ITEM_KEY; - location.offset = (u64)-1; - goto find_root; + while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) { + key.objectid = subvol_objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + goto err; + } else if (ret > 0) { + ret = btrfs_previous_item(root, path, subvol_objectid, + BTRFS_ROOT_BACKREF_KEY); + if (ret < 0) { + goto err; + } else if (ret > 0) { + ret = -ENOENT; + goto err; + } + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + subvol_objectid = key.offset; + + root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_root_ref); + len = btrfs_root_ref_name_len(path->nodes[0], root_ref); + ptr -= len + 1; + if (ptr < name) { + ret = -ENAMETOOLONG; + goto err; + } + read_extent_buffer(path->nodes[0], ptr + 1, + (unsigned long)(root_ref + 1), len); + ptr[0] = '/'; + dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref); + btrfs_release_path(path); + + key.objectid = subvol_objectid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + fs_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(fs_root)) { + ret = PTR_ERR(fs_root); + goto err; + } + + /* + * Walk up the filesystem tree by inode refs until we hit the + * root directory. + */ + while (dirid != BTRFS_FIRST_FREE_OBJECTID) { + key.objectid = dirid; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); + if (ret < 0) { + goto err; + } else if (ret > 0) { + ret = btrfs_previous_item(fs_root, path, dirid, + BTRFS_INODE_REF_KEY); + if (ret < 0) { + goto err; + } else if (ret > 0) { + ret = -ENOENT; + goto err; + } + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + dirid = key.offset; + + inode_ref = btrfs_item_ptr(path->nodes[0], + path->slots[0], + struct btrfs_inode_ref); + len = btrfs_inode_ref_name_len(path->nodes[0], + inode_ref); + ptr -= len + 1; + if (ptr < name) { + ret = -ENAMETOOLONG; + goto err; + } + read_extent_buffer(path->nodes[0], ptr + 1, + (unsigned long)(inode_ref + 1), len); + ptr[0] = '/'; + btrfs_release_path(path); + } + } + + btrfs_free_path(path); + if (ptr == name + PATH_MAX - 1) { + name[0] = '/'; + name[1] = '\0'; + } else { + memmove(name, ptr, name + PATH_MAX - ptr); } + return name; + +err: + btrfs_free_path(path); + kfree(name); + return ERR_PTR(ret); +} + +static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid) +{ + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_key location; + u64 dir_id; path = btrfs_alloc_path(); if (!path) - return ERR_PTR(-ENOMEM); + return -ENOMEM; path->leave_spinning = 1; /* @@ -879,49 +999,23 @@ static struct dentry *get_default_root(struct super_block *sb, di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); if (IS_ERR(di)) { btrfs_free_path(path); - return ERR_CAST(di); + return PTR_ERR(di); } if (!di) { /* * Ok the default dir item isn't there. This is weird since * it's always been there, but don't freak out, just try and - * mount to root most subvolume. + * mount the top-level subvolume. */ btrfs_free_path(path); - dir_id = BTRFS_FIRST_FREE_OBJECTID; - new_root = fs_info->fs_root; - goto setup_root; + *objectid = BTRFS_FS_TREE_OBJECTID; + return 0; } btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); btrfs_free_path(path); - -find_root: - new_root = btrfs_read_fs_root_no_name(fs_info, &location); - if (IS_ERR(new_root)) - return ERR_CAST(new_root); - - dir_id = btrfs_root_dirid(&new_root->root_item); -setup_root: - location.objectid = dir_id; - location.type = BTRFS_INODE_ITEM_KEY; - location.offset = 0; - - inode = btrfs_iget(sb, &location, new_root, &new); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - /* - * If we're just mounting the root most subvol put the inode and return - * a reference to the dentry. We will have already gotten a reference - * to the inode in btrfs_fill_super so we're good to go. - */ - if (!new && sb->s_root->d_inode == inode) { - iput(inode); - return dget(sb->s_root); - } - - return d_obtain_root(inode); + *objectid = location.objectid; + return 0; } static int btrfs_fill_super(struct super_block *sb, @@ -1129,109 +1223,123 @@ static inline int is_subvolume_inode(struct inode *inode) } /* - * This will strip out the subvol=%s argument for an argument string and add - * subvolid=0 to make sure we get the actual tree root for path walking to the - * subvol we want. + * This will add subvolid=0 to the argument string while removing any subvol= + * and subvolid= arguments to make sure we get the top-level root for path + * walking to the subvol we want. */ static char *setup_root_args(char *args) { - unsigned len = strlen(args) + 2 + 1; - char *src, *dst, *buf; - - /* - * We need the same args as before, but with this substitution: - * s!subvol=[^,]+!subvolid=0! - * - * Since the replacement string is up to 2 bytes longer than the - * original, allocate strlen(args) + 2 + 1 bytes. - */ + char *p, *dst, *buf; - src = strstr(args, "subvol="); - /* This shouldn't happen, but just in case.. */ - if (!src) - return NULL; + if (!args) + return kstrdup("subvolid=0", GFP_NOFS); - buf = dst = kmalloc(len, GFP_NOFS); + /* The worst case is that we add ",subvolid=0" to the end. */ + buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS); if (!buf) return NULL; - /* - * If the subvol= arg is not at the start of the string, - * copy whatever precedes it into buf. - */ - if (src != args) { - *src++ = '\0'; - strcpy(buf, args); - dst += strlen(args); + while (1) { + p = strchrnul(args, ','); + if (strncmp(args, "subvol=", strlen("subvol=")) != 0 && + strncmp(args, "subvolid=", strlen("subvolid=")) != 0) { + memcpy(dst, args, p - args); + dst += p - args; + *dst++ = ','; + } + if (*p) + args = p + 1; + else + break; } - strcpy(dst, "subvolid=0"); - dst += strlen("subvolid=0"); - - /* - * If there is a "," after the original subvol=... string, - * copy that suffix into our buffer. Otherwise, we're done. - */ - src = strchr(src, ','); - if (src) - strcpy(dst, src); return buf; } -static struct dentry *mount_subvol(const char *subvol_name, int flags, - const char *device_name, char *data) +static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, + int flags, const char *device_name, + char *data) { struct dentry *root; - struct vfsmount *mnt; + struct vfsmount *mnt = NULL; char *newargs; + int ret; newargs = setup_root_args(data); - if (!newargs) - return ERR_PTR(-ENOMEM); - mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, - newargs); + if (!newargs) { + root = ERR_PTR(-ENOMEM); + goto out; + } - if (PTR_RET(mnt) == -EBUSY) { + mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); + if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { if (flags & MS_RDONLY) { - mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name, - newargs); + mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, + device_name, newargs); } else { - int r; - mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, - newargs); + mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, + device_name, newargs); if (IS_ERR(mnt)) { - kfree(newargs); - return ERR_CAST(mnt); + root = ERR_CAST(mnt); + mnt = NULL; + goto out; } down_write(&mnt->mnt_sb->s_umount); - r = btrfs_remount(mnt->mnt_sb, &flags, NULL); + ret = btrfs_remount(mnt->mnt_sb, &flags, NULL); up_write(&mnt->mnt_sb->s_umount); - if (r < 0) { - /* FIXME: release vfsmount mnt ??*/ - kfree(newargs); - return ERR_PTR(r); + if (ret < 0) { + root = ERR_PTR(ret); + goto out; } } } + if (IS_ERR(mnt)) { + root = ERR_CAST(mnt); + mnt = NULL; + goto out; + } - kfree(newargs); + if (!subvol_name) { + if (!subvol_objectid) { + ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), + &subvol_objectid); + if (ret) { + root = ERR_PTR(ret); + goto out; + } + } + subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb), + subvol_objectid); + if (IS_ERR(subvol_name)) { + root = ERR_CAST(subvol_name); + subvol_name = NULL; + goto out; + } - if (IS_ERR(mnt)) - return ERR_CAST(mnt); + } root = mount_subtree(mnt, subvol_name); + mnt = NULL; /* mount_subtree drops our reference on the vfsmount. */ if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) { struct super_block *s = root->d_sb; dput(root); root = ERR_PTR(-EINVAL); deactivate_locked_super(s); - printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n", - subvol_name); + pr_err("BTRFS: '%s' is not a valid subvolume\n", subvol_name); + } + if (!IS_ERR(root) && subvol_objectid && + BTRFS_I(root->d_inode)->root->root_key.objectid != subvol_objectid) { + pr_warn("BTRFS: subvol '%s' does not match subvolid %llu\n", + subvol_name, subvol_objectid); } +out: + mntput(mnt); + kfree(newargs); + kfree(subvol_name); return root; } @@ -1296,7 +1404,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, { struct block_device *bdev = NULL; struct super_block *s; - struct dentry *root; struct btrfs_fs_devices *fs_devices = NULL; struct btrfs_fs_info *fs_info = NULL; struct security_mnt_opts new_sec_opts; @@ -1316,10 +1423,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, return ERR_PTR(error); } - if (subvol_name) { - root = mount_subvol(subvol_name, flags, device_name, data); - kfree(subvol_name); - return root; + if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) { + /* mount_subvol() will free subvol_name. */ + return mount_subvol(subvol_name, subvol_objectid, flags, + device_name, data); } security_init_mnt_opts(&new_sec_opts); @@ -1385,23 +1492,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); } - - root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); - if (IS_ERR(root)) { + if (error) { deactivate_locked_super(s); - error = PTR_ERR(root); goto error_sec_opts; } fs_info = btrfs_sb(s); error = setup_security_options(fs_info, s, &new_sec_opts); if (error) { - dput(root); deactivate_locked_super(s); goto error_sec_opts; } - return root; + return dget(s->s_root); error_close_devices: btrfs_close_devices(fs_devices);
Currently, mounting a subvolume with subvolid= takes a different code path than mounting with subvol=. This isn't really a big deal except for the fact that mounts done with subvolid= or the default subvolume don't have a dentry that's connected to the dentry tree like in the subvol= case. To unify the code paths, when given subvolid= or using the default subvolume ID, translate it into a subvolume name by walking ROOT_BACKREFs in the root tree and INODE_REFs in the filesystem trees. Signed-off-by: Omar Sandoval <osandov@osandov.com> --- fs/btrfs/super.c | 347 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 225 insertions(+), 122 deletions(-)