Message ID | 1390829312-814-6-git-send-email-Gerhard@Heift.Name (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Jan 27, 2014 at 02:28:31PM +0100, Gerhard Heift wrote: > By copying each found item seperatly to userspace, we only need a small buffer > in the kernel. This allows to run a large search inside of a single call. > > Signed-off-by: Gerhard Heift <Gerhard@Heift.Name> > --- > fs/btrfs/ioctl.c | 107 ++++++++++++++++++++++++++++++++++++------------------- > 1 file changed, 71 insertions(+), 36 deletions(-) > > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index c44fcdd..38403e6 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -1905,20 +1908,47 @@ static noinline int copy_to_sk(struct btrfs_root *root, > sh.transid = found_transid; > > /* copy search result header */ > - memcpy(buf + *sk_offset, &sh, sizeof(sh)); > + if (copy_to_user(buf + *sk_offset, &sh, sizeof(sh))) { > + ret = -EFAULT; > + goto err; > + } > + > *sk_offset += sizeof(sh); > > if (item_len) { > - char *p = buf + *sk_offset; > + /* resize internal buffer if needed */ > + if (content_buffer_size < item_len) { > + kfree(content_buffer); > + > + content_buffer_size = > + ALIGN(item_len, PAGE_SIZE); > + > + content_buffer = kmalloc_track_caller( > + content_buffer_size, GFP_KERNEL); So that's another allocation. I see it's due to read_extent_buffer() that wants a kernel memory, and the buffer size is again up to the item size, potentially spanning multiple pages. > + if (!content_buffer) { > + content_buffer_size = 0; > + ret = -ENOMEM; > + goto err; > + } > + } > + > /* copy the item */ > - read_extent_buffer(leaf, p, > + read_extent_buffer(leaf, content_buffer, > item_off, item_len); My suggestion is to introduce read_extent_buffer_user that does copy_to_user instead of the memcpy. It is code duplication, but we can clean it in a separate patch. > + > + if (copy_to_user(buf + *sk_offset, > + content_buffer, item_len)) { > + ret = -EFAULT; > + goto err; > + } > + > *sk_offset += item_len; > } > (*num_found)++; > > if (ret) /* -EOVERFLOW from above */ > - goto overflow; > + goto err; > > if (*num_found >= sk->nr_items) > break; > @@ -1936,14 +1966,25 @@ advance_key: > key->objectid++; > } else > ret = 1; > -overflow: > +err: > + /* > + 0: all items from this leaf copied, continue with next > + 1: more items can be copied, but buffer is too small or all items > + were found. Either way, it will stops the loop which iterates to > + the next leaf > + -EOVERFLOW: item was to large for buffer > + -ENOMEM: could not allocate memory for a temporary extent buffer > + -EFAULT: could not copy extent buffer back to userspace > + */ Please use the kernel style for comments. > + kfree(content_buffer); > + > return ret; > } > > @@ -2011,34 +2053,39 @@ err: > static noinline int btrfs_ioctl_tree_search(struct file *file, > void __user *argp) > { > - struct btrfs_ioctl_search_args *args; > - struct inode *inode; > - int ret; > + struct btrfs_ioctl_search_args __user *usarg; > + struct btrfs_ioctl_search_key *sk; > + struct inode *inode; > + int ret; > > if (!capable(CAP_SYS_ADMIN)) > return -EPERM; > > - args = memdup_user(argp, sizeof(*args)); > - if (IS_ERR(args)) > - return PTR_ERR(args); > + usarg = (struct btrfs_ioctl_search_args __user *)argp; > + > + sk = memdup_user(&usarg->key, sizeof(*sk)); Search key is 13x u64 = 104 bytes, you can use a local variable. This is usually possible in ioctl functions, the stack is not deep at this moment and will usually not grow too much. Removing the allocation makes it a bit more resilient against ENOMEM conditions. > + if (IS_ERR(sk)) > + return PTR_ERR(sk); > > inode = file_inode(file); > - ret = search_ioctl(inode, &args->key, sizeof(args->buf), args->buf); > + ret = search_ioctl(inode, sk, sizeof(usarg->buf), usarg->buf); Ok, v1 ioctl uses the same code in the end. > + /* the origin ioctl does handle too large results by returning an item > + * with a len of 0 */ > if (ret == -EOVERFLOW) > ret = 0; > - if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) > + if (ret == 0 && copy_to_user(argp, sk, sizeof(*sk))) > ret = -EFAULT; > - kfree(args); > + kfree(sk); > return ret; > } -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2014-01-27 David Sterba <dsterba@suse.cz>: > On Mon, Jan 27, 2014 at 02:28:31PM +0100, Gerhard Heift wrote: >> By copying each found item seperatly to userspace, we only need a small buffer >> in the kernel. This allows to run a large search inside of a single call. >> >> Signed-off-by: Gerhard Heift <Gerhard@Heift.Name> >> --- >> fs/btrfs/ioctl.c | 107 ++++++++++++++++++++++++++++++++++++------------------- >> 1 file changed, 71 insertions(+), 36 deletions(-) >> >> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c >> index c44fcdd..38403e6 100644 >> --- a/fs/btrfs/ioctl.c >> +++ b/fs/btrfs/ioctl.c >> @@ -1905,20 +1908,47 @@ static noinline int copy_to_sk(struct btrfs_root *root, >> sh.transid = found_transid; >> >> /* copy search result header */ >> - memcpy(buf + *sk_offset, &sh, sizeof(sh)); >> + if (copy_to_user(buf + *sk_offset, &sh, sizeof(sh))) { >> + ret = -EFAULT; >> + goto err; >> + } >> + >> *sk_offset += sizeof(sh); >> >> if (item_len) { >> - char *p = buf + *sk_offset; >> + /* resize internal buffer if needed */ >> + if (content_buffer_size < item_len) { >> + kfree(content_buffer); >> + >> + content_buffer_size = >> + ALIGN(item_len, PAGE_SIZE); >> + >> + content_buffer = kmalloc_track_caller( >> + content_buffer_size, GFP_KERNEL); > > So that's another allocation. I see it's due to read_extent_buffer() > that wants a kernel memory, and the buffer size is again up to the item > size, potentially spanning multiple pages. > >> + if (!content_buffer) { >> + content_buffer_size = 0; >> + ret = -ENOMEM; >> + goto err; >> + } >> + } >> + >> /* copy the item */ >> - read_extent_buffer(leaf, p, >> + read_extent_buffer(leaf, content_buffer, >> item_off, item_len); > > My suggestion is to introduce read_extent_buffer_user that does > copy_to_user instead of the memcpy. It is code duplication, but we can > clean it in a separate patch. This was an approach I have already tried, but it did not work for me. After a few calls the kernel was frozen. I did not know, what caused the freeze, perhaps it was a wrong change I made while testing. I check, if it works now. >> + >> + if (copy_to_user(buf + *sk_offset, >> + content_buffer, item_len)) { >> + ret = -EFAULT; >> + goto err; >> + } >> + >> *sk_offset += item_len; >> } >> (*num_found)++; >> >> if (ret) /* -EOVERFLOW from above */ >> - goto overflow; >> + goto err; >> >> if (*num_found >= sk->nr_items) >> break; >> @@ -1936,14 +1966,25 @@ advance_key: >> key->objectid++; >> } else >> ret = 1; >> -overflow: >> +err: >> + /* >> + 0: all items from this leaf copied, continue with next >> + 1: more items can be copied, but buffer is too small or all items >> + were found. Either way, it will stops the loop which iterates to >> + the next leaf >> + -EOVERFLOW: item was to large for buffer >> + -ENOMEM: could not allocate memory for a temporary extent buffer >> + -EFAULT: could not copy extent buffer back to userspace >> + */ > > Please use the kernel style for comments. Fixed. >> + kfree(content_buffer); >> + >> return ret; >> } >> >> @@ -2011,34 +2053,39 @@ err: >> static noinline int btrfs_ioctl_tree_search(struct file *file, >> void __user *argp) >> { >> - struct btrfs_ioctl_search_args *args; >> - struct inode *inode; >> - int ret; >> + struct btrfs_ioctl_search_args __user *usarg; >> + struct btrfs_ioctl_search_key *sk; >> + struct inode *inode; >> + int ret; >> >> if (!capable(CAP_SYS_ADMIN)) >> return -EPERM; >> >> - args = memdup_user(argp, sizeof(*args)); >> - if (IS_ERR(args)) >> - return PTR_ERR(args); >> + usarg = (struct btrfs_ioctl_search_args __user *)argp; >> + >> + sk = memdup_user(&usarg->key, sizeof(*sk)); > > Search key is 13x u64 = 104 bytes, you can use a local variable. This is > usually possible in ioctl functions, the stack is not deep at this > moment and will usually not grow too much. Removing the allocation makes > it a bit more resilient against ENOMEM conditions. Thanks for the hint. In the new version I will use local variables for the search argument in both versions. >> + if (IS_ERR(sk)) >> + return PTR_ERR(sk); >> >> inode = file_inode(file); >> - ret = search_ioctl(inode, &args->key, sizeof(args->buf), args->buf); >> + ret = search_ioctl(inode, sk, sizeof(usarg->buf), usarg->buf); > > Ok, v1 ioctl uses the same code in the end. > >> + /* the origin ioctl does handle too large results by returning an item >> + * with a len of 0 */ >> if (ret == -EOVERFLOW) >> ret = 0; >> - if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) >> + if (ret == 0 && copy_to_user(argp, sk, sizeof(*sk))) >> ret = -EFAULT; >> - kfree(args); >> + kfree(sk); >> return ret; >> } -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c44fcdd..38403e6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1851,7 +1851,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, size_t buf_size, - char *buf, + char __user *buf, unsigned long *sk_offset, int *num_found) { @@ -1865,6 +1865,9 @@ static noinline int copy_to_sk(struct btrfs_root *root, int slot; int ret = 0; + char *content_buffer = NULL; + unsigned long content_buffer_size = 0; + leaf = path->nodes[0]; slot = path->slots[0]; nritems = btrfs_header_nritems(leaf); @@ -1886,7 +1889,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (sizeof(sh) + item_len > buf_size) { if (*num_found) { ret = 1; - goto overflow; + goto err; } item_len = 0; @@ -1895,7 +1898,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (sizeof(sh) + item_len + *sk_offset > buf_size) { ret = 1; - goto overflow; + goto err; } sh.objectid = key->objectid; @@ -1905,20 +1908,47 @@ static noinline int copy_to_sk(struct btrfs_root *root, sh.transid = found_transid; /* copy search result header */ - memcpy(buf + *sk_offset, &sh, sizeof(sh)); + if (copy_to_user(buf + *sk_offset, &sh, sizeof(sh))) { + ret = -EFAULT; + goto err; + } + *sk_offset += sizeof(sh); if (item_len) { - char *p = buf + *sk_offset; + /* resize internal buffer if needed */ + if (content_buffer_size < item_len) { + kfree(content_buffer); + + content_buffer_size = + ALIGN(item_len, PAGE_SIZE); + + content_buffer = kmalloc_track_caller( + content_buffer_size, GFP_KERNEL); + + if (!content_buffer) { + content_buffer_size = 0; + ret = -ENOMEM; + goto err; + } + } + /* copy the item */ - read_extent_buffer(leaf, p, + read_extent_buffer(leaf, content_buffer, item_off, item_len); + + if (copy_to_user(buf + *sk_offset, + content_buffer, item_len)) { + ret = -EFAULT; + goto err; + } + *sk_offset += item_len; } (*num_found)++; if (ret) /* -EOVERFLOW from above */ - goto overflow; + goto err; if (*num_found >= sk->nr_items) break; @@ -1936,14 +1966,25 @@ advance_key: key->objectid++; } else ret = 1; -overflow: +err: + /* + 0: all items from this leaf copied, continue with next + 1: more items can be copied, but buffer is too small or all items + were found. Either way, it will stops the loop which iterates to + the next leaf + -EOVERFLOW: item was to large for buffer + -ENOMEM: could not allocate memory for a temporary extent buffer + -EFAULT: could not copy extent buffer back to userspace + */ + kfree(content_buffer); + return ret; } static noinline int search_ioctl(struct inode *inode, struct btrfs_ioctl_search_key *sk, size_t buf_size, - char *buf + char __user *buf ) { struct btrfs_root *root; @@ -1996,6 +2037,7 @@ static noinline int search_ioctl(struct inode *inode, ret = copy_to_sk(root, path, &key, sk, buf_size, buf, &sk_offset, &num_found); btrfs_release_path(path); + if (ret || num_found >= sk->nr_items) break; @@ -2011,34 +2053,39 @@ err: static noinline int btrfs_ioctl_tree_search(struct file *file, void __user *argp) { - struct btrfs_ioctl_search_args *args; - struct inode *inode; - int ret; + struct btrfs_ioctl_search_args __user *usarg; + struct btrfs_ioctl_search_key *sk; + struct inode *inode; + int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - args = memdup_user(argp, sizeof(*args)); - if (IS_ERR(args)) - return PTR_ERR(args); + usarg = (struct btrfs_ioctl_search_args __user *)argp; + + sk = memdup_user(&usarg->key, sizeof(*sk)); + if (IS_ERR(sk)) + return PTR_ERR(sk); inode = file_inode(file); - ret = search_ioctl(inode, &args->key, sizeof(args->buf), args->buf); + ret = search_ioctl(inode, sk, sizeof(usarg->buf), usarg->buf); + /* the origin ioctl does handle too large results by returning an item + * with a len of 0 */ if (ret == -EOVERFLOW) ret = 0; - if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + if (ret == 0 && copy_to_user(argp, sk, sizeof(*sk))) ret = -EFAULT; - kfree(args); + kfree(sk); return ret; } static noinline int btrfs_ioctl_tree_search_v2(struct file *file, void __user *argp) { + struct btrfs_ioctl_search_args_v2 __user *usarg; struct btrfs_ioctl_search_args_v2 *args; struct inode *inode; int ret; - char *buf; size_t buf_size; if (!capable(CAP_SYS_ADMIN)) @@ -2049,31 +2096,19 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file, if (IS_ERR(args)) return PTR_ERR(args); + usarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; + buf_size = args->buf_size; if (buf_size < sizeof(struct btrfs_ioctl_search_header)) { kfree(args); - return -ENOMEM; - } - - /* limit memory */ - if (buf_size > PAGE_SIZE * 32) - buf_size = PAGE_SIZE * 32; - - buf = memdup_user(argp->buf, buf_size); - if (IS_ERR(buf)) { - kfree(args); - return PTR_ERR(buf); + return -EOVERFLOW; } inode = file_inode(file); - ret = search_ioctl(inode, &args->key, buf_size, buf); - if (ret == 0 && ( - copy_to_user(argp, args, sizeof(*args)) || - copy_to_user(argp->buf, buf, buf_size) - )) + ret = search_ioctl(inode, &args->key, buf_size, usarg->buf); + if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ret = -EFAULT; - kfree(buf); kfree(args); return ret; }
By copying each found item seperatly to userspace, we only need a small buffer in the kernel. This allows to run a large search inside of a single call. Signed-off-by: Gerhard Heift <Gerhard@Heift.Name> --- fs/btrfs/ioctl.c | 107 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 36 deletions(-)