@@ -40,6 +40,8 @@
#include <linux/xattr.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -2231,6 +2233,664 @@ static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
return btrfs_wait_for_commit(root, transid);
}
+static noinline int check_hash(struct inode *inode,
+ struct btrfs_ioctl_same_args *args,
+ u64 off, u64 len)
+{
+ struct hash_desc desc;
+ struct scatterlist sg;
+ struct page *page;
+ void *addr;
+ char *buffer;
+ char *digest = NULL;
+ char *hash = NULL;
+ pgoff_t index;
+ pgoff_t last_index;
+ int ret = 0;
+ int bytes_copied = 0;
+ int digest_len;
+
+ buffer = kmalloc(len, GFP_NOFS);
+ if (!buffer)
+ return -ENOMEM;
+
+ switch (args->hash_type) {
+ case BTRFS_SAME_EXTENT_HASH_SHA256:
+ desc.tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!desc.tfm) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ digest_len = crypto_hash_digestsize(desc.tfm);
+ if (digest_len != args->hash_len) {
+ ret = -EINVAL;
+ goto out_crypto;
+ }
+
+ hash = kmalloc(digest_len, GFP_NOFS);
+ if (!hash) {
+ ret = -ENOMEM;
+ goto out_crypto;
+ }
+
+ digest = kmalloc(digest_len, GFP_NOFS);
+ if (!digest) {
+ ret = -ENOMEM;
+ goto out_crypto;
+ }
+
+ if (copy_from_user(hash, (char __user *)args->hash, digest_len)) {
+ ret = -EFAULT;
+ goto out_crypto;
+ }
+
+ desc.flags = 0;
+ index = off >> PAGE_CACHE_SHIFT;
+ last_index = (off + len - 1) >> PAGE_CACHE_SHIFT;
+
+ while (index <= last_index) {
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ ret = -EINVAL;
+ goto out_crypto;
+ }
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ ret = -EINVAL;
+ goto out_crypto;
+ }
+ }
+
+ addr = kmap(page);
+ memcpy(buffer + bytes_copied, addr, PAGE_CACHE_SIZE);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ bytes_copied += PAGE_CACHE_SIZE;
+ index++;
+ }
+
+ sg_init_one(&sg, buffer, len);
+
+ if (crypto_hash_digest(&desc, &sg, sg.length, digest)) {
+ ret = -EINVAL;
+ goto out_crypto;
+ }
+
+ if (memcmp(digest, hash, digest_len)) {
+ printk(KERN_ERR "hash's don't match\n");
+ ret = -EINVAL;
+ }
+
+out_crypto:
+ kfree(digest);
+ kfree(hash);
+ crypto_free_hash(desc.tfm);
+out:
+ kfree(buffer);
+ printk(KERN_ERR "mark 4\n");
+ return ret;
+}
+
+static noinline int split_extent(struct btrfs_trans_handle *trans,
+ struct inode *inode, u64 start, u64 end,
+ u64 *bytenr, u64 *bytes, u64 *offset)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ u64 extent_start;
+ u64 extent_end;
+ u64 extent_offset;
+ u64 search_start = start;
+ u64 disk_bytenr;
+ u64 disk_bytes;
+ int ret;
+ int recow;
+ int extent_type;
+
+ btrfs_drop_extent_cache(inode, start, end - 1, 0);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ while (1) {
+ recow = 0;
+ ret = btrfs_lookup_file_extent(trans, root, path,
+ inode->i_ino, search_start, 1);
+ if (ret < 0)
+ break;
+ if (ret > 0 && path->slots[0] > 0) {
+ path->slots[0]--;
+ } else if (ret > 0 && path->slots[0] == 0) {
+ ret = btrfs_prev_leaf(root, path);
+ if (ret < 0)
+ break;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+
+next_slot:
+ leaf = path->nodes[0];
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ break;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ leaf = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid > inode->i_ino ||
+ key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
+ break;
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(leaf, fi);
+ if (extent_type != BTRFS_FILE_EXTENT_REG) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (btrfs_file_extent_compression(leaf, fi) ||
+ btrfs_file_extent_encryption(leaf, fi) ||
+ btrfs_file_extent_other_encoding(leaf, fi)) {
+ printk(KERN_ERR "cannot dedup transformed extents\n");
+ ret = -EINVAL;
+ break;
+ }
+
+ extent_start = key.offset;
+ extent_end = extent_start +
+ btrfs_file_extent_num_bytes(leaf, fi);
+ extent_offset = btrfs_file_extent_offset(leaf, fi);
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ disk_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+
+ if (extent_end < search_start) {
+ path->slots[0]++;
+ goto next_slot;
+ }
+
+ search_start = max(key.offset, start);
+ if (recow) {
+ btrfs_release_path(root, path);
+ continue;
+ }
+
+ /*
+ * | - range to split - |
+ * | -------- extent -------- |
+ */
+ if (start > key.offset && end < extent_end) {
+ struct btrfs_key new_key;
+
+ memcpy(&new_key, &key, sizeof(new_key));
+ new_key.offset = start;
+ ret = btrfs_duplicate_item(trans, root, path,
+ &new_key);
+ if (ret == -EAGAIN) {
+ btrfs_release_path(root, path);
+ continue;
+ }
+ if (ret < 0)
+ break;
+
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi, start -
+ key.offset);
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_offset += start - key.offset;
+ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - start);
+ if (bytes)
+ *bytes = disk_bytes;
+ if (bytenr)
+ *bytenr = disk_bytenr;
+ if (offset)
+ *offset = extent_offset;
+
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
+ disk_bytes, 0,
+ root->root_key.objectid,
+ inode->i_ino,
+ extent_offset);
+ if (ret) {
+ int err;
+ WARN_ON(1);
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - extent_start);
+ err = btrfs_del_items(trans, root, path,
+ path->slots[0], 1);
+ WARN_ON(err);
+ break;
+ }
+
+ new_key.offset = end;
+ ret = btrfs_duplicate_item(trans, root, path, &new_key);
+ if (ret == -EAGAIN) {
+ btrfs_release_path(root, path);
+ continue;
+ }
+ if (ret < 0)
+ break;
+
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ end - start);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_offset += end - start;
+ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - end);
+
+ ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
+ disk_bytes, 0,
+ root->root_key.objectid,
+ inode->i_ino, 0);
+ if (ret) {
+ int err;
+ WARN_ON(1);
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - start);
+ err = btrfs_del_items(trans, root, path,
+ path->slots[0], 1);
+ WARN_ON(err);
+ break;
+ }
+ btrfs_mark_buffer_dirty(leaf);
+ ret = 0;
+ break;
+ }
+
+ /*
+ * | - range to split -|
+ * | ------ extent ------|
+ */
+ if (start == key.offset && end < extent_end) {
+ struct btrfs_key new_key;
+
+ memcpy(&new_key, &key, sizeof(new_key));
+ new_key.offset = end;
+ ret = btrfs_duplicate_item(trans, root, path,
+ &new_key);
+ if (ret == -EAGAIN) {
+ btrfs_release_path(root, path);
+ continue;
+ }
+ if (ret < 0)
+ break;
+
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ end - start);
+
+ if (bytes)
+ *bytes = disk_bytes;
+ if (bytenr)
+ *bytenr = disk_bytenr;
+ if (offset)
+ *offset = extent_offset;
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_offset += end - start;
+ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - end);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
+ disk_bytes, 0,
+ root->root_key.objectid,
+ inode->i_ino,
+ extent_offset);
+ if (ret) {
+ int err;
+
+ WARN_ON(1);
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - start);
+ err = btrfs_del_items(trans, root, path,
+ path->slots[0], 1);
+ WARN_ON(err);
+ break;
+ }
+ ret = 0;
+ break;
+ }
+
+ if (start == key.offset && end == extent_end) {
+ if (bytes)
+ *bytes = disk_bytes;
+ if (bytenr)
+ *bytenr = disk_bytenr;
+ if (offset)
+ *offset = extent_offset;
+ ret = 0;
+ break;
+ }
+
+ ret = -ERANGE;
+ break;
+ }
+
+ btrfs_free_path(path);
+ return ret;
+}
+
+static noinline int link_extent(struct btrfs_trans_handle *trans,
+ struct inode *inode, u64 disk_bytenr,
+ u64 disk_bytes, u64 extent_offset,
+ u64 offset, u64 len)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+ u64 hint;
+ int ret;
+ int err;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, disk_bytes, 0,
+ root->root_key.objectid, inode->i_ino,
+ extent_offset);
+ if (ret)
+ return ret;
+
+ ret = btrfs_drop_extents(trans, inode, offset, offset + len,
+ &hint, 1);
+ if (ret) {
+ err = ret;
+ btrfs_free_path(path);
+ goto out;
+ }
+
+ key.objectid = inode->i_ino;
+ key.offset = offset;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
+
+ /*
+ * Yes I know, it sucks, but if we don't do this we'll lose data, we
+ * need to come up with a way to undo the drop_extents so that if this
+ * part fails we can still at least have the old data.
+ */
+ BUG_ON(ret);
+
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+ btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_bytes);
+ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi, len);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, len);
+ btrfs_set_file_extent_compression(leaf, fi, 0);
+ btrfs_set_file_extent_encryption(leaf, fi, 0);
+ btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+ btrfs_free_path(path);
+
+ return 0;
+out:
+ ret = btrfs_free_extent(trans, root, disk_bytenr, disk_bytes, 0,
+ root->root_key.objectid, inode->i_ino,
+ extent_offset);
+ if (ret)
+ err = ret;
+ return err;
+}
+
+static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+{
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+ lock_extent(&BTRFS_I(inode)->io_tree, off,
+ off + len - 1, GFP_NOFS);
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ off + len - 1);
+ if (!ordered &&
+ !test_range_bit(&BTRFS_I(inode)->io_tree, off,
+ off + len - 1, EXTENT_DELALLOC, 0, NULL))
+ break;
+ unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len -1,
+ GFP_NOFS);
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ btrfs_wait_ordered_range(inode, off, len);
+ }
+}
+
+static noinline long btrfs_ioctl_file_extent_same(struct file *file,
+ void __user *argp)
+{
+ struct btrfs_ioctl_same_args *args;
+ struct btrfs_ioctl_same_args tmp;
+ struct inode *src = file->f_dentry->d_inode;
+ struct btrfs_root *root;
+ struct file *dst_file;
+ struct inode *dst_inode;
+ struct btrfs_trans_handle *trans;
+ u64 off;
+ u64 len;
+ u64 bytenr;
+ u64 bytes;
+ u64 extent_offset;
+ int args_size = 0;
+ int drop_ref = 0;
+ int i;
+ int ret;
+
+ if (copy_from_user(&tmp,
+ (struct btrfs_ioctl_same_args __user *)argp,
+ sizeof(tmp)))
+ return -EFAULT;
+
+ if (tmp.hash_type != BTRFS_SAME_EXTENT_HASH_SHA256)
+ return -EINVAL;
+
+ args_size = sizeof(tmp) + (tmp.total_files *
+ sizeof(struct btrfs_ioctl_file_extent_info));
+
+ /* lets not let this get out of hand */
+ if (args_size > PAGE_CACHE_SIZE)
+ return -ENOMEM;
+
+ args = kmalloc(args_size, GFP_NOFS);
+ if (!args)
+ return -ENOMEM;
+
+ if (copy_from_user(args,
+ (struct btrfs_ioctl_same_args __user *)argp,
+ args_size))
+ return -EFAULT;
+
+ /* Make sure args didn't change magically between copies. */
+ if (memcmp(&tmp, args, sizeof(tmp))) {
+ kfree(args);
+ return -EINVAL;
+ }
+
+ if (S_ISDIR(src->i_mode)) {
+ kfree(args);
+ return -EISDIR;
+ }
+
+ off = args->logical_offset;
+ len = args->length;
+ root = BTRFS_I(src)->root;
+
+ /*
+ * Since we have to hash the extent to make sure it's still right, we
+ * have to allocate a buffer to hold the data, so let's not let that
+ * buffer be larger than 1mb just to make sure nobody abuses this.
+ */
+ if (len > 1 * 1024 * 1024)
+ return -ENOMEM;
+
+ lock_extent_range(src, off, len);
+
+ if (check_hash(src, args, off, len)) {
+ unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1,
+ GFP_NOFS);
+ kfree(args);
+ return -EINVAL;
+ }
+
+ trans = btrfs_start_transaction(root, args->total_files + 1);
+ if (IS_ERR(trans)) {
+ unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1,
+ GFP_NOFS);
+ kfree(args);
+ return PTR_ERR(trans);
+ }
+
+ ret = split_extent(trans, src, off, off + len, &bytenr, &bytes,
+ &extent_offset);
+ if (ret) {
+ unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1,
+ GFP_NOFS);
+ btrfs_end_transaction(trans, root);
+ kfree(args);
+ return ret;
+ }
+
+ ret = btrfs_inc_extent_ref(trans, root, bytenr, bytes, 0,
+ root->root_key.objectid, src->i_ino,
+ extent_offset);
+ unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1, GFP_NOFS);
+ if (ret) {
+ btrfs_end_transaction(trans, root);
+ kfree(args);
+ return ret;
+ }
+
+ drop_ref = 1;
+ for (i = 0; i < args->total_files; i++) {
+ struct btrfs_ioctl_file_extent_info *info;
+ info = &args->info[i];
+
+ dst_file = fget(info->fd);
+ if (!dst_file) {
+ printk(KERN_ERR "btrfs: invalid fd %lld\n", info->fd);
+ ret = -EINVAL;
+ break;
+ }
+ dst_inode = dst_file->f_dentry->d_inode;
+ if (S_ISDIR(dst_inode->i_mode)) {
+ fput(dst_file);
+ printk(KERN_ERR "btrfs: file is dir %lld\n", info->fd);
+ ret = -EINVAL;
+ break;
+ }
+
+ if (BTRFS_I(dst_inode)->root != root) {
+ fput(dst_file);
+ printk(KERN_ERR "btrfs: cannot dedup across subvolumes"
+ " %lld\n", info->fd);
+ ret = -EINVAL;
+ break;
+ }
+
+ off = info->logical_offset;
+ lock_extent_range(dst_inode, off, len);
+ if (check_hash(dst_inode, args, off, len)) {
+ printk(KERN_ERR "btrfs: hash for fd %lld does not "
+ "match\n", info->fd);
+ unlock_extent(&BTRFS_I(dst_inode)->io_tree, off,
+ off + len - 1, GFP_NOFS);
+ fput(dst_file);
+ continue;
+ }
+
+ ret = link_extent(trans, dst_inode, bytenr, bytes,
+ extent_offset, off, len);
+ if (ret) {
+ unlock_extent(&BTRFS_I(dst_inode)->io_tree, off,
+ off + len - 1, GFP_NOFS);
+ fput(dst_file);
+ break;
+ }
+
+ unlock_extent(&BTRFS_I(dst_inode)->io_tree, off, off + len - 1,
+ GFP_NOFS);
+
+ fput(dst_file);
+
+ if (drop_ref) {
+ ret = btrfs_free_extent(trans, root, bytenr, bytes, 0,
+ root->root_key.objectid,
+ src->i_ino, extent_offset);
+ if (ret)
+ break;
+ drop_ref = 0;
+ }
+ }
+
+ if (drop_ref) {
+ btrfs_free_extent(trans, root, bytenr, bytes, 0,
+ root->root_key.objectid, src->i_ino,
+ extent_offset);
+ }
+
+ btrfs_end_transaction(trans, root);
+
+ kfree(args);
+ return ret;
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -2287,6 +2947,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_start_sync(file, argp);
case BTRFS_IOC_WAIT_SYNC:
return btrfs_ioctl_wait_sync(file, argp);
+ case BTRFS_IOC_FILE_EXTENT_SAME:
+ return btrfs_ioctl_file_extent_same(file, argp);
}
return -ENOTTY;
@@ -145,6 +145,23 @@ struct btrfs_ioctl_space_args {
struct btrfs_ioctl_space_info spaces[0];
};
+#define BTRFS_SAME_EXTENT_HASH_SHA256 1
+
+struct btrfs_ioctl_file_extent_info {
+ __s64 fd;
+ __u64 logical_offset;
+};
+
+struct btrfs_ioctl_same_args {
+ __u64 logical_offset;
+ __u64 length;
+ __u64 total_files;
+ u32 hash_len;
+ u8 hash_type;
+ char *hash;
+ struct btrfs_ioctl_file_extent_info info[0];
+};
+
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -189,4 +206,6 @@ struct btrfs_ioctl_space_args {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_async_vol_args)
+#define BTRFS_IOC_FILE_EXTENT_SAME _IOW(BTRFS_IOCTL_MAGIC, 24, \
+ struct btrfs_ioctl_same_args)
#endif