diff mbox series

[2/5] btrfs: add new ioctl CLEAR_FREE

Message ID ecc43a72997ae7836c2d227b69924d364698e665.1740753608.git.dsterba@suse.com (mailing list archive)
State New
Headers show
Series Ioctl to clear unused space in various ways | expand

Commit Message

David Sterba Feb. 28, 2025, 2:49 p.m. UTC
Add a new ioctl that is an extensible version of FITRIM. It currently
supports only trim/discard and will be extended with other modes such as
zeroing or block unmapping.

We need a new ioctl for that because struct fstrim_range does not
provide any existing or reserved member for extensions. The new ioctl
also supports TRIM as the operation type.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c     | 92 ++++++++++++++++++++++++++++++++++++++
 fs/btrfs/extent-tree.h     |  2 +
 fs/btrfs/ioctl.c           | 42 +++++++++++++++++
 include/uapi/linux/btrfs.h | 20 +++++++++
 4 files changed, 156 insertions(+)

Comments

Sun YangKai March 1, 2025, 3:19 a.m. UTC | #1
New to lkml, please correct me if I made any mistake :P

> +static int btrfs_ioctl_clear_free(struct file *file, void __user *arg)
> +{
> +	struct btrfs_fs_info *fs_info;
> +	struct btrfs_ioctl_clear_free_args args;
> +	u64 total_bytes;
> +	int ret;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	if (copy_from_user(&args, arg, sizeof(args)))
> +		return -EFAULT;
> +
> +	if (args.type >= BTRFS_NR_CLEAR_OP_TYPES)
> +		return -EOPNOTSUPP;
> +
> +	ret = mnt_want_write_file(file);
> +	if (ret)
> +		return ret;
> +
> +	fs_info = inode_to_fs_info(file_inode(file));
> +	total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
> +	if (args.start > total_bytes) {
> +		ret = -EINVAL;
> +		goto out_drop_write;
> +	}
> +
> +	ret = btrfs_clear_free_space(fs_info, &args);
> +	if (ret < 0)
> +		goto out_drop_write;
> +
> +	if (copy_to_user(arg, &args, sizeof(args)))
> +		ret = -EFAULT;
> +
> +out_drop_write:
> +	mnt_drop_write_file(file);
> +
> +	return 0;
The return value previously stored in `ret` is not used here; this final return always yields 0.
> +}
diff mbox series

Patch

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index df86ffde478b..4ab9850b7383 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6562,3 +6562,95 @@  int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 		return bg_ret;
 	return dev_ret;
 }
+
+/*
+ * Clear free space in the range described by @args.
+ *
+ * The block groups overlapping the range are processed first through
+ * btrfs_trim_block_group(), then every non-missing device is processed through
+ * btrfs_trim_free_extents().
+ *
+ * @fs_info: filesystem to operate on
+ * @args:    in: type, start, length and minlen of the request;
+ *           out: length is overwritten with the number of bytes cleared
+ *
+ * A failing block group is counted and skipped, the walk continues with the
+ * next one.  The device walk stops at the first failing device.
+ *
+ * Returns -EINVAL if start is U64_MAX or if start + length overflows.
+ * Otherwise returns the last block group error if there was any, else the
+ * device error, else 0.
+ */
+int btrfs_clear_free_space(struct btrfs_fs_info *fs_info,
+			   struct btrfs_ioctl_clear_free_args *args)
+{
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_device *device;
+	struct btrfs_block_group *cache = NULL;
+	u64 group_cleared;
+	u64 range_end = U64_MAX;
+	u64 start;
+	u64 end;
+	u64 cleared = 0;
+	u64 bg_failed = 0;
+	u64 dev_failed = 0;
+	int bg_ret = 0;
+	int dev_ret = 0;
+	int ret = 0;
+
+	if (args->start == U64_MAX)
+		return -EINVAL;
+
+	/*
+	 * Check range overflow if args->length is set.  The default args->length
+	 * is U64_MAX.
+	 */
+	if (args->length != U64_MAX &&
+	    check_add_overflow(args->start, args->length, &range_end))
+		return -EINVAL;
+
+	/* Walk the block groups, beginning with the first one found for args->start. */
+	cache = btrfs_lookup_first_block_group(fs_info, args->start);
+	for (; cache; cache = btrfs_next_block_group(cache)) {
+		/* Block group begins at or past the end of the range: put it and stop. */
+		if (cache->start >= range_end) {
+			btrfs_put_block_group(cache);
+			break;
+		}
+
+		/* Clamp the requested range to the extent of this block group. */
+		start = max(args->start, cache->start);
+		end = min(range_end, cache->start + cache->length);
+
+		/* Only process the block group if the clamped range reaches minlen. */
+		if (end - start >= args->minlen) {
+			/*
+			 * NOTE(review): presumably the block group's free space
+			 * information has to be loaded before it can be cleared
+			 * -- confirm against btrfs_cache_block_group().
+			 */
+			if (!btrfs_block_group_done(cache)) {
+				ret = btrfs_cache_block_group(cache, true);
+				if (ret) {
+					/* Record the failure and go on with the next block group. */
+					bg_failed++;
+					bg_ret = ret;
+					continue;
+				}
+			}
+			ret = btrfs_trim_block_group(cache, &group_cleared,
+						     start, end, args->minlen,
+						     args->type);
+
+			/* Added before the error check, so it is counted on failure too. */
+			cleared += group_cleared;
+			if (ret) {
+				bg_failed++;
+				bg_ret = ret;
+				continue;
+			}
+		}
+	}
+
+	if (bg_failed)
+		btrfs_warn(fs_info,
+			"failed to clear %llu block group(s), last error %d",
+			bg_failed, bg_ret);
+
+	/* Second pass: per-device processing under the device list mutex. */
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		/* Skip devices flagged as missing. */
+		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+			continue;
+
+		ret = btrfs_trim_free_extents(device, &group_cleared, args->type);
+		if (ret) {
+			/* Stop at the first failing device; its count is not added. */
+			dev_failed++;
+			dev_ret = ret;
+			break;
+		}
+
+		cleared += group_cleared;
+	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+
+	if (dev_failed)
+		btrfs_warn(fs_info,
+			"failed to trim %llu device(s), last error %d",
+			dev_failed, dev_ret);
+	/* Report the number of cleared bytes back through the length member. */
+	args->length = cleared;
+	/* A block group error takes precedence over a device error. */
+	if (bg_ret)
+		return bg_ret;
+
+	return dev_ret;
+}
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index c8e1a30309ab..e0702b276825 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -166,5 +166,7 @@  int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 			 u64 num_bytes, u64 *actual_bytes,
 			 enum btrfs_clear_op_type clear);
 int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
+int btrfs_clear_free_space(struct btrfs_fs_info *fs_info,
+			   struct btrfs_ioctl_clear_free_args *args);
 
 #endif
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f3ce82d113be..203e8a23d6c2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5213,6 +5213,46 @@  static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *a
 	return 0;
 }
 
+/*
+ * Handler for BTRFS_IOC_CLEAR_FREE: clear free space as described by the
+ * user-supplied struct btrfs_ioctl_clear_free_args.
+ *
+ * @file: open file on the filesystem to operate on
+ * @arg:  user pointer to struct btrfs_ioctl_clear_free_args; on success the
+ *        structure is copied back to user space
+ *
+ * Requires CAP_SYS_ADMIN.
+ *
+ * Returns 0 on success, -EPERM without the capability, -EFAULT if the user
+ * buffer cannot be read or written, -EOPNOTSUPP for an unknown operation
+ * type, -EINVAL if the start offset is beyond the total bytes recorded in the
+ * super block, or the error returned by mnt_want_write_file() or
+ * btrfs_clear_free_space().
+ *
+ * NOTE(review): reserved1 and reserved2 are documented as "must be zero" in
+ * the UAPI structure but are not validated here -- confirm whether a check
+ * should be added before the fields are put to use.
+ */
+static int btrfs_ioctl_clear_free(struct file *file, void __user *arg)
+{
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_ioctl_clear_free_args args;
+	u64 total_bytes;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&args, arg, sizeof(args)))
+		return -EFAULT;
+
+	if (args.type >= BTRFS_NR_CLEAR_OP_TYPES)
+		return -EOPNOTSUPP;
+
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
+
+	fs_info = inode_to_fs_info(file_inode(file));
+	total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
+	if (args.start > total_bytes) {
+		ret = -EINVAL;
+		goto out_drop_write;
+	}
+
+	ret = btrfs_clear_free_space(fs_info, &args);
+	if (ret < 0)
+		goto out_drop_write;
+
+	/* Hand the updated structure back to the caller. */
+	if (copy_to_user(arg, &args, sizeof(args)))
+		ret = -EFAULT;
+
+out_drop_write:
+	mnt_drop_write_file(file);
+
+	/* Propagate the status; a plain "return 0" would hide every error above. */
+	return ret;
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
 		cmd, unsigned long arg)
 {
@@ -5368,6 +5408,8 @@  long btrfs_ioctl(struct file *file, unsigned int
 #endif
 	case BTRFS_IOC_SUBVOL_SYNC_WAIT:
 		return btrfs_ioctl_subvol_sync(fs_info, argp);
+	case BTRFS_IOC_CLEAR_FREE:
+		return btrfs_ioctl_clear_free(file, argp);
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 64f971a6bcb2..278010aff02e 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -1094,6 +1094,24 @@  enum btrfs_clear_op_type {
 	BTRFS_NR_CLEAR_OP_TYPES,
 };
 
+struct btrfs_ioctl_clear_free_args {
+	/* In, type of clearing operation, enumerated in btrfs_clear_op_type. */
+	__u32 type;
+	/* Reserved, must be zero. */
+	__u32 reserved1;
+	/*
+	 * In. Starting offset to clear from in the logical address space (same
+	 * as fstrim_range::start).
+	 */
+	__u64 start;
+	/*
+	 * In, out. Length from the start to clear (same as fstrim_range::length).
+	 * On return it holds the number of bytes that were cleared.
+	 */
+	__u64 length;
+	/* In. Minimal length to clear (same as fstrim_range::minlen). */
+	__u64 minlen;
+	/* Reserved, must be zero. */
+	__u64 reserved2[4];
+};
+
 #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
 				   struct btrfs_ioctl_vol_args)
 #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -1200,6 +1218,8 @@  enum btrfs_clear_op_type {
 				   struct btrfs_ioctl_vol_args_v2)
 #define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
 					struct btrfs_ioctl_logical_ino_args)
+#define BTRFS_IOC_CLEAR_FREE _IOWR(BTRFS_IOCTL_MAGIC, 90, \
+				struct btrfs_ioctl_clear_free_args)
 #define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \
 				struct btrfs_ioctl_get_subvol_info_args)
 #define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \