diff mbox series

[V4] btrfs-progs: Make btrfs_prepare_device parallel during mkfs.btrfs

Message ID 1662279863-17114-1-git-send-email-zhanglikernel@gmail.com (mailing list archive)
State New, archived
Headers show
Series [V4] btrfs-progs: Make btrfs_prepare_device parallel during mkfs.btrfs | expand

Commit Message

Li Zhang Sept. 4, 2022, 8:24 a.m. UTC
[enhancement]
When a disk is formatted as btrfs, it calls
btrfs_prepare_device for each device, which takes too much time.

[implementation]
Put each btrfs_prepare_device into a thread,
wait for the first thread to complete to mkfs.btrfs,
and wait for other threads to complete before adding
other devices to the file system.

[test]
Using the btrfs-progs test case mkfs-tests, mkfs.btrfs works fine.

Use tcmu-runner emulation to simulate two devices for testing.
Each device is 2000G (about 19.53 TiB), the region size is 4MB,
Use the following parameters for targetcli
create name=zbc0 size=20000G cfgstring=model-HM/zsize-4/conv-100@~/zbc0.raw

Call difftime to calculate the running time of the function btrfs_prepare_device.
Calculate the time from thread creation to completion of all threads after
patching (time calculation is not included in patch submission)
The test results are as follows.

$ lsscsi -p
[10:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdb   -          none
[11:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdc   -          none

$ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
....
time for prepare devices:4.000000.
....

$ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
...
time for prepare devices:2.000000.
...

Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
---
Issue: 496

V1:
* Put btrfs_prepare_device into threads and make them parallel

V2:
* Set the 4 variables used by btrfs_prepare_device as global variables.
* Use pthread_mutex to ensure error messages are not messed up.
* Correct the error message
* Wait for all threads to exit in a loop

V3:
* Add prefix opt to the global variables
* Format code
* Add error handler for open

V4:
* initialize the global options

 mkfs/main.c | 154 +++++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 107 insertions(+), 47 deletions(-)

Comments

Qu Wenruo Sept. 4, 2022, 8:40 a.m. UTC | #1
On 2022/9/4 16:24, Li Zhang wrote:
> [enhancement]
> When a disk is formatted as btrfs, it calls
> btrfs_prepare_device for each device, which takes too much time.
>
> [implementation]
> Put each btrfs_prepare_device into a thread,
> wait for the first thread to complete to mkfs.btrfs,
> and wait for other threads to complete before adding
> other devices to the file system.
>
> [test]
> Using the btrfs-progs test case mkfs-tests, mkfs.btrfs works fine.
>
> Use tcmu-runner emulation to simulate two devices for testing.
> Each device is 2000G (about 19.53 TiB), the region size is 4MB,
> Use the following parameters for targetcli
> create name=zbc0 size=20000G cfgstring=model-HM/zsize-4/conv-100@~/zbc0.raw
>
> Call difftime to calculate the running time of the function btrfs_prepare_device.
> Calculate the time from thread creation to completion of all threads after
> patching (time calculation is not included in patch submission)
> The test results are as follows.
>
> $ lsscsi -p
> [10:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdb   -          none
> [11:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdc   -          none
>
> $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
> ....
> time for prepare devices:4.000000.
> ....
>
> $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
> ...
> time for prepare devices:2.000000.
> ...
>
> Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
> ---
> Issue: 496
>
> V1:
> * Put btrfs_prepare_device into threads and make them parallel
>
> V2:
> * Set the 4 variables used by btrfs_prepare_device as global variables.
> * Use pthread_mutex to ensure error messages are not messed up.
> * Correct the error message
> * Wait for all threads to exit in a loop
>
> V3:
> * Add prefix opt to the global variables
> * Format code
> * Add error handler for open
>
> V4:
> * initialize the global options
>
>   mkfs/main.c | 154 +++++++++++++++++++++++++++++++++++++++++-------------------
>   1 file changed, 107 insertions(+), 47 deletions(-)
>
> diff --git a/mkfs/main.c b/mkfs/main.c
> index ce096d3..3e16a0e 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -31,6 +31,7 @@
>   #include <uuid/uuid.h>
>   #include <ctype.h>
>   #include <blkid/blkid.h>
> +#include <pthread.h>
>   #include "kernel-shared/ctree.h"
>   #include "kernel-shared/disk-io.h"
>   #include "kernel-shared/free-space-tree.h"
> @@ -60,6 +61,20 @@ struct mkfs_allocation {
>   	u64 system;
>   };
>
> +static bool opt_zero_end = true;
> +static bool opt_discard = true;
> +static bool opt_zoned = true;
> +static int opt_oflags = O_RDONLY;

Isn't the default one should be O_RDWR?

Despite that, feel free to addd my reviewed by tag:

Reviewed-by: Qu Wenruo <wqu@suse.com>

Thanks,
Qu
> +
> +static pthread_mutex_t prepare_mutex;
> +
> +struct prepare_device_progress {
> +	char *file;
> +	u64 dev_block_count;
> +	u64 block_count;
> +	int ret;
> +};
> +
>   static int create_metadata_block_groups(struct btrfs_root *root, bool mixed,
>   				struct mkfs_allocation *allocation)
>   {
> @@ -969,6 +984,31 @@ fail:
>   	return ret;
>   }
>
> +static void *prepare_one_dev(void *ctx)
> +{
> +	struct prepare_device_progress *prepare_ctx = ctx;
> +	int fd;
> +
> +	fd = open(prepare_ctx->file, opt_oflags);
> +	if (fd < 0) {
> +		pthread_mutex_lock(&prepare_mutex);
> +		error("unable to open %s: %m", prepare_ctx->file);
> +		pthread_mutex_unlock(&prepare_mutex);
> +		prepare_ctx->ret = fd;
> +		return NULL;
> +	}
> +	prepare_ctx->ret = btrfs_prepare_device(fd,
> +			prepare_ctx->file,
> +			&prepare_ctx->dev_block_count,
> +			prepare_ctx->block_count,
> +			(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> +			(opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
> +			(opt_discard ? PREP_DEVICE_DISCARD : 0) |
> +			(opt_zoned ? PREP_DEVICE_ZONED : 0));
> +	close(fd);
> +	return NULL;
> +}
> +
>   int BOX_MAIN(mkfs)(int argc, char **argv)
>   {
>   	char *file;
> @@ -984,7 +1024,6 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   	u32 nodesize = 0;
>   	u32 sectorsize = 0;
>   	u32 stripesize = 4096;
> -	bool zero_end = true;
>   	int fd = -1;
>   	int ret = 0;
>   	int close_ret;
> @@ -993,11 +1032,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   	bool nodesize_forced = false;
>   	bool data_profile_opt = false;
>   	bool metadata_profile_opt = false;
> -	bool discard = true;
>   	bool ssd = false;
> -	bool zoned = false;
>   	bool force_overwrite = false;
> -	int oflags;
>   	char *source_dir = NULL;
>   	bool source_dir_set = false;
>   	bool shrink_rootdir = false;
> @@ -1006,6 +1042,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   	u64 shrink_size;
>   	int dev_cnt = 0;
>   	int saved_optind;
> +	pthread_t *t_prepare = NULL;
> +	struct prepare_device_progress *prepare_ctx = NULL;
>   	char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
>   	u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
>   	u64 runtime_features = BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES;
> @@ -1126,7 +1164,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   				break;
>   			case 'b':
>   				block_count = parse_size_from_string(optarg);
> -				zero_end = false;
> +				opt_zero_end = false;
>   				break;
>   			case 'v':
>   				bconf_be_verbose();
> @@ -1144,7 +1182,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   					BTRFS_UUID_UNPARSED_SIZE - 1);
>   				break;
>   			case 'K':
> -				discard = false;
> +				opt_discard = false;
>   				break;
>   			case 'q':
>   				bconf_be_quiet();
> @@ -1183,7 +1221,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   	if (dev_cnt == 0)
>   		print_usage(1);
>
> -	zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
> +	opt_zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
>
>   	if (source_dir_set && dev_cnt > 1) {
>   		error("the option -r is limited to a single device");
> @@ -1225,7 +1263,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>
>   	file = argv[optind++];
>   	ssd = is_ssd(file);
> -	if (zoned) {
> +	if (opt_zoned) {
>   		if (!zone_size(file)) {
>   			error("zoned: %s: zone size undefined", file);
>   			exit(1);
> @@ -1235,7 +1273,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   			printf(
>   	"Zoned: %s: host-managed device detected, setting zoned feature\n",
>   			       file);
> -		zoned = true;
> +		opt_zoned = true;
>   		features |= BTRFS_FEATURE_INCOMPAT_ZONED;
>   	}
>
> @@ -1302,7 +1340,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   		}
>   	}
>
> -	if (zoned) {
> +	if (opt_zoned) {
>   		if (source_dir_set) {
>   			error("the option -r and zoned mode are incompatible");
>   			exit(1);
> @@ -1392,7 +1430,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   	 * 1 zone for a metadata block group
>   	 * 1 zone for a data block group
>   	 */
> -	if (zoned && block_count && block_count < 5 * zone_size(file)) {
> +	if (opt_zoned && block_count && block_count < 5 * zone_size(file)) {
>   		error("size %llu is too small to make a usable filesystem",
>   			block_count);
>   		error("minimum size for a zoned btrfs filesystem is %llu",
> @@ -1422,35 +1460,58 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   	if (ret)
>   		goto error;
>
> -	if (zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
> +	if (opt_zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
>   		      !zoned_profile_supported(BTRFS_BLOCK_GROUP_DATA | data_profile))) {
>   		error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
>   		goto error;
>   	}
>
> -	dev_cnt--;
> +	t_prepare = calloc(dev_cnt, sizeof(*t_prepare));
> +	prepare_ctx = calloc(dev_cnt, sizeof(*prepare_ctx));
>
> -	oflags = O_RDWR;
> -	if (zoned && zoned_model(file) == ZONED_HOST_MANAGED)
> -		oflags |= O_DIRECT;
> +	if (!t_prepare || !prepare_ctx) {
> +		error("unable to alloc thread for preparing dev");
> +		goto error;
> +	}
>
> -	/*
> -	 * Open without O_EXCL so that the problem should not occur by the
> -	 * following operation in kernel:
> -	 * (btrfs_register_one_device() fails if O_EXCL is on)
> -	 */
> -	fd = open(file, oflags);
> +	pthread_mutex_init(&prepare_mutex, NULL);
> +	opt_oflags = O_RDWR;
> +	for (i = 0; i < dev_cnt; i++) {
> +		if (opt_zoned && zoned_model(argv[optind + i - 1]) ==
> +			ZONED_HOST_MANAGED) {
> +			opt_oflags |= O_DIRECT;
> +			break;
> +		}
> +	}
> +	for (i = 0; i < dev_cnt; i++) {
> +		prepare_ctx[i].file = argv[optind + i - 1];
> +		prepare_ctx[i].block_count = block_count;
> +		prepare_ctx[i].dev_block_count = block_count;
> +		ret = pthread_create(&t_prepare[i], NULL,
> +			prepare_one_dev, &prepare_ctx[i]);
> +		if (ret) {
> +			error("create thread for prepare devices: %s failed, "
> +					"errno: %d",
> +					prepare_ctx[i].file, ret);
> +			goto error;
> +		}
> +	}
> +	for (i = 0; i < dev_cnt; i++)
> +		pthread_join(t_prepare[i], NULL);
> +	ret = prepare_ctx[0].ret;
> +
> +	if (ret) {
> +		error("unable prepare device: %s\n", prepare_ctx[0].file);
> +		goto error;
> +	}
> +
> +	dev_cnt--;
> +	fd = open(file, opt_oflags);
>   	if (fd < 0) {
>   		error("unable to open %s: %m", file);
>   		goto error;
>   	}
> -	ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
> -			(zero_end ? PREP_DEVICE_ZERO_END : 0) |
> -			(discard ? PREP_DEVICE_DISCARD : 0) |
> -			(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> -			(zoned ? PREP_DEVICE_ZONED : 0));
> -	if (ret)
> -		goto error;
> +	dev_block_count = prepare_ctx[0].dev_block_count;
>   	if (block_count && block_count > dev_block_count) {
>   		error("%s is smaller than requested size, expected %llu, found %llu",
>   		      file, (unsigned long long)block_count,
> @@ -1459,7 +1520,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   	}
>
>   	/* To create the first block group and chunk 0 in make_btrfs */
> -	system_group_size = zoned ?  zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
> +	system_group_size = opt_zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
>   	if (dev_block_count < system_group_size) {
>   		error("device is too small to make filesystem, must be at least %llu",
>   				(unsigned long long)system_group_size);
> @@ -1487,7 +1548,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   	mkfs_cfg.runtime_features = runtime_features;
>   	mkfs_cfg.csum_type = csum_type;
>   	mkfs_cfg.leaf_data_size = __BTRFS_LEAF_DATA_SIZE(nodesize);
> -	if (zoned)
> +	if (opt_zoned)
>   		mkfs_cfg.zone_size = zone_size(file);
>   	else
>   		mkfs_cfg.zone_size = 0;
> @@ -1558,14 +1619,10 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   		goto raid_groups;
>
>   	while (dev_cnt-- > 0) {
> +		int dev_index = argc - saved_optind - dev_cnt - 1;
>   		file = argv[optind++];
>
> -		/*
> -		 * open without O_EXCL so that the problem should not
> -		 * occur by the following processing.
> -		 * (btrfs_register_one_device() fails if O_EXCL is on)
> -		 */
> -		fd = open(file, O_RDWR);
> +		fd = open(file, opt_oflags);
>   		if (fd < 0) {
>   			error("unable to open %s: %m", file);
>   			goto error;
> @@ -1578,13 +1635,12 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>   			close(fd);
>   			continue;
>   		}
> -		ret = btrfs_prepare_device(fd, file, &dev_block_count,
> -				block_count,
> -				(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> -				(zero_end ? PREP_DEVICE_ZERO_END : 0) |
> -				(discard ? PREP_DEVICE_DISCARD : 0) |
> -				(zoned ? PREP_DEVICE_ZONED : 0));
> -		if (ret) {
> +		dev_block_count = prepare_ctx[dev_index]
> +			.dev_block_count;
> +
> +		if (prepare_ctx[dev_index].ret) {
> +			error("unable prepare device: %s.\n",
> +					prepare_ctx[dev_index].file);
>   			goto error;
>   		}
>
> @@ -1714,8 +1770,8 @@ raid_groups:
>   			btrfs_group_profile_str(metadata_profile),
>   			pretty_size(allocation.system));
>   		printf("SSD detected:       %s\n", ssd ? "yes" : "no");
> -		printf("Zoned device:       %s\n", zoned ? "yes" : "no");
> -		if (zoned)
> +		printf("Zoned device:       %s\n", opt_zoned ? "yes" : "no");
> +		if (opt_zoned)
>   			printf("  Zone size:        %s\n",
>   			       pretty_size(fs_info->zone_size));
>   		btrfs_parse_fs_features_to_string(features_buf, features);
> @@ -1763,12 +1819,16 @@ out:
>
>   	btrfs_close_all_devices();
>   	free(label);
> -
> +	free(t_prepare);
> +	free(prepare_ctx);
>   	return !!ret;
> +
>   error:
>   	if (fd > 0)
>   		close(fd);
>
> +	free(t_prepare);
> +	free(prepare_ctx);
>   	free(label);
>   	exit(1);
>   success:
Li Zhang Sept. 4, 2022, 9:31 a.m. UTC | #2
My first thought was that we should give the default value a least privilege,
so when others want to use this global value they should think about what
privileges they really need, so at first I initialized opt_oflags with 0,
but scripts/checkpatch. pl(btrfs-devel) says I can't initialize global variables
with 0 and opt_zoned with false. So I compromised to initialize opt_oflags
with O_RDONLY and opt_zoned with false .

Thanks,
Li

Qu Wenruo <quwenruo.btrfs@gmx.com> 于2022年9月4日周日 16:40写道:
>
>
>
> On 2022/9/4 16:24, Li Zhang wrote:
> > [enhancement]
> > When a disk is formatted as btrfs, it calls
> > btrfs_prepare_device for each device, which takes too much time.
> >
> > [implementation]
> > Put each btrfs_prepare_device into a thread,
> > wait for the first thread to complete to mkfs.btrfs,
> > and wait for other threads to complete before adding
> > other devices to the file system.
> >
> > [test]
> > Using the btrfs-progs test case mkfs-tests, mkfs.btrfs works fine.
> >
> > Use tcmu-runner emulation to simulate two devices for testing.
> > Each device is 2000G (about 19.53 TiB), the region size is 4MB,
> > Use the following parameters for targetcli
> > create name=zbc0 size=20000G cfgstring=model-HM/zsize-4/conv-100@~/zbc0.raw
> >
> > Call difftime to calculate the running time of the function btrfs_prepare_device.
> > Calculate the time from thread creation to completion of all threads after
> > patching (time calculation is not included in patch submission)
> > The test results are as follows.
> >
> > $ lsscsi -p
> > [10:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdb   -          none
> > [11:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdc   -          none
> >
> > $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
> > ....
> > time for prepare devices:4.000000.
> > ....
> >
> > $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
> > ...
> > time for prepare devices:2.000000.
> > ...
> >
> > Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
> > ---
> > Issue: 496
> >
> > V1:
> > * Put btrfs_prepare_device into threads and make them parallel
> >
> > V2:
> > * Set the 4 variables used by btrfs_prepare_device as global variables.
> > * Use pthread_mutex to ensure error messages are not messed up.
> > * Correct the error message
> > * Wait for all threads to exit in a loop
> >
> > V3:
> > * Add prefix opt to the global variables
> > * Format code
> > * Add error handler for open
> >
> > V4:
> > * initialize the global options
> >
> >   mkfs/main.c | 154 +++++++++++++++++++++++++++++++++++++++++-------------------
> >   1 file changed, 107 insertions(+), 47 deletions(-)
> >
> > diff --git a/mkfs/main.c b/mkfs/main.c
> > index ce096d3..3e16a0e 100644
> > --- a/mkfs/main.c
> > +++ b/mkfs/main.c
> > @@ -31,6 +31,7 @@
> >   #include <uuid/uuid.h>
> >   #include <ctype.h>
> >   #include <blkid/blkid.h>
> > +#include <pthread.h>
> >   #include "kernel-shared/ctree.h"
> >   #include "kernel-shared/disk-io.h"
> >   #include "kernel-shared/free-space-tree.h"
> > @@ -60,6 +61,20 @@ struct mkfs_allocation {
> >       u64 system;
> >   };
> >
> > +static bool opt_zero_end = true;
> > +static bool opt_discard = true;
> > +static bool opt_zoned = true;
> > +static int opt_oflags = O_RDONLY;
>
> Isn't the default one should be O_RDWR?
>
> Despite that, feel free to addd my reviewed by tag:
>
> Reviewed-by: Qu Wenruo <wqu@suse.com>
>
> Thanks,
> Qu
> > +
> > +static pthread_mutex_t prepare_mutex;
> > +
> > +struct prepare_device_progress {
> > +     char *file;
> > +     u64 dev_block_count;
> > +     u64 block_count;
> > +     int ret;
> > +};
> > +
> >   static int create_metadata_block_groups(struct btrfs_root *root, bool mixed,
> >                               struct mkfs_allocation *allocation)
> >   {
> > @@ -969,6 +984,31 @@ fail:
> >       return ret;
> >   }
> >
> > +static void *prepare_one_dev(void *ctx)
> > +{
> > +     struct prepare_device_progress *prepare_ctx = ctx;
> > +     int fd;
> > +
> > +     fd = open(prepare_ctx->file, opt_oflags);
> > +     if (fd < 0) {
> > +             pthread_mutex_lock(&prepare_mutex);
> > +             error("unable to open %s: %m", prepare_ctx->file);
> > +             pthread_mutex_unlock(&prepare_mutex);
> > +             prepare_ctx->ret = fd;
> > +             return NULL;
> > +     }
> > +     prepare_ctx->ret = btrfs_prepare_device(fd,
> > +                     prepare_ctx->file,
> > +                     &prepare_ctx->dev_block_count,
> > +                     prepare_ctx->block_count,
> > +                     (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> > +                     (opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
> > +                     (opt_discard ? PREP_DEVICE_DISCARD : 0) |
> > +                     (opt_zoned ? PREP_DEVICE_ZONED : 0));
> > +     close(fd);
> > +     return NULL;
> > +}
> > +
> >   int BOX_MAIN(mkfs)(int argc, char **argv)
> >   {
> >       char *file;
> > @@ -984,7 +1024,6 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >       u32 nodesize = 0;
> >       u32 sectorsize = 0;
> >       u32 stripesize = 4096;
> > -     bool zero_end = true;
> >       int fd = -1;
> >       int ret = 0;
> >       int close_ret;
> > @@ -993,11 +1032,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >       bool nodesize_forced = false;
> >       bool data_profile_opt = false;
> >       bool metadata_profile_opt = false;
> > -     bool discard = true;
> >       bool ssd = false;
> > -     bool zoned = false;
> >       bool force_overwrite = false;
> > -     int oflags;
> >       char *source_dir = NULL;
> >       bool source_dir_set = false;
> >       bool shrink_rootdir = false;
> > @@ -1006,6 +1042,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >       u64 shrink_size;
> >       int dev_cnt = 0;
> >       int saved_optind;
> > +     pthread_t *t_prepare = NULL;
> > +     struct prepare_device_progress *prepare_ctx = NULL;
> >       char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
> >       u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
> >       u64 runtime_features = BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES;
> > @@ -1126,7 +1164,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >                               break;
> >                       case 'b':
> >                               block_count = parse_size_from_string(optarg);
> > -                             zero_end = false;
> > +                             opt_zero_end = false;
> >                               break;
> >                       case 'v':
> >                               bconf_be_verbose();
> > @@ -1144,7 +1182,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >                                       BTRFS_UUID_UNPARSED_SIZE - 1);
> >                               break;
> >                       case 'K':
> > -                             discard = false;
> > +                             opt_discard = false;
> >                               break;
> >                       case 'q':
> >                               bconf_be_quiet();
> > @@ -1183,7 +1221,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >       if (dev_cnt == 0)
> >               print_usage(1);
> >
> > -     zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
> > +     opt_zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
> >
> >       if (source_dir_set && dev_cnt > 1) {
> >               error("the option -r is limited to a single device");
> > @@ -1225,7 +1263,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >
> >       file = argv[optind++];
> >       ssd = is_ssd(file);
> > -     if (zoned) {
> > +     if (opt_zoned) {
> >               if (!zone_size(file)) {
> >                       error("zoned: %s: zone size undefined", file);
> >                       exit(1);
> > @@ -1235,7 +1273,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >                       printf(
> >       "Zoned: %s: host-managed device detected, setting zoned feature\n",
> >                              file);
> > -             zoned = true;
> > +             opt_zoned = true;
> >               features |= BTRFS_FEATURE_INCOMPAT_ZONED;
> >       }
> >
> > @@ -1302,7 +1340,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >               }
> >       }
> >
> > -     if (zoned) {
> > +     if (opt_zoned) {
> >               if (source_dir_set) {
> >                       error("the option -r and zoned mode are incompatible");
> >                       exit(1);
> > @@ -1392,7 +1430,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >        * 1 zone for a metadata block group
> >        * 1 zone for a data block group
> >        */
> > -     if (zoned && block_count && block_count < 5 * zone_size(file)) {
> > +     if (opt_zoned && block_count && block_count < 5 * zone_size(file)) {
> >               error("size %llu is too small to make a usable filesystem",
> >                       block_count);
> >               error("minimum size for a zoned btrfs filesystem is %llu",
> > @@ -1422,35 +1460,58 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >       if (ret)
> >               goto error;
> >
> > -     if (zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
> > +     if (opt_zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
> >                     !zoned_profile_supported(BTRFS_BLOCK_GROUP_DATA | data_profile))) {
> >               error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
> >               goto error;
> >       }
> >
> > -     dev_cnt--;
> > +     t_prepare = calloc(dev_cnt, sizeof(*t_prepare));
> > +     prepare_ctx = calloc(dev_cnt, sizeof(*prepare_ctx));
> >
> > -     oflags = O_RDWR;
> > -     if (zoned && zoned_model(file) == ZONED_HOST_MANAGED)
> > -             oflags |= O_DIRECT;
> > +     if (!t_prepare || !prepare_ctx) {
> > +             error("unable to alloc thread for preparing dev");
> > +             goto error;
> > +     }
> >
> > -     /*
> > -      * Open without O_EXCL so that the problem should not occur by the
> > -      * following operation in kernel:
> > -      * (btrfs_register_one_device() fails if O_EXCL is on)
> > -      */
> > -     fd = open(file, oflags);
> > +     pthread_mutex_init(&prepare_mutex, NULL);
> > +     opt_oflags = O_RDWR;
> > +     for (i = 0; i < dev_cnt; i++) {
> > +             if (opt_zoned && zoned_model(argv[optind + i - 1]) ==
> > +                     ZONED_HOST_MANAGED) {
> > +                     opt_oflags |= O_DIRECT;
> > +                     break;
> > +             }
> > +     }
> > +     for (i = 0; i < dev_cnt; i++) {
> > +             prepare_ctx[i].file = argv[optind + i - 1];
> > +             prepare_ctx[i].block_count = block_count;
> > +             prepare_ctx[i].dev_block_count = block_count;
> > +             ret = pthread_create(&t_prepare[i], NULL,
> > +                     prepare_one_dev, &prepare_ctx[i]);
> > +             if (ret) {
> > +                     error("create thread for prepare devices: %s failed, "
> > +                                     "errno: %d",
> > +                                     prepare_ctx[i].file, ret);
> > +                     goto error;
> > +             }
> > +     }
> > +     for (i = 0; i < dev_cnt; i++)
> > +             pthread_join(t_prepare[i], NULL);
> > +     ret = prepare_ctx[0].ret;
> > +
> > +     if (ret) {
> > +             error("unable prepare device: %s\n", prepare_ctx[0].file);
> > +             goto error;
> > +     }
> > +
> > +     dev_cnt--;
> > +     fd = open(file, opt_oflags);
> >       if (fd < 0) {
> >               error("unable to open %s: %m", file);
> >               goto error;
> >       }
> > -     ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
> > -                     (zero_end ? PREP_DEVICE_ZERO_END : 0) |
> > -                     (discard ? PREP_DEVICE_DISCARD : 0) |
> > -                     (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> > -                     (zoned ? PREP_DEVICE_ZONED : 0));
> > -     if (ret)
> > -             goto error;
> > +     dev_block_count = prepare_ctx[0].dev_block_count;
> >       if (block_count && block_count > dev_block_count) {
> >               error("%s is smaller than requested size, expected %llu, found %llu",
> >                     file, (unsigned long long)block_count,
> > @@ -1459,7 +1520,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >       }
> >
> >       /* To create the first block group and chunk 0 in make_btrfs */
> > -     system_group_size = zoned ?  zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
> > +     system_group_size = opt_zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
> >       if (dev_block_count < system_group_size) {
> >               error("device is too small to make filesystem, must be at least %llu",
> >                               (unsigned long long)system_group_size);
> > @@ -1487,7 +1548,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >       mkfs_cfg.runtime_features = runtime_features;
> >       mkfs_cfg.csum_type = csum_type;
> >       mkfs_cfg.leaf_data_size = __BTRFS_LEAF_DATA_SIZE(nodesize);
> > -     if (zoned)
> > +     if (opt_zoned)
> >               mkfs_cfg.zone_size = zone_size(file);
> >       else
> >               mkfs_cfg.zone_size = 0;
> > @@ -1558,14 +1619,10 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >               goto raid_groups;
> >
> >       while (dev_cnt-- > 0) {
> > +             int dev_index = argc - saved_optind - dev_cnt - 1;
> >               file = argv[optind++];
> >
> > -             /*
> > -              * open without O_EXCL so that the problem should not
> > -              * occur by the following processing.
> > -              * (btrfs_register_one_device() fails if O_EXCL is on)
> > -              */
> > -             fd = open(file, O_RDWR);
> > +             fd = open(file, opt_oflags);
> >               if (fd < 0) {
> >                       error("unable to open %s: %m", file);
> >                       goto error;
> > @@ -1578,13 +1635,12 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >                       close(fd);
> >                       continue;
> >               }
> > -             ret = btrfs_prepare_device(fd, file, &dev_block_count,
> > -                             block_count,
> > -                             (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> > -                             (zero_end ? PREP_DEVICE_ZERO_END : 0) |
> > -                             (discard ? PREP_DEVICE_DISCARD : 0) |
> > -                             (zoned ? PREP_DEVICE_ZONED : 0));
> > -             if (ret) {
> > +             dev_block_count = prepare_ctx[dev_index]
> > +                     .dev_block_count;
> > +
> > +             if (prepare_ctx[dev_index].ret) {
> > +                     error("unable prepare device: %s.\n",
> > +                                     prepare_ctx[dev_index].file);
> >                       goto error;
> >               }
> >
> > @@ -1714,8 +1770,8 @@ raid_groups:
> >                       btrfs_group_profile_str(metadata_profile),
> >                       pretty_size(allocation.system));
> >               printf("SSD detected:       %s\n", ssd ? "yes" : "no");
> > -             printf("Zoned device:       %s\n", zoned ? "yes" : "no");
> > -             if (zoned)
> > +             printf("Zoned device:       %s\n", opt_zoned ? "yes" : "no");
> > +             if (opt_zoned)
> >                       printf("  Zone size:        %s\n",
> >                              pretty_size(fs_info->zone_size));
> >               btrfs_parse_fs_features_to_string(features_buf, features);
> > @@ -1763,12 +1819,16 @@ out:
> >
> >       btrfs_close_all_devices();
> >       free(label);
> > -
> > +     free(t_prepare);
> > +     free(prepare_ctx);
> >       return !!ret;
> > +
> >   error:
> >       if (fd > 0)
> >               close(fd);
> >
> > +     free(t_prepare);
> > +     free(prepare_ctx);
> >       free(label);
> >       exit(1);
> >   success:
Qu Wenruo Sept. 4, 2022, 9:50 a.m. UTC | #3
On 2022/9/4 17:31, li zhang wrote:
> My first thought was that we should give the default value a least privilege,
> so when others want to use this global value they should think about what
> privileges they really need, so at first I initialized opt_oflags with 0,
> but scripts/checkpatch. pl(btrfs-devel) says I can't initialize global variables
> with 0 and opt_zoned with false.

For checkpatch, I guess the reason is, static global variable is already 
initialized to 0, thus no need to specificially assign it to 0/false.

But if you can provide some context, like the checkpatch error, maybe 
I'm totally wrong.


Another thing related the privillege is, the global variables are only 
for mkfs usage.

I can hardly come up any reason that we want read-only open flags for 
mkfs...

Thanks,
Qu

> So I compromised to initialize opt_oflags
> with O_RDONLY and opt_zoned with false .
> 
> Thanks,
> Li
> 
> Qu Wenruo <quwenruo.btrfs@gmx.com> 于2022年9月4日周日 16:40写道:
>>
>>
>>
>> On 2022/9/4 16:24, Li Zhang wrote:
>>> [enhancement]
>>> When a disk is formatted as btrfs, it calls
>>> btrfs_prepare_device for each device, which takes too much time.
>>>
>>> [implementation]
>>> Put each btrfs_prepare_device into a thread,
>>> wait for the first thread to complete to mkfs.btrfs,
>>> and wait for other threads to complete before adding
>>> other devices to the file system.
>>>
>>> [test]
>>> Using the btrfs-progs test case mkfs-tests, mkfs.btrfs works fine.
>>>
>>> Use tcmu-runner emulation to simulate two devices for testing.
>>> Each device is 2000G (about 19.53 TiB), the region size is 4MB,
>>> Use the following parameters for targetcli
>>> create name=zbc0 size=20000G cfgstring=model-HM/zsize-4/conv-100@~/zbc0.raw
>>>
>>> Call difftime to calculate the running time of the function btrfs_prepare_device.
>>> Calculate the time from thread creation to completion of all threads after
>>> patching (time calculation is not included in patch submission)
>>> The test results are as follows.
>>>
>>> $ lsscsi -p
>>> [10:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdb   -          none
>>> [11:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdc   -          none
>>>
>>> $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
>>> ....
>>> time for prepare devices:4.000000.
>>> ....
>>>
>>> $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
>>> ...
>>> time for prepare devices:2.000000.
>>> ...
>>>
>>> Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
>>> ---
>>> Issue: 496
>>>
>>> V1:
>>> * Put btrfs_prepare_device into threads and make them parallel
>>>
>>> V2:
>>> * Set the 4 variables used by btrfs_prepare_device as global variables.
>>> * Use pthread_mutex to ensure error messages are not messed up.
>>> * Correct the error message
>>> * Wait for all threads to exit in a loop
>>>
>>> V3:
>>> * Add prefix opt to the global variables
>>> * Format code
>>> * Add error handler for open
>>>
>>> V4:
>>> * initialize the global options
>>>
>>>    mkfs/main.c | 154 +++++++++++++++++++++++++++++++++++++++++-------------------
>>>    1 file changed, 107 insertions(+), 47 deletions(-)
>>>
>>> diff --git a/mkfs/main.c b/mkfs/main.c
>>> index ce096d3..3e16a0e 100644
>>> --- a/mkfs/main.c
>>> +++ b/mkfs/main.c
>>> @@ -31,6 +31,7 @@
>>>    #include <uuid/uuid.h>
>>>    #include <ctype.h>
>>>    #include <blkid/blkid.h>
>>> +#include <pthread.h>
>>>    #include "kernel-shared/ctree.h"
>>>    #include "kernel-shared/disk-io.h"
>>>    #include "kernel-shared/free-space-tree.h"
>>> @@ -60,6 +61,20 @@ struct mkfs_allocation {
>>>        u64 system;
>>>    };
>>>
>>> +static bool opt_zero_end = true;
>>> +static bool opt_discard = true;
>>> +static bool opt_zoned = true;
>>> +static int opt_oflags = O_RDONLY;
>>
>> Isn't the default one should be O_RDWR?
>>
>> Despite that, feel free to addd my reviewed by tag:
>>
>> Reviewed-by: Qu Wenruo <wqu@suse.com>
>>
>> Thanks,
>> Qu
>>> +
>>> +static pthread_mutex_t prepare_mutex;
>>> +
>>> +struct prepare_device_progress {
>>> +     char *file;
>>> +     u64 dev_block_count;
>>> +     u64 block_count;
>>> +     int ret;
>>> +};
>>> +
>>>    static int create_metadata_block_groups(struct btrfs_root *root, bool mixed,
>>>                                struct mkfs_allocation *allocation)
>>>    {
>>> @@ -969,6 +984,31 @@ fail:
>>>        return ret;
>>>    }
>>>
>>> +static void *prepare_one_dev(void *ctx)
>>> +{
>>> +     struct prepare_device_progress *prepare_ctx = ctx;
>>> +     int fd;
>>> +
>>> +     fd = open(prepare_ctx->file, opt_oflags);
>>> +     if (fd < 0) {
>>> +             pthread_mutex_lock(&prepare_mutex);
>>> +             error("unable to open %s: %m", prepare_ctx->file);
>>> +             pthread_mutex_unlock(&prepare_mutex);
>>> +             prepare_ctx->ret = fd;
>>> +             return NULL;
>>> +     }
>>> +     prepare_ctx->ret = btrfs_prepare_device(fd,
>>> +                     prepare_ctx->file,
>>> +                     &prepare_ctx->dev_block_count,
>>> +                     prepare_ctx->block_count,
>>> +                     (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
>>> +                     (opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
>>> +                     (opt_discard ? PREP_DEVICE_DISCARD : 0) |
>>> +                     (opt_zoned ? PREP_DEVICE_ZONED : 0));
>>> +     close(fd);
>>> +     return NULL;
>>> +}
>>> +
>>>    int BOX_MAIN(mkfs)(int argc, char **argv)
>>>    {
>>>        char *file;
>>> @@ -984,7 +1024,6 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>        u32 nodesize = 0;
>>>        u32 sectorsize = 0;
>>>        u32 stripesize = 4096;
>>> -     bool zero_end = true;
>>>        int fd = -1;
>>>        int ret = 0;
>>>        int close_ret;
>>> @@ -993,11 +1032,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>        bool nodesize_forced = false;
>>>        bool data_profile_opt = false;
>>>        bool metadata_profile_opt = false;
>>> -     bool discard = true;
>>>        bool ssd = false;
>>> -     bool zoned = false;
>>>        bool force_overwrite = false;
>>> -     int oflags;
>>>        char *source_dir = NULL;
>>>        bool source_dir_set = false;
>>>        bool shrink_rootdir = false;
>>> @@ -1006,6 +1042,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>        u64 shrink_size;
>>>        int dev_cnt = 0;
>>>        int saved_optind;
>>> +     pthread_t *t_prepare = NULL;
>>> +     struct prepare_device_progress *prepare_ctx = NULL;
>>>        char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
>>>        u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
>>>        u64 runtime_features = BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES;
>>> @@ -1126,7 +1164,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>                                break;
>>>                        case 'b':
>>>                                block_count = parse_size_from_string(optarg);
>>> -                             zero_end = false;
>>> +                             opt_zero_end = false;
>>>                                break;
>>>                        case 'v':
>>>                                bconf_be_verbose();
>>> @@ -1144,7 +1182,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>                                        BTRFS_UUID_UNPARSED_SIZE - 1);
>>>                                break;
>>>                        case 'K':
>>> -                             discard = false;
>>> +                             opt_discard = false;
>>>                                break;
>>>                        case 'q':
>>>                                bconf_be_quiet();
>>> @@ -1183,7 +1221,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>        if (dev_cnt == 0)
>>>                print_usage(1);
>>>
>>> -     zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
>>> +     opt_zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
>>>
>>>        if (source_dir_set && dev_cnt > 1) {
>>>                error("the option -r is limited to a single device");
>>> @@ -1225,7 +1263,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>
>>>        file = argv[optind++];
>>>        ssd = is_ssd(file);
>>> -     if (zoned) {
>>> +     if (opt_zoned) {
>>>                if (!zone_size(file)) {
>>>                        error("zoned: %s: zone size undefined", file);
>>>                        exit(1);
>>> @@ -1235,7 +1273,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>                        printf(
>>>        "Zoned: %s: host-managed device detected, setting zoned feature\n",
>>>                               file);
>>> -             zoned = true;
>>> +             opt_zoned = true;
>>>                features |= BTRFS_FEATURE_INCOMPAT_ZONED;
>>>        }
>>>
>>> @@ -1302,7 +1340,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>                }
>>>        }
>>>
>>> -     if (zoned) {
>>> +     if (opt_zoned) {
>>>                if (source_dir_set) {
>>>                        error("the option -r and zoned mode are incompatible");
>>>                        exit(1);
>>> @@ -1392,7 +1430,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>         * 1 zone for a metadata block group
>>>         * 1 zone for a data block group
>>>         */
>>> -     if (zoned && block_count && block_count < 5 * zone_size(file)) {
>>> +     if (opt_zoned && block_count && block_count < 5 * zone_size(file)) {
>>>                error("size %llu is too small to make a usable filesystem",
>>>                        block_count);
>>>                error("minimum size for a zoned btrfs filesystem is %llu",
>>> @@ -1422,35 +1460,58 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>        if (ret)
>>>                goto error;
>>>
>>> -     if (zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
>>> +     if (opt_zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
>>>                      !zoned_profile_supported(BTRFS_BLOCK_GROUP_DATA | data_profile))) {
>>>                error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
>>>                goto error;
>>>        }
>>>
>>> -     dev_cnt--;
>>> +     t_prepare = calloc(dev_cnt, sizeof(*t_prepare));
>>> +     prepare_ctx = calloc(dev_cnt, sizeof(*prepare_ctx));
>>>
>>> -     oflags = O_RDWR;
>>> -     if (zoned && zoned_model(file) == ZONED_HOST_MANAGED)
>>> -             oflags |= O_DIRECT;
>>> +     if (!t_prepare || !prepare_ctx) {
>>> +             error("unable to alloc thread for preparing dev");
>>> +             goto error;
>>> +     }
>>>
>>> -     /*
>>> -      * Open without O_EXCL so that the problem should not occur by the
>>> -      * following operation in kernel:
>>> -      * (btrfs_register_one_device() fails if O_EXCL is on)
>>> -      */
>>> -     fd = open(file, oflags);
>>> +     pthread_mutex_init(&prepare_mutex, NULL);
>>> +     opt_oflags = O_RDWR;
>>> +     for (i = 0; i < dev_cnt; i++) {
>>> +             if (opt_zoned && zoned_model(argv[optind + i - 1]) ==
>>> +                     ZONED_HOST_MANAGED) {
>>> +                     opt_oflags |= O_DIRECT;
>>> +                     break;
>>> +             }
>>> +     }
>>> +     for (i = 0; i < dev_cnt; i++) {
>>> +             prepare_ctx[i].file = argv[optind + i - 1];
>>> +             prepare_ctx[i].block_count = block_count;
>>> +             prepare_ctx[i].dev_block_count = block_count;
>>> +             ret = pthread_create(&t_prepare[i], NULL,
>>> +                     prepare_one_dev, &prepare_ctx[i]);
>>> +             if (ret) {
>>> +                     error("create thread for prepare devices: %s failed, "
>>> +                                     "errno: %d",
>>> +                                     prepare_ctx[i].file, ret);
>>> +                     goto error;
>>> +             }
>>> +     }
>>> +     for (i = 0; i < dev_cnt; i++)
>>> +             pthread_join(t_prepare[i], NULL);
>>> +     ret = prepare_ctx[0].ret;
>>> +
>>> +     if (ret) {
>>> +             error("unable prepare device: %s\n", prepare_ctx[0].file);
>>> +             goto error;
>>> +     }
>>> +
>>> +     dev_cnt--;
>>> +     fd = open(file, opt_oflags);
>>>        if (fd < 0) {
>>>                error("unable to open %s: %m", file);
>>>                goto error;
>>>        }
>>> -     ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
>>> -                     (zero_end ? PREP_DEVICE_ZERO_END : 0) |
>>> -                     (discard ? PREP_DEVICE_DISCARD : 0) |
>>> -                     (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
>>> -                     (zoned ? PREP_DEVICE_ZONED : 0));
>>> -     if (ret)
>>> -             goto error;
>>> +     dev_block_count = prepare_ctx[0].dev_block_count;
>>>        if (block_count && block_count > dev_block_count) {
>>>                error("%s is smaller than requested size, expected %llu, found %llu",
>>>                      file, (unsigned long long)block_count,
>>> @@ -1459,7 +1520,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>        }
>>>
>>>        /* To create the first block group and chunk 0 in make_btrfs */
>>> -     system_group_size = zoned ?  zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
>>> +     system_group_size = opt_zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
>>>        if (dev_block_count < system_group_size) {
>>>                error("device is too small to make filesystem, must be at least %llu",
>>>                                (unsigned long long)system_group_size);
>>> @@ -1487,7 +1548,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>        mkfs_cfg.runtime_features = runtime_features;
>>>        mkfs_cfg.csum_type = csum_type;
>>>        mkfs_cfg.leaf_data_size = __BTRFS_LEAF_DATA_SIZE(nodesize);
>>> -     if (zoned)
>>> +     if (opt_zoned)
>>>                mkfs_cfg.zone_size = zone_size(file);
>>>        else
>>>                mkfs_cfg.zone_size = 0;
>>> @@ -1558,14 +1619,10 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>                goto raid_groups;
>>>
>>>        while (dev_cnt-- > 0) {
>>> +             int dev_index = argc - saved_optind - dev_cnt - 1;
>>>                file = argv[optind++];
>>>
>>> -             /*
>>> -              * open without O_EXCL so that the problem should not
>>> -              * occur by the following processing.
>>> -              * (btrfs_register_one_device() fails if O_EXCL is on)
>>> -              */
>>> -             fd = open(file, O_RDWR);
>>> +             fd = open(file, opt_oflags);
>>>                if (fd < 0) {
>>>                        error("unable to open %s: %m", file);
>>>                        goto error;
>>> @@ -1578,13 +1635,12 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
>>>                        close(fd);
>>>                        continue;
>>>                }
>>> -             ret = btrfs_prepare_device(fd, file, &dev_block_count,
>>> -                             block_count,
>>> -                             (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
>>> -                             (zero_end ? PREP_DEVICE_ZERO_END : 0) |
>>> -                             (discard ? PREP_DEVICE_DISCARD : 0) |
>>> -                             (zoned ? PREP_DEVICE_ZONED : 0));
>>> -             if (ret) {
>>> +             dev_block_count = prepare_ctx[dev_index]
>>> +                     .dev_block_count;
>>> +
>>> +             if (prepare_ctx[dev_index].ret) {
>>> +                     error("unable prepare device: %s.\n",
>>> +                                     prepare_ctx[dev_index].file);
>>>                        goto error;
>>>                }
>>>
>>> @@ -1714,8 +1770,8 @@ raid_groups:
>>>                        btrfs_group_profile_str(metadata_profile),
>>>                        pretty_size(allocation.system));
>>>                printf("SSD detected:       %s\n", ssd ? "yes" : "no");
>>> -             printf("Zoned device:       %s\n", zoned ? "yes" : "no");
>>> -             if (zoned)
>>> +             printf("Zoned device:       %s\n", opt_zoned ? "yes" : "no");
>>> +             if (opt_zoned)
>>>                        printf("  Zone size:        %s\n",
>>>                               pretty_size(fs_info->zone_size));
>>>                btrfs_parse_fs_features_to_string(features_buf, features);
>>> @@ -1763,12 +1819,16 @@ out:
>>>
>>>        btrfs_close_all_devices();
>>>        free(label);
>>> -
>>> +     free(t_prepare);
>>> +     free(prepare_ctx);
>>>        return !!ret;
>>> +
>>>    error:
>>>        if (fd > 0)
>>>                close(fd);
>>>
>>> +     free(t_prepare);
>>> +     free(prepare_ctx);
>>>        free(label);
>>>        exit(1);
>>>    success:
Li Zhang Sept. 4, 2022, 10:28 a.m. UTC | #4
ok, I got you.

Thanks,
Li

Qu Wenruo <wqu@suse.com> 于2022年9月4日周日 17:50写道:
>
>
>
> On 2022/9/4 17:31, li zhang wrote:
> > My first thought was that we should give the default value a least privilege,
> > so when others want to use this global value they should think about what
> > privileges they really need, so at first I initialized opt_oflags with 0,
> > but scripts/checkpatch. pl(btrfs-devel) says I can't initialize global variables
> > with 0 and opt_zoned with false.
>
> For checkpatch, I guess the reason is, static global variable is already
> initialized to 0, thus no need to specificially assign it to 0/false.
>
> But if you can provide some context, like the checkpatch error, maybe
> I'm totally wrong.
>
>
> Another thing related the privillege is, the global variables are only
> for mkfs usage.
>
> I can hardly come up any reason that we want read-only open flags for
> mkfs...
>
> Thanks,
> Qu
>
> > So I compromised to initialize opt_oflags
> > with O_RDONLY and opt_zoned with false .
> >
> > Thanks,
> > Li
> >
> > Qu Wenruo <quwenruo.btrfs@gmx.com> 于2022年9月4日周日 16:40写道:
> >>
> >>
> >>
> >> On 2022/9/4 16:24, Li Zhang wrote:
> >>> [enhancement]
> >>> When a disk is formatted as btrfs, it calls
> >>> btrfs_prepare_device for each device, which takes too much time.
> >>>
> >>> [implementation]
> >>> Put each btrfs_prepare_device into a thread,
> >>> wait for the first thread to complete to mkfs.btrfs,
> >>> and wait for other threads to complete before adding
> >>> other devices to the file system.
> >>>
> >>> [test]
> >>> Using the btrfs-progs test case mkfs-tests, mkfs.btrfs works fine.
> >>>
> >>> Use tcmu-runner emulation to simulate two devices for testing.
> >>> Each device is 2000G (about 19.53 TiB), the region size is 4MB,
> >>> Use the following parameters for targetcli
> >>> create name=zbc0 size=20000G cfgstring=model-HM/zsize-4/conv-100@~/zbc0.raw
> >>>
> >>> Call difftime to calculate the running time of the function btrfs_prepare_device.
> >>> Calculate the time from thread creation to completion of all threads after
> >>> patching (time calculation is not included in patch submission)
> >>> The test results are as follows.
> >>>
> >>> $ lsscsi -p
> >>> [10:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdb   -          none
> >>> [11:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdc   -          none
> >>>
> >>> $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
> >>> ....
> >>> time for prepare devices:4.000000.
> >>> ....
> >>>
> >>> $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
> >>> ...
> >>> time for prepare devices:2.000000.
> >>> ...
> >>>
> >>> Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
> >>> ---
> >>> Issue: 496
> >>>
> >>> V1:
> >>> * Put btrfs_prepare_device into threads and make them parallel
> >>>
> >>> V2:
> >>> * Set the 4 variables used by btrfs_prepare_device as global variables.
> >>> * Use pthread_mutex to ensure error messages are not messed up.
> >>> * Correct the error message
> >>> * Wait for all threads to exit in a loop
> >>>
> >>> V3:
> >>> * Add prefix opt to the global variables
> >>> * Format code
> >>> * Add error handler for open
> >>>
> >>> V4:
> >>> * initialize the global options
> >>>
> >>>    mkfs/main.c | 154 +++++++++++++++++++++++++++++++++++++++++-------------------
> >>>    1 file changed, 107 insertions(+), 47 deletions(-)
> >>>
> >>> diff --git a/mkfs/main.c b/mkfs/main.c
> >>> index ce096d3..3e16a0e 100644
> >>> --- a/mkfs/main.c
> >>> +++ b/mkfs/main.c
> >>> @@ -31,6 +31,7 @@
> >>>    #include <uuid/uuid.h>
> >>>    #include <ctype.h>
> >>>    #include <blkid/blkid.h>
> >>> +#include <pthread.h>
> >>>    #include "kernel-shared/ctree.h"
> >>>    #include "kernel-shared/disk-io.h"
> >>>    #include "kernel-shared/free-space-tree.h"
> >>> @@ -60,6 +61,20 @@ struct mkfs_allocation {
> >>>        u64 system;
> >>>    };
> >>>
> >>> +static bool opt_zero_end = true;
> >>> +static bool opt_discard = true;
> >>> +static bool opt_zoned = true;
> >>> +static int opt_oflags = O_RDONLY;
> >>
> >> Isn't the default one should be O_RDWR?
> >>
> >> Despite that, feel free to addd my reviewed by tag:
> >>
> >> Reviewed-by: Qu Wenruo <wqu@suse.com>
> >>
> >> Thanks,
> >> Qu
> >>> +
> >>> +static pthread_mutex_t prepare_mutex;
> >>> +
> >>> +struct prepare_device_progress {
> >>> +     char *file;
> >>> +     u64 dev_block_count;
> >>> +     u64 block_count;
> >>> +     int ret;
> >>> +};
> >>> +
> >>>    static int create_metadata_block_groups(struct btrfs_root *root, bool mixed,
> >>>                                struct mkfs_allocation *allocation)
> >>>    {
> >>> @@ -969,6 +984,31 @@ fail:
> >>>        return ret;
> >>>    }
> >>>
> >>> +static void *prepare_one_dev(void *ctx)
> >>> +{
> >>> +     struct prepare_device_progress *prepare_ctx = ctx;
> >>> +     int fd;
> >>> +
> >>> +     fd = open(prepare_ctx->file, opt_oflags);
> >>> +     if (fd < 0) {
> >>> +             pthread_mutex_lock(&prepare_mutex);
> >>> +             error("unable to open %s: %m", prepare_ctx->file);
> >>> +             pthread_mutex_unlock(&prepare_mutex);
> >>> +             prepare_ctx->ret = fd;
> >>> +             return NULL;
> >>> +     }
> >>> +     prepare_ctx->ret = btrfs_prepare_device(fd,
> >>> +                     prepare_ctx->file,
> >>> +                     &prepare_ctx->dev_block_count,
> >>> +                     prepare_ctx->block_count,
> >>> +                     (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> >>> +                     (opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
> >>> +                     (opt_discard ? PREP_DEVICE_DISCARD : 0) |
> >>> +                     (opt_zoned ? PREP_DEVICE_ZONED : 0));
> >>> +     close(fd);
> >>> +     return NULL;
> >>> +}
> >>> +
> >>>    int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>    {
> >>>        char *file;
> >>> @@ -984,7 +1024,6 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>        u32 nodesize = 0;
> >>>        u32 sectorsize = 0;
> >>>        u32 stripesize = 4096;
> >>> -     bool zero_end = true;
> >>>        int fd = -1;
> >>>        int ret = 0;
> >>>        int close_ret;
> >>> @@ -993,11 +1032,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>        bool nodesize_forced = false;
> >>>        bool data_profile_opt = false;
> >>>        bool metadata_profile_opt = false;
> >>> -     bool discard = true;
> >>>        bool ssd = false;
> >>> -     bool zoned = false;
> >>>        bool force_overwrite = false;
> >>> -     int oflags;
> >>>        char *source_dir = NULL;
> >>>        bool source_dir_set = false;
> >>>        bool shrink_rootdir = false;
> >>> @@ -1006,6 +1042,8 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>        u64 shrink_size;
> >>>        int dev_cnt = 0;
> >>>        int saved_optind;
> >>> +     pthread_t *t_prepare = NULL;
> >>> +     struct prepare_device_progress *prepare_ctx = NULL;
> >>>        char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
> >>>        u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
> >>>        u64 runtime_features = BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES;
> >>> @@ -1126,7 +1164,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>                                break;
> >>>                        case 'b':
> >>>                                block_count = parse_size_from_string(optarg);
> >>> -                             zero_end = false;
> >>> +                             opt_zero_end = false;
> >>>                                break;
> >>>                        case 'v':
> >>>                                bconf_be_verbose();
> >>> @@ -1144,7 +1182,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>                                        BTRFS_UUID_UNPARSED_SIZE - 1);
> >>>                                break;
> >>>                        case 'K':
> >>> -                             discard = false;
> >>> +                             opt_discard = false;
> >>>                                break;
> >>>                        case 'q':
> >>>                                bconf_be_quiet();
> >>> @@ -1183,7 +1221,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>        if (dev_cnt == 0)
> >>>                print_usage(1);
> >>>
> >>> -     zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
> >>> +     opt_zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
> >>>
> >>>        if (source_dir_set && dev_cnt > 1) {
> >>>                error("the option -r is limited to a single device");
> >>> @@ -1225,7 +1263,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>
> >>>        file = argv[optind++];
> >>>        ssd = is_ssd(file);
> >>> -     if (zoned) {
> >>> +     if (opt_zoned) {
> >>>                if (!zone_size(file)) {
> >>>                        error("zoned: %s: zone size undefined", file);
> >>>                        exit(1);
> >>> @@ -1235,7 +1273,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>                        printf(
> >>>        "Zoned: %s: host-managed device detected, setting zoned feature\n",
> >>>                               file);
> >>> -             zoned = true;
> >>> +             opt_zoned = true;
> >>>                features |= BTRFS_FEATURE_INCOMPAT_ZONED;
> >>>        }
> >>>
> >>> @@ -1302,7 +1340,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>                }
> >>>        }
> >>>
> >>> -     if (zoned) {
> >>> +     if (opt_zoned) {
> >>>                if (source_dir_set) {
> >>>                        error("the option -r and zoned mode are incompatible");
> >>>                        exit(1);
> >>> @@ -1392,7 +1430,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>         * 1 zone for a metadata block group
> >>>         * 1 zone for a data block group
> >>>         */
> >>> -     if (zoned && block_count && block_count < 5 * zone_size(file)) {
> >>> +     if (opt_zoned && block_count && block_count < 5 * zone_size(file)) {
> >>>                error("size %llu is too small to make a usable filesystem",
> >>>                        block_count);
> >>>                error("minimum size for a zoned btrfs filesystem is %llu",
> >>> @@ -1422,35 +1460,58 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>        if (ret)
> >>>                goto error;
> >>>
> >>> -     if (zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
> >>> +     if (opt_zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
> >>>                      !zoned_profile_supported(BTRFS_BLOCK_GROUP_DATA | data_profile))) {
> >>>                error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
> >>>                goto error;
> >>>        }
> >>>
> >>> -     dev_cnt--;
> >>> +     t_prepare = calloc(dev_cnt, sizeof(*t_prepare));
> >>> +     prepare_ctx = calloc(dev_cnt, sizeof(*prepare_ctx));
> >>>
> >>> -     oflags = O_RDWR;
> >>> -     if (zoned && zoned_model(file) == ZONED_HOST_MANAGED)
> >>> -             oflags |= O_DIRECT;
> >>> +     if (!t_prepare || !prepare_ctx) {
> >>> +             error("unable to alloc thread for preparing dev");
> >>> +             goto error;
> >>> +     }
> >>>
> >>> -     /*
> >>> -      * Open without O_EXCL so that the problem should not occur by the
> >>> -      * following operation in kernel:
> >>> -      * (btrfs_register_one_device() fails if O_EXCL is on)
> >>> -      */
> >>> -     fd = open(file, oflags);
> >>> +     pthread_mutex_init(&prepare_mutex, NULL);
> >>> +     opt_oflags = O_RDWR;
> >>> +     for (i = 0; i < dev_cnt; i++) {
> >>> +             if (opt_zoned && zoned_model(argv[optind + i - 1]) ==
> >>> +                     ZONED_HOST_MANAGED) {
> >>> +                     opt_oflags |= O_DIRECT;
> >>> +                     break;
> >>> +             }
> >>> +     }
> >>> +     for (i = 0; i < dev_cnt; i++) {
> >>> +             prepare_ctx[i].file = argv[optind + i - 1];
> >>> +             prepare_ctx[i].block_count = block_count;
> >>> +             prepare_ctx[i].dev_block_count = block_count;
> >>> +             ret = pthread_create(&t_prepare[i], NULL,
> >>> +                     prepare_one_dev, &prepare_ctx[i]);
> >>> +             if (ret) {
> >>> +                     error("create thread for prepare devices: %s failed, "
> >>> +                                     "errno: %d",
> >>> +                                     prepare_ctx[i].file, ret);
> >>> +                     goto error;
> >>> +             }
> >>> +     }
> >>> +     for (i = 0; i < dev_cnt; i++)
> >>> +             pthread_join(t_prepare[i], NULL);
> >>> +     ret = prepare_ctx[0].ret;
> >>> +
> >>> +     if (ret) {
> >>> +             error("unable prepare device: %s\n", prepare_ctx[0].file);
> >>> +             goto error;
> >>> +     }
> >>> +
> >>> +     dev_cnt--;
> >>> +     fd = open(file, opt_oflags);
> >>>        if (fd < 0) {
> >>>                error("unable to open %s: %m", file);
> >>>                goto error;
> >>>        }
> >>> -     ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
> >>> -                     (zero_end ? PREP_DEVICE_ZERO_END : 0) |
> >>> -                     (discard ? PREP_DEVICE_DISCARD : 0) |
> >>> -                     (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> >>> -                     (zoned ? PREP_DEVICE_ZONED : 0));
> >>> -     if (ret)
> >>> -             goto error;
> >>> +     dev_block_count = prepare_ctx[0].dev_block_count;
> >>>        if (block_count && block_count > dev_block_count) {
> >>>                error("%s is smaller than requested size, expected %llu, found %llu",
> >>>                      file, (unsigned long long)block_count,
> >>> @@ -1459,7 +1520,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>        }
> >>>
> >>>        /* To create the first block group and chunk 0 in make_btrfs */
> >>> -     system_group_size = zoned ?  zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
> >>> +     system_group_size = opt_zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
> >>>        if (dev_block_count < system_group_size) {
> >>>                error("device is too small to make filesystem, must be at least %llu",
> >>>                                (unsigned long long)system_group_size);
> >>> @@ -1487,7 +1548,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>        mkfs_cfg.runtime_features = runtime_features;
> >>>        mkfs_cfg.csum_type = csum_type;
> >>>        mkfs_cfg.leaf_data_size = __BTRFS_LEAF_DATA_SIZE(nodesize);
> >>> -     if (zoned)
> >>> +     if (opt_zoned)
> >>>                mkfs_cfg.zone_size = zone_size(file);
> >>>        else
> >>>                mkfs_cfg.zone_size = 0;
> >>> @@ -1558,14 +1619,10 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>                goto raid_groups;
> >>>
> >>>        while (dev_cnt-- > 0) {
> >>> +             int dev_index = argc - saved_optind - dev_cnt - 1;
> >>>                file = argv[optind++];
> >>>
> >>> -             /*
> >>> -              * open without O_EXCL so that the problem should not
> >>> -              * occur by the following processing.
> >>> -              * (btrfs_register_one_device() fails if O_EXCL is on)
> >>> -              */
> >>> -             fd = open(file, O_RDWR);
> >>> +             fd = open(file, opt_oflags);
> >>>                if (fd < 0) {
> >>>                        error("unable to open %s: %m", file);
> >>>                        goto error;
> >>> @@ -1578,13 +1635,12 @@ int BOX_MAIN(mkfs)(int argc, char **argv)
> >>>                        close(fd);
> >>>                        continue;
> >>>                }
> >>> -             ret = btrfs_prepare_device(fd, file, &dev_block_count,
> >>> -                             block_count,
> >>> -                             (bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> >>> -                             (zero_end ? PREP_DEVICE_ZERO_END : 0) |
> >>> -                             (discard ? PREP_DEVICE_DISCARD : 0) |
> >>> -                             (zoned ? PREP_DEVICE_ZONED : 0));
> >>> -             if (ret) {
> >>> +             dev_block_count = prepare_ctx[dev_index]
> >>> +                     .dev_block_count;
> >>> +
> >>> +             if (prepare_ctx[dev_index].ret) {
> >>> +                     error("unable prepare device: %s.\n",
> >>> +                                     prepare_ctx[dev_index].file);
> >>>                        goto error;
> >>>                }
> >>>
> >>> @@ -1714,8 +1770,8 @@ raid_groups:
> >>>                        btrfs_group_profile_str(metadata_profile),
> >>>                        pretty_size(allocation.system));
> >>>                printf("SSD detected:       %s\n", ssd ? "yes" : "no");
> >>> -             printf("Zoned device:       %s\n", zoned ? "yes" : "no");
> >>> -             if (zoned)
> >>> +             printf("Zoned device:       %s\n", opt_zoned ? "yes" : "no");
> >>> +             if (opt_zoned)
> >>>                        printf("  Zone size:        %s\n",
> >>>                               pretty_size(fs_info->zone_size));
> >>>                btrfs_parse_fs_features_to_string(features_buf, features);
> >>> @@ -1763,12 +1819,16 @@ out:
> >>>
> >>>        btrfs_close_all_devices();
> >>>        free(label);
> >>> -
> >>> +     free(t_prepare);
> >>> +     free(prepare_ctx);
> >>>        return !!ret;
> >>> +
> >>>    error:
> >>>        if (fd > 0)
> >>>                close(fd);
> >>>
> >>> +     free(t_prepare);
> >>> +     free(prepare_ctx);
> >>>        free(label);
> >>>        exit(1);
> >>>    success:
David Sterba Sept. 22, 2022, 10:28 p.m. UTC | #5
On Sun, Sep 04, 2022 at 04:24:23PM +0800, Li Zhang wrote:
> [enhancement]
> When a disk is formatted as btrfs, it calls
> btrfs_prepare_device for each device, which takes too much time.
> 
> [implementation]
> Put each btrfs_prepare_device into a thread,
> wait for the first thread to complete to mkfs.btrfs,
> and wait for other threads to complete before adding
> other devices to the file system.
> 
> [test]
> Using the btrfs-progs test case mkfs-tests, mkfs.btrfs works fine.
> 
> Use tcmu-runner emulation to simulate two devices for testing.
> Each device is 2000G (about 19.53 TiB), the region size is 4MB,
> Use the following parameters for targetcli
> create name=zbc0 size=20000G cfgstring=model-HM/zsize-4/conv-100@~/zbc0.raw
> 
> Call difftime to calculate the running time of the function btrfs_prepare_device.
> Calculate the time from thread creation to completion of all threads after
> patching (time calculation is not included in patch submission)
> The test results are as follows.
> 
> $ lsscsi -p
> [10:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdb   -          none
> [11:0:1:0]   (0x14)  LIO-ORG  TCMU ZBC device  0002  /dev/sdc   -          none
> 
> $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
> ....
> time for prepare devices:4.000000.
> ....
> 
> $ sudo mkfs.btrfs -d single -m single -O zoned /dev/sdc /dev/sdb -f
> ...
> time for prepare devices:2.000000.
> ...
> 
> Signed-off-by: Li Zhang <zhanglikernel@gmail.com>
> ---
> Issue: 496

Thanks for implementing it. I did only some style fixups but the
parallel zone reset works (and trim/discard as well as it's the same
function).

I've tested it with the null blk devices (128M, 4M zone, 32 zones) and
added some more debugging to see how it works, with artificial delay
100ms to zone reset. The overall runtime is about the same when running
on devices:

 2 - 4s
 6 - 5s
 8 - 5s
10 - 6s

It's fast so there's some noise in the numbers but it's not linear in
the number of devices and the threads may also wait on the printing lock
as I've added some debugging messages.

Sample run with your patch reverted and same delay:

 2 - 7s
 6 - 21s
 8 - 27s
10 - 35s

> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -969,6 +984,31 @@ fail:
>  	return ret;
>  }
>  
> +static void *prepare_one_dev(void *ctx)
> +{
> +	struct prepare_device_progress *prepare_ctx = ctx;
> +	int fd;
> +
> +	fd = open(prepare_ctx->file, opt_oflags);
> +	if (fd < 0) {
> +		pthread_mutex_lock(&prepare_mutex);

Is this lock actually needed? printf is thread safe and there are other
messages printed from btrfs_prepare_device anyway, so using this lock
does not help.

> +		error("unable to open %s: %m", prepare_ctx->file);
> +		pthread_mutex_unlock(&prepare_mutex);
> +		prepare_ctx->ret = fd;
> +		return NULL;
> +	}
> +	prepare_ctx->ret = btrfs_prepare_device(fd,
> +			prepare_ctx->file,
> +			&prepare_ctx->dev_block_count,
> +			prepare_ctx->block_count,
> +			(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
> +			(opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
> +			(opt_discard ? PREP_DEVICE_DISCARD : 0) |
> +			(opt_zoned ? PREP_DEVICE_ZONED : 0));
> +	close(fd);
> +	return NULL;
> +}
diff mbox series

Patch

diff --git a/mkfs/main.c b/mkfs/main.c
index ce096d3..3e16a0e 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -31,6 +31,7 @@ 
 #include <uuid/uuid.h>
 #include <ctype.h>
 #include <blkid/blkid.h>
+#include <pthread.h>
 #include "kernel-shared/ctree.h"
 #include "kernel-shared/disk-io.h"
 #include "kernel-shared/free-space-tree.h"
@@ -60,6 +61,20 @@  struct mkfs_allocation {
 	u64 system;
 };
 
+static bool opt_zero_end = true;
+static bool opt_discard = true;
+static bool opt_zoned = true;
+static int opt_oflags = O_RDONLY;
+
+static pthread_mutex_t prepare_mutex;
+
+struct prepare_device_progress {
+	char *file;
+	u64 dev_block_count;
+	u64 block_count;
+	int ret;
+};
+
 static int create_metadata_block_groups(struct btrfs_root *root, bool mixed,
 				struct mkfs_allocation *allocation)
 {
@@ -969,6 +984,31 @@  fail:
 	return ret;
 }
 
+static void *prepare_one_dev(void *ctx)
+{
+	struct prepare_device_progress *prepare_ctx = ctx;
+	int fd;
+
+	fd = open(prepare_ctx->file, opt_oflags);
+	if (fd < 0) {
+		pthread_mutex_lock(&prepare_mutex);
+		error("unable to open %s: %m", prepare_ctx->file);
+		pthread_mutex_unlock(&prepare_mutex);
+		prepare_ctx->ret = fd;
+		return NULL;
+	}
+	prepare_ctx->ret = btrfs_prepare_device(fd,
+			prepare_ctx->file,
+			&prepare_ctx->dev_block_count,
+			prepare_ctx->block_count,
+			(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
+			(opt_zero_end ? PREP_DEVICE_ZERO_END : 0) |
+			(opt_discard ? PREP_DEVICE_DISCARD : 0) |
+			(opt_zoned ? PREP_DEVICE_ZONED : 0));
+	close(fd);
+	return NULL;
+}
+
 int BOX_MAIN(mkfs)(int argc, char **argv)
 {
 	char *file;
@@ -984,7 +1024,6 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 	u32 nodesize = 0;
 	u32 sectorsize = 0;
 	u32 stripesize = 4096;
-	bool zero_end = true;
 	int fd = -1;
 	int ret = 0;
 	int close_ret;
@@ -993,11 +1032,8 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 	bool nodesize_forced = false;
 	bool data_profile_opt = false;
 	bool metadata_profile_opt = false;
-	bool discard = true;
 	bool ssd = false;
-	bool zoned = false;
 	bool force_overwrite = false;
-	int oflags;
 	char *source_dir = NULL;
 	bool source_dir_set = false;
 	bool shrink_rootdir = false;
@@ -1006,6 +1042,8 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 	u64 shrink_size;
 	int dev_cnt = 0;
 	int saved_optind;
+	pthread_t *t_prepare = NULL;
+	struct prepare_device_progress *prepare_ctx = NULL;
 	char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = { 0 };
 	u64 features = BTRFS_MKFS_DEFAULT_FEATURES;
 	u64 runtime_features = BTRFS_MKFS_DEFAULT_RUNTIME_FEATURES;
@@ -1126,7 +1164,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 				break;
 			case 'b':
 				block_count = parse_size_from_string(optarg);
-				zero_end = false;
+				opt_zero_end = false;
 				break;
 			case 'v':
 				bconf_be_verbose();
@@ -1144,7 +1182,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 					BTRFS_UUID_UNPARSED_SIZE - 1);
 				break;
 			case 'K':
-				discard = false;
+				opt_discard = false;
 				break;
 			case 'q':
 				bconf_be_quiet();
@@ -1183,7 +1221,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 	if (dev_cnt == 0)
 		print_usage(1);
 
-	zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
+	opt_zoned = !!(features & BTRFS_FEATURE_INCOMPAT_ZONED);
 
 	if (source_dir_set && dev_cnt > 1) {
 		error("the option -r is limited to a single device");
@@ -1225,7 +1263,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 
 	file = argv[optind++];
 	ssd = is_ssd(file);
-	if (zoned) {
+	if (opt_zoned) {
 		if (!zone_size(file)) {
 			error("zoned: %s: zone size undefined", file);
 			exit(1);
@@ -1235,7 +1273,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 			printf(
 	"Zoned: %s: host-managed device detected, setting zoned feature\n",
 			       file);
-		zoned = true;
+		opt_zoned = true;
 		features |= BTRFS_FEATURE_INCOMPAT_ZONED;
 	}
 
@@ -1302,7 +1340,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 		}
 	}
 
-	if (zoned) {
+	if (opt_zoned) {
 		if (source_dir_set) {
 			error("the option -r and zoned mode are incompatible");
 			exit(1);
@@ -1392,7 +1430,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 	 * 1 zone for a metadata block group
 	 * 1 zone for a data block group
 	 */
-	if (zoned && block_count && block_count < 5 * zone_size(file)) {
+	if (opt_zoned && block_count && block_count < 5 * zone_size(file)) {
 		error("size %llu is too small to make a usable filesystem",
 			block_count);
 		error("minimum size for a zoned btrfs filesystem is %llu",
@@ -1422,35 +1460,58 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 	if (ret)
 		goto error;
 
-	if (zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
+	if (opt_zoned && (!zoned_profile_supported(BTRFS_BLOCK_GROUP_METADATA | metadata_profile) ||
 		      !zoned_profile_supported(BTRFS_BLOCK_GROUP_DATA | data_profile))) {
 		error("zoned mode does not yet support RAID/DUP profiles, please specify '-d single -m single' manually");
 		goto error;
 	}
 
-	dev_cnt--;
+	t_prepare = calloc(dev_cnt, sizeof(*t_prepare));
+	prepare_ctx = calloc(dev_cnt, sizeof(*prepare_ctx));
 
-	oflags = O_RDWR;
-	if (zoned && zoned_model(file) == ZONED_HOST_MANAGED)
-		oflags |= O_DIRECT;
+	if (!t_prepare || !prepare_ctx) {
+		error("unable to alloc thread for preparing dev");
+		goto error;
+	}
 
-	/*
-	 * Open without O_EXCL so that the problem should not occur by the
-	 * following operation in kernel:
-	 * (btrfs_register_one_device() fails if O_EXCL is on)
-	 */
-	fd = open(file, oflags);
+	pthread_mutex_init(&prepare_mutex, NULL);
+	opt_oflags = O_RDWR;
+	for (i = 0; i < dev_cnt; i++) {
+		if (opt_zoned && zoned_model(argv[optind + i - 1]) ==
+			ZONED_HOST_MANAGED) {
+			opt_oflags |= O_DIRECT;
+			break;
+		}
+	}
+	for (i = 0; i < dev_cnt; i++) {
+		prepare_ctx[i].file = argv[optind + i - 1];
+		prepare_ctx[i].block_count = block_count;
+		prepare_ctx[i].dev_block_count = block_count;
+		ret = pthread_create(&t_prepare[i], NULL,
+			prepare_one_dev, &prepare_ctx[i]);
+		if (ret) {
+			error("create thread for prepare devices: %s failed, "
+					"errno: %d",
+					prepare_ctx[i].file, ret);
+			goto error;
+		}
+	}
+	for (i = 0; i < dev_cnt; i++)
+		pthread_join(t_prepare[i], NULL);
+	ret = prepare_ctx[0].ret;
+
+	if (ret) {
+		error("unable prepare device: %s\n", prepare_ctx[0].file);
+		goto error;
+	}
+
+	dev_cnt--;
+	fd = open(file, opt_oflags);
 	if (fd < 0) {
 		error("unable to open %s: %m", file);
 		goto error;
 	}
-	ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
-			(zero_end ? PREP_DEVICE_ZERO_END : 0) |
-			(discard ? PREP_DEVICE_DISCARD : 0) |
-			(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
-			(zoned ? PREP_DEVICE_ZONED : 0));
-	if (ret)
-		goto error;
+	dev_block_count = prepare_ctx[0].dev_block_count;
 	if (block_count && block_count > dev_block_count) {
 		error("%s is smaller than requested size, expected %llu, found %llu",
 		      file, (unsigned long long)block_count,
@@ -1459,7 +1520,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 	}
 
 	/* To create the first block group and chunk 0 in make_btrfs */
-	system_group_size = zoned ?  zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
+	system_group_size = opt_zoned ? zone_size(file) : BTRFS_MKFS_SYSTEM_GROUP_SIZE;
 	if (dev_block_count < system_group_size) {
 		error("device is too small to make filesystem, must be at least %llu",
 				(unsigned long long)system_group_size);
@@ -1487,7 +1548,7 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 	mkfs_cfg.runtime_features = runtime_features;
 	mkfs_cfg.csum_type = csum_type;
 	mkfs_cfg.leaf_data_size = __BTRFS_LEAF_DATA_SIZE(nodesize);
-	if (zoned)
+	if (opt_zoned)
 		mkfs_cfg.zone_size = zone_size(file);
 	else
 		mkfs_cfg.zone_size = 0;
@@ -1558,14 +1619,10 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 		goto raid_groups;
 
 	while (dev_cnt-- > 0) {
+		int dev_index = argc - saved_optind - dev_cnt - 1;
 		file = argv[optind++];
 
-		/*
-		 * open without O_EXCL so that the problem should not
-		 * occur by the following processing.
-		 * (btrfs_register_one_device() fails if O_EXCL is on)
-		 */
-		fd = open(file, O_RDWR);
+		fd = open(file, opt_oflags);
 		if (fd < 0) {
 			error("unable to open %s: %m", file);
 			goto error;
@@ -1578,13 +1635,12 @@  int BOX_MAIN(mkfs)(int argc, char **argv)
 			close(fd);
 			continue;
 		}
-		ret = btrfs_prepare_device(fd, file, &dev_block_count,
-				block_count,
-				(bconf.verbose ? PREP_DEVICE_VERBOSE : 0) |
-				(zero_end ? PREP_DEVICE_ZERO_END : 0) |
-				(discard ? PREP_DEVICE_DISCARD : 0) |
-				(zoned ? PREP_DEVICE_ZONED : 0));
-		if (ret) {
+		dev_block_count = prepare_ctx[dev_index]
+			.dev_block_count;
+
+		if (prepare_ctx[dev_index].ret) {
+			error("unable prepare device: %s.\n",
+					prepare_ctx[dev_index].file);
 			goto error;
 		}
 
@@ -1714,8 +1770,8 @@  raid_groups:
 			btrfs_group_profile_str(metadata_profile),
 			pretty_size(allocation.system));
 		printf("SSD detected:       %s\n", ssd ? "yes" : "no");
-		printf("Zoned device:       %s\n", zoned ? "yes" : "no");
-		if (zoned)
+		printf("Zoned device:       %s\n", opt_zoned ? "yes" : "no");
+		if (opt_zoned)
 			printf("  Zone size:        %s\n",
 			       pretty_size(fs_info->zone_size));
 		btrfs_parse_fs_features_to_string(features_buf, features);
@@ -1763,12 +1819,16 @@  out:
 
 	btrfs_close_all_devices();
 	free(label);
-
+	free(t_prepare);
+	free(prepare_ctx);
 	return !!ret;
+
 error:
 	if (fd > 0)
 		close(fd);
 
+	free(t_prepare);
+	free(prepare_ctx);
 	free(label);
 	exit(1);
 success: