Message ID | 20200319203913.3103-2-kreijack@libero.it (mailing list archive)
---|---
State | New, archived
Series | [rfc,v3] New ioctl BTRFS_IOC_GET_CHUNK_INFO.
On 3/19/20 4:39 PM, Goffredo Baroncelli wrote:
> From: Goffredo Baroncelli <kreijack@inwind.it>
>
> Add a new ioctl to get info about chunks without requiring root
> privileges. This allows a non-root user to know how the space of the
> filesystem is allocated.
>
> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> ---
>  fs/btrfs/ioctl.c           | 211 +++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/btrfs.h |  38 +++++++
>  2 files changed, 249 insertions(+)
[... full patch quoted; see the diff at the end of this page ...]
> +static noinline int btrfs_ioctl_get_chunk_info(struct file *file,
> +					void __user *argp)
> +{
> +	struct btrfs_ioctl_chunk_info arg;
> +	struct inode *inode;
> +	int ret;
> +	size_t buf_size;
> +	u64 data_offset;
> +	const size_t buf_limit = SZ_16M;
> +
> +	data_offset = sizeof(struct btrfs_ioctl_chunk_info);

I think I'm missing something, but since we have a single
btrfs_chunk_info_stripe at the end, this will point to the next slot, so
we're just copying in starting at slot 1, not slot 0, because you pass in
argp + data_offset below. This looks wonky to me, thanks,

Josef
On 3/19/20 9:59 PM, Josef Bacik wrote:
> On 3/19/20 4:39 PM, Goffredo Baroncelli wrote:
>> From: Goffredo Baroncelli <kreijack@inwind.it>
>>
>> Add a new ioctl to get info about chunks without requiring root
>> privileges. This allows a non-root user to know how the space of the
>> filesystem is allocated.
>>
>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
[...]
>> +	data_offset = sizeof(struct btrfs_ioctl_chunk_info);
>
> I think I'm missing something, but since we have a single
> btrfs_chunk_info_stripe at the end, this will point to the next slot, so
> we're just copying in starting at slot 1, not slot 0, because you pass in
> argp + data_offset below. This looks wonky to me, thanks,

I think that you are confusing "struct btrfs_ioctl_chunk_info" with
"struct btrfs_chunk_info". Only the second one has the single "struct
btrfs_chunk_info_stripe" at the end. Maybe?

> Josef
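To make the distinction concrete, here is a minimal sketch of the buffer
layout the patch implies: the fixed-size btrfs_ioctl_chunk_info header
sits at the start of the user buffer, and the variable-length
btrfs_chunk_info records follow it. BUF_SIZE is an arbitrary illustrative
value, not part of the patch:

	#include <stddef.h>

	#define BUF_SIZE (64 * 1024)

	char buf[BUF_SIZE];

	/* in/out header, written back by the ioctl */
	struct btrfs_ioctl_chunk_info *hdr =
		(struct btrfs_ioctl_chunk_info *)buf;

	/* first chunk record, i.e. argp + data_offset in the kernel code */
	struct btrfs_chunk_info *first =
		(struct btrfs_chunk_info *)(buf +
			sizeof(struct btrfs_ioctl_chunk_info));
	/* first->stripes[0 .. num_stripes-1] follow inline, then the
	 * next record starts */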
I'll start with the data structures

On Thu, Mar 19, 2020 at 09:39:13PM +0100, Goffredo Baroncelli wrote:
> From: Goffredo Baroncelli <kreijack@inwind.it>
> +struct btrfs_chunk_info_stripe {
> +	__u64 devid;
> +	__u64 offset;
> +	__u8 dev_uuid[BTRFS_UUID_SIZE];
> +};
> +
> +struct btrfs_chunk_info {
> +	/* logical start of this chunk */
> +	__u64 offset;
> +	/* size of this chunk in bytes */
> +	__u64 length;
> +
> +	__u64 stripe_len;
> +	__u64 type;
> +
> +	/* 2^16 stripes is quite a lot, a second limit is the size of a single
> +	 * item in the btree
> +	 */
> +	__u16 num_stripes;
> +
> +	/* sub stripes only matter for raid10 */
> +	__u16 sub_stripes;
> +
> +	struct btrfs_chunk_info_stripe stripes[1];
> +	/* additional stripes go here */
> +};

This looks like a copy of btrfs_chunk and stripe, only removing items not
needed for the chunk information. Rather than copying the unnecessary
fields like dev_uuid in stripe, this should be designed for data exchange
with the use case in mind.

The format does not need to follow the exact layout that the kernel uses,
i.e. chunk info with one embedded stripe followed by a variable length
array of further stripes. This is convenient in one way but not in
another one. Alternatively, each chunk can be emitted as a single entry,
duplicating part of the common fields and adding the stripe-specific
ones. This is for consideration.

I've looked at my old code doing the chunk dump based on the search ioctl
and found that it also allows reading the chunk usage, with one extra
search to the block group item where the usage is stored. As this can be
slow, it should be optional, i.e. the main ioctl structure needs flags
where this can be requested.
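One possible shape of the "flags" idea is sketched below: a request-flags
field in the ioctl header that makes the expensive 'used' lookup opt-in.
Both the flag name and the extra field are illustrative, not from the
patch:

	#include <linux/types.h>

	/* hypothetical: ask the kernel to also fill per-chunk usage */
	#define BTRFS_CHUNK_INFO_FLAG_USAGE	(1ULL << 0)

	struct btrfs_ioctl_chunk_info {
		__u64 offset;		/* in/out: logical offset to resume from */
		__u64 flags;		/* in: BTRFS_CHUNK_INFO_FLAG_* requests */
		__u32 buf_size;		/* in: total buffer size, header included */
		__u32 items_count;	/* out: number of items returned */
	};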
On 5/25/20 7:14 PM, David Sterba wrote:
> I'll start with the data structures
>
> On Thu, Mar 19, 2020 at 09:39:13PM +0100, Goffredo Baroncelli wrote:
>> From: Goffredo Baroncelli <kreijack@inwind.it>
>> +struct btrfs_chunk_info_stripe {
[...]
>
> This looks like a copy of btrfs_chunk and stripe, only removing items not
> needed for the chunk information. Rather than copying the unnecessary
> fields like dev_uuid in stripe, this should be designed for data exchange
> with the use case in mind.

There are two clients for this api:
- btrfs fi us
- btrfs dev us

We can get rid of:
- the "offset" fields (2x)
- the "uuid" fields

However the "offset" fields can be used to understand where a logical map
is on the physical disks. I am thinking about a graphical tool to show
this mapping, which doesn't exist yet :-). The offset field may also be
used as a key to get further information (like the chunk usage, see
below).

Regarding the UUID field, I agree it can be removed because it is
redundant (there is already the devid).

> The format does not need to follow the exact layout that the kernel uses,
> i.e. chunk info with one embedded stripe followed by a variable length
> array of further stripes. This is convenient in one way but not in
> another one. Alternatively, each chunk can be emitted as a single entry,
> duplicating part of the common fields and adding the stripe-specific
> ones. This is for consideration.
>
> I've looked at my old code doing the chunk dump based on the search ioctl
> and found that it also allows reading the chunk usage, with one extra
> search to the block group item where the usage is stored. As this can be
> slow, it should be optional, i.e. the main ioctl structure needs flags
> where this can be requested.

This info could be very useful. I am thinking of something like a balance
of the chunks which are nearly full (or nearly empty). The question is
whether we should have a different ioctl.
Hi,

On 5/26/20 10:19 PM, Goffredo Baroncelli wrote:
> On 5/25/20 7:14 PM, David Sterba wrote:
>> I'll start with the data structures
[...]
>> I've looked at my old code doing the chunk dump based on the search ioctl
>> and found that it also allows reading the chunk usage, with one extra
>> search to the block group item where the usage is stored. As this can be
>> slow, it should be optional, i.e. the main ioctl structure needs flags
>> where this can be requested.
>
> This info could be very useful. I am thinking of something like a balance
> of the chunks which are nearly full (or nearly empty). The question is
> whether we should have a different ioctl.

Do you mean that you want to allow a non-root user to run btrfs balance?

Otherwise, no. IMO convenience functions for quickly retrieving a
specific subset of data should be created as reusable library functions
in the calling code, not as a redundant extra IOCTL that has to be
maintained.

Hans
On 5/28/20 11:03 PM, Hans van Kranenburg wrote:
> Hi,
>
> On 5/26/20 10:19 PM, Goffredo Baroncelli wrote:
>> On 5/25/20 7:14 PM, David Sterba wrote:
>>> I'll start with the data structures
[...]
>> This info could be very useful. I am thinking of something like a balance
>> of the chunks which are nearly full (or nearly empty). The question is
>> whether we should have a different ioctl.
>
> Do you mean that you want to allow a non-root user to run btrfs balance?

Not at all. Balance is an expensive operation that IMHO needs root
privileges to be started.

> Otherwise, no. IMO convenience functions for quickly retrieving a
> specific subset of data should be created as reusable library functions
> in the calling code, not as a redundant extra IOCTL that has to be
> maintained.

I think there is a misunderstanding. There is no intention to take the
place of the current balance ioctl. The aim of this ioctl is only to get
information about the chunk distribution.

Getting the chunk information could help to perform a better balance,
e.g. a balance which starts from the emptiest chunks and then goes
forward, processing the fuller ones. Another use case is to visualize how
the chunks are filled, or how the disks are used.
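As an illustration of the "emptiest chunks first" idea, a small sketch in
C. The chunk_usage record and the listing it would come from are assumed,
not an existing API; only the ordering step is shown:

	#include <stdlib.h>
	#include <linux/types.h>

	/* assumed shape of one entry from a chunk-usage listing */
	struct chunk_usage {
		__u64 lstart;	/* logical start of the chunk */
		__u64 length;
		__u64 used;
	};

	/* sort ascending by fill percentage */
	static int by_usage(const void *a, const void *b)
	{
		const struct chunk_usage *ca = a, *cb = b;
		__u64 pa = ca->length ? ca->used * 100 / ca->length : 0;
		__u64 pb = cb->length ? cb->used * 100 / cb->length : 0;

		return pa < pb ? -1 : pa > pb;
	}

	/* qsort(chunks, n, sizeof(*chunks), by_usage); then balance each
	 * chunk in order, e.g. via the balance ioctl's vrange filter */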
On 5/29/20 6:23 PM, Goffredo Baroncelli wrote:
> On 5/28/20 11:03 PM, Hans van Kranenburg wrote:
[...]
>> Do you mean that you want to allow a non-root user to run btrfs balance?
>
> Not at all. Balance is an expensive operation that IMHO needs root
> privileges to be started.
>
>> Otherwise, no. IMO convenience functions for quickly retrieving a
>> specific subset of data should be created as reusable library functions
>> in the calling code, not as a redundant extra IOCTL that has to be
>> maintained.
>
> I think there is a misunderstanding. There is no intention to take the
> place of the current balance ioctl.

Ok, I'll rephrase. Using it to improve balance is not an argument for
adding a new IOCTL, since it has to run as root anyway, and then you can
use the SEARCH IOCTL. And as long as there are a few helper functions
which do the plumbing, SEARCH isn't that bad at just getting some chunk
and block group info.

-# python3
>>> import btrfs
>>> with btrfs.FileSystem('/') as fs:
...     for chunk in fs.chunks():
...         print(fs.block_group(chunk.vaddr))
...
block group vaddr 13631488 length 8388608 flags DATA used 8388608 used_pct 100
block group vaddr 22020096 length 8388608 flags SYSTEM|DUP used 114688 used_pct 1
block group vaddr 30408704 length 1073741824 flags METADATA|DUP used 889061376 used_pct 83
block group vaddr 1104150528 length 1073741824 flags DATA used 1073741824 used_pct 100
block group vaddr 2177892352 length 1073741824 flags DATA used 1073741824 used_pct 100
[...]

> The aim of this ioctl is only to get information about the chunk
> distribution. Getting the chunk information could help to perform a
> better balance, e.g. a balance which starts from the emptiest chunks and
> then goes forward, processing the fuller ones.

An example of this is the existing btrfs-balance-least-used program.

> Another use case is to visualize how the chunks are filled, or how the
> disks are used.

An example of that is btrfs-heatmap.

Hfgl,
Hans
On 5/29/20 8:54 PM, Hans van Kranenburg wrote:
> On 5/29/20 6:23 PM, Goffredo Baroncelli wrote:
>> Another use case is to visualize how the chunks are filled, or how the
>> disks are used.
>
> An example of that is btrfs-heatmap.

Oops, forgot the example. Here it is, showing data and metadata getting
sorted out while doing some balance after setting things up for the
preferred metadata/data placement. It's two 10G disks, and it's the
'physical' view, so the left half is the "slow" part and the right half
is the "fast" part of the filesystem, fun. :)

https://syrinx.knorrie.org/~knorrie/btrfs/keep/2020-05-29-ssd_metadata.mp4

Hans
On 5/29/20 8:54 PM, Hans van Kranenburg wrote:
> On 5/29/20 6:23 PM, Goffredo Baroncelli wrote:
>> On 5/28/20 11:03 PM, Hans van Kranenburg wrote:
[...]
>> I think there is a misunderstanding. There is no intention to take the
>> place of the current balance ioctl.
>
> Ok, I'll rephrase. Using it to improve balance is not an argument for
> adding a new IOCTL, since it has to run as root anyway, and then you can
> use the SEARCH IOCTL. And as long as there are a few helper functions
> which do the plumbing, SEARCH isn't that bad at just getting some chunk
> and block group info.

Obviously using the SEARCH IOCTL you can get rid of all the other "read"
btrfs ioctl(s). However the SEARCH IOCTL has some disadvantages:
1) it is a quite complex API
2) it exposes a lot of the internals of a BTRFS filesystem, which could
   prevent future BTRFS evolution
3) it requires root privileges

Maybe you missed my other patch set "btrfs-progs: use the new ioctl
BTRFS_IOC_GET_CHUNK_INFO" [*], which is the use case for which this ioctl
was born. Basically we need the chunk layout info in order to run the
command "btrfs fi us". And currently, as a non-root user, this is
impossible, because this command uses the SEARCH IOCTL if raid5/raid6 is
used.

And due to 2), I think that we should get rid of all the SEARCH IOCTL
uses. The discussion with David was about which information should be
exposed:
- if you expose too much information, there is the risk that you will
  run into problem 2)
- if you expose too little information, you end up adding another
  (similar) ioctl, or you have to extend this one
- of course the other factor that has to be considered is the
  composability of the api(s)

IMHO, we need an api that exposes the CHUNK layout. And I think that we
should replace all the SEARCH IOCTL uses with more reasonable apis.

BR
G.Baroncelli

[*] https://lore.kernel.org/linux-btrfs/20200315152430.7532-1-kreijack@libero.it/#t
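For comparison, here is a sketch of what fetching the same chunk items
through the existing, root-only TREE_SEARCH ioctl looks like; this is the
complexity that points 1) and 3) above refer to. Continuation across
multiple calls and proper __le64 endianness conversion of the on-disk
fields are omitted for brevity (a little-endian host is assumed):

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>
	#include <linux/btrfs_tree.h>

	static int dump_chunks_via_search(int fd)
	{
		struct btrfs_ioctl_search_args args;
		struct btrfs_ioctl_search_key *sk = &args.key;
		unsigned long off = 0;
		__u32 i;
		int ret;

		memset(&args, 0, sizeof(args));
		sk->tree_id = BTRFS_CHUNK_TREE_OBJECTID;
		sk->min_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
		sk->max_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
		sk->min_type = BTRFS_CHUNK_ITEM_KEY;
		sk->max_type = BTRFS_CHUNK_ITEM_KEY;
		sk->max_offset = (__u64)-1;
		sk->max_transid = (__u64)-1;
		sk->nr_items = 4096;

		/* needs CAP_SYS_ADMIN */
		ret = ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args);
		if (ret < 0)
			return ret;

		/* the buffer holds search headers, each followed by the
		 * raw item data */
		for (i = 0; i < sk->nr_items; i++) {
			struct btrfs_ioctl_search_header *sh;
			struct btrfs_chunk *chunk;

			sh = (struct btrfs_ioctl_search_header *)(args.buf + off);
			chunk = (struct btrfs_chunk *)(sh + 1);
			printf("chunk at %llu, length %llu\n",
			       (unsigned long long)sh->offset,
			       (unsigned long long)chunk->length);
			off += sizeof(*sh) + sh->len;
		}
		return 0;
	}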
On Tue, May 26, 2020 at 10:19:35PM +0200, Goffredo Baroncelli wrote:
> On 5/25/20 7:14 PM, David Sterba wrote:
>> I'll start with the data structures
[...]
> There are two clients for this api:
> - btrfs fi us
> - btrfs dev us
>
> We can get rid of:
> - the "offset" fields (2x)
> - the "uuid" fields
>
> However the "offset" fields can be used to understand where a logical map
> is on the physical disks. I am thinking about a graphical tool to show
> this mapping, which doesn't exist yet :-). The offset field may also be
> used as a key to get further information (like the chunk usage, see
> below).
>
> Regarding the UUID field, I agree it can be removed because it is
> redundant (there is already the devid).

Offset is ok. I had something like this:

struct dump_chunks_entry {
	u64 devid;
	u64 start;
	u64 lstart;
	u64 length;
	u64 flags;
	u64 used;
};

This selects the most interesting data from the CHUNK_ITEM, except the
'used' member, see below.

>> The format does not need to follow the exact layout that the kernel uses,
[...]
> This info could be very useful. I am thinking of something like a balance
> of the chunks which are nearly full (or nearly empty). The question is
> whether we should have a different ioctl.

I was not proposing a new ioctl but designing the data exchange format to
optionally provide a way to pass more information, like the usage. The
reference to the search ioctl was merely to point out that there's one
more search for BLOCK_GROUP_ITEM where the 'used' is stored. As this is
potentially expensive, it won't be filled by default.

The structure above does not capture all the chunk data. We could pack
more such structures into one ioctl call. I think that num_stripes is
missing from there, as it would make the raid56 calculations possible,
but otherwise that should be it.
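A sketch of why such flat, fixed-size entries are convenient to consume:
walking the result is plain array indexing, with no per-record size
computation. It assumes 'entries' and 'nr' were filled by a hypothetical
ioctl returning the dump_chunks_entry records above:

	#include <stdio.h>

	static void print_entries(const struct dump_chunks_entry *entries,
				  int nr)
	{
		int i;

		/* one line per (chunk, device) pair; chunk-wide fields
		 * repeat for each stripe */
		for (i = 0; i < nr; i++)
			printf("devid %llu phys %llu logical %llu len %llu flags 0x%llx used %llu\n",
			       (unsigned long long)entries[i].devid,
			       (unsigned long long)entries[i].start,
			       (unsigned long long)entries[i].lstart,
			       (unsigned long long)entries[i].length,
			       (unsigned long long)entries[i].flags,
			       (unsigned long long)entries[i].used);
	}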
On 6/10/20 10:30 PM, David Sterba wrote:
> On Tue, May 26, 2020 at 10:19:35PM +0200, Goffredo Baroncelli wrote:
[...]
> Offset is ok. I had something like this:
>
> struct dump_chunks_entry {
> 	u64 devid;
> 	u64 start;
> 	u64 lstart;
> 	u64 length;
> 	u64 flags;
> 	u64 used;
> };
>
> This selects the most interesting data from the CHUNK_ITEM, except the
> 'used' member, see below.

The structure above is a "per-device" structure. This means that for
(e.g.) raidX chunks the fields:
- lstart
- length
- flags
- used
are repeated.

In fact only devid and start are device specific. I see the following
possibilities:

1)

struct dump_chunks_entry {
	u64 lstart;
	u64 length;
	u64 flags;
	u64 used;
	u64 start;
	u64 devid;
};

pro: simple api
cons: waste of space (60% of the data is repeated)

2)

struct dump_chunk_disk_entry {
	u64 devid;
	u64 start;
};

struct dump_chunks_entry {
	u64 lstart;
	u64 length;
	u64 flags;
	u64 used;
	u16 disks_count;
	struct dump_chunk_disk_entry disks[];
};

pro: smaller data
cons: variable length data

3)

two different ioctls

BTRFS_IOC_DUMP_BLOCK_GROUP

struct dump_block_group {
	u64 lstart;
	u64 used;
	u64 length;
	u64 flags;
};

BTRFS_IOC_DUMP_CHUNK

struct dump_chunks_entry {
	u64 lstart;
	u64 start;
	u64 devid;
};

where the field lstart is the key

pro: smaller data (only lstart is repeated); quite orthogonal api
cons: two IOCTLs

Considering that making the "used" field optional effectively means
having two ioctls (or one ioctl with a parameter, which is not so
different): the more I think about it, the more I like option 3).
However I am less happy to lose information like sub_stripes (will
something like RAID50 come to BTRFS?). Unfortunately, to have this
information, we need to duplicate it for each dump_chunks_entry...

GB
Hi David,

On 6/11/20 1:50 PM, Goffredo Baroncelli wrote:
> On 6/10/20 10:30 PM, David Sterba wrote:
[...]
> Considering that making the "used" field optional effectively means
> having two ioctls (or one ioctl with a parameter, which is not so
> different): the more I think about it, the more I like option 3).
> However I am less happy to lose information like sub_stripes (will
> something like RAID50 come to BTRFS?). Unfortunately, to have this
> information, we need to duplicate it for each dump_chunks_entry...
>
> GB

After some more thinking, I reached the conclusion that we should have
two IOCTLs.

The first one is like the original one, where each item contains nearly
all the chunk-related information, with an array of the device ids
involved + offsets at the end. The only exception is the removal of the
UUID fields (and eventually the stripe_len/sub_stripes/num_stripes
fields). This could give us a good representation of the chunk layout.

The second one is an IOCTL to get the list of the block_group_items.
Basically it returns a little less information than the previous one,
with the notable exception of the "used" field.

My original idea was to lighten the chunk_info IOCTL and get most
information from the block_group IOCTL. However I fear that if a user is
interested only in the chunk layout (i.e. to answer questions like
"which device is used?"), he has to perform two ioctls. So after some
thinking I returned to my original layout, with a second IOCTL to return
the "used" values (eventually with other fields...).

Having a unique ioctl which could return both kinds of information is
quite complex, because doing so we would have to merge two lists with all
the exceptional cases (what if we have a block_group without a
chunk_item? or the opposite?).

What is your opinion?

BR
G.Baroncelli
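A sketch of the userspace join that the two-ioctl design implies: the
records are matched on the logical start, and the "exceptional cases"
mentioned above surface as a failed lookup. The structs follow option 3)
above (with userspace __u64 types); find_bg() is an assumed helper around
the two hypothetical ioctls, not an existing API:

	#include <stddef.h>
	#include <linux/types.h>

	struct dump_block_group {
		__u64 lstart;
		__u64 used;
		__u64 length;
		__u64 flags;
	};

	struct dump_chunks_entry {
		__u64 lstart;
		__u64 start;
		__u64 devid;
	};

	/* find the block group record matching a chunk's logical start */
	static const struct dump_block_group *
	find_bg(const struct dump_block_group *bgs, int nr, __u64 lstart)
	{
		int i;

		for (i = 0; i < nr; i++)
			if (bgs[i].lstart == lstart)
				return &bgs[i];
		/* the awkward case: a chunk without a block group */
		return NULL;
	}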
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 40b729dce91c..b3296a479bf6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2234,6 +2234,215 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
 	return ret;
 }
 
+/*
+ * Return:
+ *  1         -> copied all data, possible further data
+ *  0         -> copied all data, no further data
+ *  -EAGAIN   -> not enough space, restart it
+ *  -EFAULT   -> the user passed an invalid address/size pair
+ */
+static noinline int copy_chunk_info(struct btrfs_path *path,
+				    char __user *ubuf,
+				    size_t buf_size,
+				    u64 *used_buf,
+				    int *num_found,
+				    u64 *offset)
+{
+	struct extent_buffer *leaf;
+	unsigned long item_off;
+	unsigned long item_len;
+	int nritems;
+	int i;
+	int slot;
+	struct btrfs_key key;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	nritems = btrfs_header_nritems(leaf);
+
+	for (i = slot; i < nritems; i++) {
+		u64 destsize;
+		struct btrfs_chunk_info ci;
+		struct btrfs_chunk chunk;
+		int j, chunk_size;
+
+		item_off = btrfs_item_ptr_offset(leaf, i);
+		item_len = btrfs_item_size_nr(leaf, i);
+
+		btrfs_item_key_to_cpu(leaf, &key, i);
+		/*
+		 * we are not interested in other item types
+		 */
+		if (key.type != BTRFS_CHUNK_ITEM_KEY)
+			return 0;
+
+		/*
+		 * In any case, the next search must start from here
+		 */
+		*offset = key.offset;
+		read_extent_buffer(leaf, &chunk, item_off, sizeof(chunk));
+
+		/*
+		 * chunk.num_stripes-1 is correct, because btrfs_chunk includes
+		 * already a stripe
+		 */
+		destsize = sizeof(struct btrfs_chunk_info) +
+			(chunk.num_stripes - 1) * sizeof(struct btrfs_stripe);
+
+		if (destsize > item_len) {
+			ASSERT(0);
+			return -EINVAL;
+		}
+
+		if (buf_size < destsize + *used_buf) {
+			if (*num_found)
+				/* try another time */
+				return -EAGAIN;
+			else
+				/* in any case the buffer is too small */
+				return -EOVERFLOW;
+		}
+
+		/* copy chunk */
+		chunk_size = offsetof(struct btrfs_chunk_info, stripes);
+		memset(&ci, 0, chunk_size);
+		ci.length = btrfs_stack_chunk_length(&chunk);
+		ci.stripe_len = btrfs_stack_chunk_stripe_len(&chunk);
+		ci.type = btrfs_stack_chunk_type(&chunk);
+		ci.num_stripes = btrfs_stack_chunk_num_stripes(&chunk);
+		ci.sub_stripes = btrfs_stack_chunk_sub_stripes(&chunk);
+		ci.offset = key.offset;
+
+		if (copy_to_user(ubuf + *used_buf, &ci, chunk_size))
+			return -EFAULT;
+
+		*used_buf += chunk_size;
+
+		/* copy stripes */
+		for (j = 0; j < chunk.num_stripes; j++) {
+			struct btrfs_stripe chunk_stripe;
+			struct btrfs_chunk_info_stripe csi;
+
+			/*
+			 * j-1 is correct, because btrfs_chunk includes already
+			 * a stripe
+			 */
+			read_extent_buffer(leaf, &chunk_stripe,
+				item_off + sizeof(struct btrfs_chunk) +
+				sizeof(struct btrfs_stripe) * (j - 1),
+				sizeof(chunk_stripe));
+
+			memset(&csi, 0, sizeof(csi));
+
+			csi.devid = btrfs_stack_stripe_devid(&chunk_stripe);
+			csi.offset = btrfs_stack_stripe_offset(&chunk_stripe);
+			memcpy(csi.dev_uuid, chunk_stripe.dev_uuid,
+				sizeof(chunk_stripe.dev_uuid));
+			if (copy_to_user(ubuf + *used_buf, &csi, sizeof(csi)))
+				return -EFAULT;
+
+			*used_buf += sizeof(csi);
+		}
+
+		++(*num_found);
+	}
+
+	if (*offset < (u64)-1)
+		++(*offset);
+
+	return 1;
+}
+
+static noinline int search_chunk_info(struct inode *inode, u64 *offset,
+				      int *items_count,
+				      char __user *ubuf, u64 buf_size)
+{
+	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	int ret = -EAGAIN;
+	u64 used_buf = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* search for BTRFS_CHUNK_TREE_OBJECTID tree */
+	key.objectid = BTRFS_CHUNK_TREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	root = btrfs_get_fs_root(info, &key, true);
+	if (IS_ERR(root)) {
+		btrfs_err(info, "could not find root\n");
+		btrfs_free_path(path);
+		return -ENOENT;
+	}
+
+	while (used_buf < buf_size) {
+		key.objectid = 0x0100;
+		key.type = BTRFS_CHUNK_ITEM_KEY;
+		key.offset = *offset;
+
+		ret = btrfs_search_forward(root, &key, path, 0);
+		if (ret != 0) {
+			if (ret > 0)
+				ret = 0;
+			goto ret;
+		}
+
+		ret = copy_chunk_info(path, ubuf, buf_size,
+				      &used_buf, items_count, offset);
+
+		btrfs_release_path(path);
+
+		if (ret < 1)
+			break;
+	}
+
+ret:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static noinline int btrfs_ioctl_get_chunk_info(struct file *file,
+					       void __user *argp)
+{
+	struct btrfs_ioctl_chunk_info arg;
+	struct inode *inode;
+	int ret;
+	size_t buf_size;
+	u64 data_offset;
+	const size_t buf_limit = SZ_16M;
+
+	data_offset = sizeof(struct btrfs_ioctl_chunk_info);
+	inode = file_inode(file);
+
+	if (copy_from_user(&arg, argp, sizeof(arg)))
+		return -EFAULT;
+
+	buf_size = arg.buf_size;
+	arg.items_count = 0;
+
+	if (buf_size < sizeof(struct btrfs_ioctl_chunk_info) +
+	    sizeof(struct btrfs_chunk_info))
+		return -EOVERFLOW;
+
+	/* limit result size to 16MB */
+	if (buf_size > buf_limit)
+		buf_size = buf_limit;
+
+	ret = search_chunk_info(inode, &arg.offset, &arg.items_count,
+				argp + data_offset, buf_size - data_offset);
+
+	if (copy_to_user(argp, &arg, data_offset))
+		return -EFAULT;
+
+	return ret;
+}
+
 /*
  * Search INODE_REFs to identify path name of 'dirid' directory
  * in a 'tree_id' tree. and sets path name to 'name'.
@@ -4907,6 +5116,8 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_get_subvol_rootref(file, argp);
 	case BTRFS_IOC_INO_LOOKUP_USER:
 		return btrfs_ioctl_ino_lookup_user(file, argp);
+	case BTRFS_IOC_GET_CHUNK_INFO:
+		return btrfs_ioctl_get_chunk_info(file, argp);
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 8134924cfc17..b28f7886dcbd 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -734,6 +734,42 @@ struct btrfs_ioctl_received_subvol_args {
 	__u64	reserved[16];		/* in */
 };
 
+struct btrfs_chunk_info_stripe {
+	__u64 devid;
+	__u64 offset;
+	__u8 dev_uuid[BTRFS_UUID_SIZE];
+};
+
+struct btrfs_chunk_info {
+	/* logical start of this chunk */
+	__u64 offset;
+	/* size of this chunk in bytes */
+	__u64 length;
+
+	__u64 stripe_len;
+	__u64 type;
+
+	/* 2^16 stripes is quite a lot, a second limit is the size of a single
+	 * item in the btree
+	 */
+	__u16 num_stripes;
+
+	/* sub stripes only matter for raid10 */
+	__u16 sub_stripes;
+
+	struct btrfs_chunk_info_stripe stripes[1];
+	/* additional stripes go here */
+};
+
+struct btrfs_ioctl_chunk_info {
+	__u64 offset;		/* offset to start the search */
+	__u32 buf_size;		/* size of the buffer, including
+				 * btrfs_ioctl_chunk_info
+				 */
+	__u32 items_count;	/* number of items returned */
+};
+
 /*
  * Caller doesn't want file data in the send stream, even if the
  * search of clone sources doesn't find an extent. UPDATE_EXTENT
@@ -972,5 +1008,7 @@ enum btrfs_err_code {
 				   struct btrfs_ioctl_ino_lookup_user_args)
 #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
 				   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_GET_CHUNK_INFO _IOR(BTRFS_IOCTL_MAGIC, 64, \
+				   struct btrfs_ioctl_chunk_info)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
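For reference, a minimal userspace caller of the proposed interface could
look like the sketch below. It assumes the patch above is applied (so
that linux/btrfs.h provides the new structures and ioctl number); the
buffer size and the error handling are illustrative only:

	#include <errno.h>
	#include <stddef.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	static void dump_chunks(int fd)
	{
		const size_t bufsz = 64 * 1024;	/* illustrative size */
		char *buf = malloc(bufsz);
		struct btrfs_ioctl_chunk_info *hdr;
		__u64 resume = 0;
		int ret;

		if (!buf)
			return;
		hdr = (struct btrfs_ioctl_chunk_info *)buf;

		do {
			memset(hdr, 0, sizeof(*hdr));
			hdr->offset = resume;	/* logical offset to resume from */
			hdr->buf_size = bufsz;	/* includes the header itself */

			ret = ioctl(fd, BTRFS_IOC_GET_CHUNK_INFO, buf);
			if (ret < 0 && errno != EAGAIN) {
				perror("BTRFS_IOC_GET_CHUNK_INFO");
				break;
			}

			/* records follow the header: one btrfs_chunk_info,
			 * then its num_stripes stripe entries inline */
			char *p = buf + sizeof(*hdr);
			for (__u32 i = 0; i < hdr->items_count; i++) {
				struct btrfs_chunk_info *ci =
					(struct btrfs_chunk_info *)p;

				printf("chunk %llu len %llu type 0x%llx stripes %u\n",
				       (unsigned long long)ci->offset,
				       (unsigned long long)ci->length,
				       (unsigned long long)ci->type,
				       ci->num_stripes);
				p += offsetof(struct btrfs_chunk_info, stripes) +
				     ci->num_stripes *
				     sizeof(struct btrfs_chunk_info_stripe);
			}
			/* the kernel advanced it past the last item */
			resume = hdr->offset;
		} while (ret == 1 || (ret < 0 && errno == EAGAIN));

		free(buf);
	}

Note how the loop mirrors the documented return codes: 1 means more data
is available, 0 means the listing is complete, and -EAGAIN means the
buffer filled up and the call should be repeated from the updated offset.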