Message ID | abea9f460c7341361e58cbba8af355654eb94b5b.1649092662.git.osandov@fb.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: add send/receive support for reading/writing compressed data | expand |
On Mon, Apr 04, 2022 at 10:29:05AM -0700, Omar Sandoval wrote: > From: Omar Sandoval <osandov@fb.com> > > This adds the definitions of the new commands for send stream version 2 > and their respective attributes: fallocate, FS_IOC_SETFLAGS (a.k.a. > chattr), and encoded writes. It also documents two changes to the send > stream format in v2: the receiver shouldn't assume a maximum command > size, and the DATA attribute is encoded differently to allow for writes > larger than 64k. These will be implemented in subsequent changes, and > then the ioctl will accept the new version and flag. > > Reviewed-by: Josef Bacik <josef@toxicpanda.com> > Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me> > Signed-off-by: Omar Sandoval <osandov@fb.com> > --- > fs/btrfs/send.c | 2 +- > fs/btrfs/send.h | 40 ++++++++++++++++++++++++++++++++++---- > include/uapi/linux/btrfs.h | 7 +++++++ > 3 files changed, 44 insertions(+), 5 deletions(-) > > diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c > index 9363f625fa17..1f141de3a7d6 100644 > --- a/fs/btrfs/send.c > +++ b/fs/btrfs/send.c > @@ -7459,7 +7459,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) > > sctx->clone_roots_cnt = arg->clone_sources_count; > > - sctx->send_max_size = BTRFS_SEND_BUF_SIZE; > + sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1; > sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL); > if (!sctx->send_buf) { > ret = -ENOMEM; > diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h > index 67721e0281ba..805d8095209a 100644 > --- a/fs/btrfs/send.h > +++ b/fs/btrfs/send.h > @@ -12,7 +12,11 @@ > #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" > #define BTRFS_SEND_STREAM_VERSION 1 > > -#define BTRFS_SEND_BUF_SIZE SZ_64K > +/* > + * In send stream v1, no command is larger than 64k. In send stream v2, no limit > + * should be assumed. 
> + */ > +#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K > > enum btrfs_tlv_type { > BTRFS_TLV_U8, > @@ -80,16 +84,20 @@ enum btrfs_send_cmd { > BTRFS_SEND_C_MAX_V1 = 22, > > /* Version 2 */ > - BTRFS_SEND_C_MAX_V2 = 22, > + BTRFS_SEND_C_FALLOCATE = 23, > + BTRFS_SEND_C_SETFLAGS = 24, Do you have patches that implement the fallocate modes and setflags? I don't see it in this patchset. The setflags should be switched to something closer to the recent refactoring that unifies all the flags/attrs to fileattr. I have a prototype patch for that, comparing the inode flags in the same way as file mode, the tricky part is on the receive side how to apply them correctly. On the sending side it's simple though. > + BTRFS_SEND_C_ENCODED_WRITE = 25, > + BTRFS_SEND_C_MAX_V2 = 25, > > /* End */ > - BTRFS_SEND_C_MAX = 22, > + BTRFS_SEND_C_MAX = 25, > };
On Wed, May 18, 2022 at 11:00:03PM +0200, David Sterba wrote: > On Mon, Apr 04, 2022 at 10:29:05AM -0700, Omar Sandoval wrote: > > From: Omar Sandoval <osandov@fb.com> > > > > This adds the definitions of the new commands for send stream version 2 > > and their respective attributes: fallocate, FS_IOC_SETFLAGS (a.k.a. > > chattr), and encoded writes. It also documents two changes to the send > > stream format in v2: the receiver shouldn't assume a maximum command > > size, and the DATA attribute is encoded differently to allow for writes > > larger than 64k. These will be implemented in subsequent changes, and > > then the ioctl will accept the new version and flag. > > > > Reviewed-by: Josef Bacik <josef@toxicpanda.com> > > Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me> > > Signed-off-by: Omar Sandoval <osandov@fb.com> > > --- > > fs/btrfs/send.c | 2 +- > > fs/btrfs/send.h | 40 ++++++++++++++++++++++++++++++++++---- > > include/uapi/linux/btrfs.h | 7 +++++++ > > 3 files changed, 44 insertions(+), 5 deletions(-) > > > > diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c > > index 9363f625fa17..1f141de3a7d6 100644 > > --- a/fs/btrfs/send.c > > +++ b/fs/btrfs/send.c > > @@ -7459,7 +7459,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) > > > > sctx->clone_roots_cnt = arg->clone_sources_count; > > > > - sctx->send_max_size = BTRFS_SEND_BUF_SIZE; > > + sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1; > > sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL); > > if (!sctx->send_buf) { > > ret = -ENOMEM; > > diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h > > index 67721e0281ba..805d8095209a 100644 > > --- a/fs/btrfs/send.h > > +++ b/fs/btrfs/send.h > > @@ -12,7 +12,11 @@ > > #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" > > #define BTRFS_SEND_STREAM_VERSION 1 > > > > -#define BTRFS_SEND_BUF_SIZE SZ_64K > > +/* > > + * In send stream v1, no command is larger than 64k. In send stream v2, no limit > > + * should be assumed. 
> > + */ > > +#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K > > > > enum btrfs_tlv_type { > > BTRFS_TLV_U8, > > @@ -80,16 +84,20 @@ enum btrfs_send_cmd { > > BTRFS_SEND_C_MAX_V1 = 22, > > > > /* Version 2 */ > > - BTRFS_SEND_C_MAX_V2 = 22, > > + BTRFS_SEND_C_FALLOCATE = 23, > > + BTRFS_SEND_C_SETFLAGS = 24, > > Do you have patches that implement the fallocate modes and setflags? I > don't see it in this patchset. Nope, as discussed before, in order to keep the patch series manageable, this series adds the definitions and receive support for fallocate and setflags, but leaves the send side to be implemented at a later time. I implemented fallocate for send back in 2019: https://github.com/osandov/linux/commits/btrfs-send-v2. It passed some basic testing back then, but it'd need a big rebase and more testing. > The setflags should be switched to > something closer to the recent refactoring that unifies all the > flags/attrs to fileattr. I have a prototype patch for that, comparing > the inode flags in the same way as file mode, the tricky part is on the > receive side how to apply them correctly. On the sending side it's > simple though. The way this series documents (and implements in receive) BTRFS_SEND_C_SETFLAGS is that it's a simple call to FS_IOC_SETFLAGS with given flags. I don't think this is affected by the change to fileattr, unless I'm misunderstanding. This is in line with the other commands being straightforward system calls, but it does mean that the sending side has to deal with the complexities of an immutable or append-only file being modified between incremental sends (by temporarily clearing the flag), and of inherited flags (e.g., a COW file inside of a NOCOW directory). I suppose it'd also be possible to have SETFLAGS define the final flags and leave it up to receive to make that happen by temporarily setting/clearing flags as necessary, but that is a bit inconsistent with how we've handled other commands.
On Wed, May 18, 2022 at 03:25:34PM -0700, Omar Sandoval wrote: > On Wed, May 18, 2022 at 11:00:03PM +0200, David Sterba wrote: > > On Mon, Apr 04, 2022 at 10:29:05AM -0700, Omar Sandoval wrote: > > > @@ -80,16 +84,20 @@ enum btrfs_send_cmd { > > > BTRFS_SEND_C_MAX_V1 = 22, > > > > > > /* Version 2 */ > > > - BTRFS_SEND_C_MAX_V2 = 22, > > > + BTRFS_SEND_C_FALLOCATE = 23, > > > + BTRFS_SEND_C_SETFLAGS = 24, > > > > Do you have patches that implement the fallocate modes and setflags? I > > don't see it in this patchset. > > Nope, as discussed before, in order to keep the patch series managable, > this series adds the definitions and receive support for fallocate and > setflags, but leaves the send side to be implemented at a later time. > > I implemented fallocate for send back in 2019: > https://github.com/osandov/linux/commits/btrfs-send-v2. It passed some > basic testing back then, but it'd need a big rebase and more testing. The patches in the branch are partially cleanups and preparatory work, so at least avoiding sending the holes would be nice to have for v2 as it was one of the first bugs reported. The fallocate modes seem to be easy. The rest is about the versioning infrastructure that we already have merged. > > The setflags should be switched to > > something closer to the recent refactoring that unifies all the > > flags/attrs to fileattr. I have a prototype patch for that, comparing > > the inode flags in the same way as file mode, the tricky part is on the > > receive side how to apply them correctly. On the sending side it's > > simple though. > > The way this series documents (and implements in receive) > BTRFS_SEND_C_SETFLAGS is that it's a simple call to FS_IOC_SETFLAGS with > given flags. I don't think this is affected by the change to fileattr, > unless I'm misunderstanding. 
The SETFLAGS ioctls are obsolete and I don't want to make them part of the protocol definition because the bit namespace contains flags we don't have implemented or that are not related to anything in btrfs. https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fs.h#L220 It's basically just naming and specifying what exactly is the value so we should pick the most recent interface name that superseded SETFLAGS and the XFLAGS. > This is in line with the other commands being straightforward system > calls, but it does mean that the sending side has to deal with the > complexities of an immutable or append-only file being modified between > incremental sends (by temporarily clearing the flag), and of inherited > flags (e.g., a COW file inside of a NOCOW directory). Yeah the receiving side needs to understand the constraints of the flags, it has only the information about the final state and not the order in which the flags get applied. > I suppose it'd > also be possible to have SETFLAGS define the final flags and leave it up > to receive to make that happen by temporarily setting/clearing flags as > necessary, but that is a bit inconsistent with how we've handled other > commands. I'm not sure we can always stick to 1:1 mapping to syscalls or ioctls, of course it's the best option, but the protocol can transfer e.g. more complete information and it's up to the receiving side to apply it (like if a file has NODATASUM flag set). From the other side there are multiple actions for something that could be just one, like creating file first as an orphan and then renaming it. So I'd like to look at it from the protocol perspective and not necessarily blindly copy the OS interfaces.
On Thu, May 19, 2022 at 06:07:49PM +0200, David Sterba wrote: > On Wed, May 18, 2022 at 03:25:34PM -0700, Omar Sandoval wrote: > > On Wed, May 18, 2022 at 11:00:03PM +0200, David Sterba wrote: > > > On Mon, Apr 04, 2022 at 10:29:05AM -0700, Omar Sandoval wrote: > > > > @@ -80,16 +84,20 @@ enum btrfs_send_cmd { > > > > BTRFS_SEND_C_MAX_V1 = 22, > > > > > > > > /* Version 2 */ > > > > - BTRFS_SEND_C_MAX_V2 = 22, > > > > + BTRFS_SEND_C_FALLOCATE = 23, > > > > + BTRFS_SEND_C_SETFLAGS = 24, > > > > > > Do you have patches that implement the fallocate modes and setflags? I > > > don't see it in this patchset. > > > > Nope, as discussed before, in order to keep the patch series managable, > > this series adds the definitions and receive support for fallocate and > > setflags, but leaves the send side to be implemented at a later time. > > > > I implemented fallocate for send back in 2019: > > https://github.com/osandov/linux/commits/btrfs-send-v2. It passed some > > basic testing back then, but it'd need a big rebase and more testing. > > The patches in the branch are partially cleanups and preparatory work, > so at least avoiding sending the holes would be nice to have for v2 as > it was one of the first bugs reported. The falllocate modes seem to be > easy. The rest is about the versioning infrastructure that we already > have merged. I rebased the patches on this series: https://github.com/osandov/linux/commits/btrfs-send-v2-redux. It passes some basic testing, but it'll definitely need a lot of fstests. > > > The setflags should be switched to > > > something closer to the recent refactoring that unifies all the > > > flags/attrs to fileattr. I have a prototype patch for that, comparing > > > the inode flags in the same way as file mode, the tricky part is on the > > > receive side how to apply them correctly. On the sending side it's > > > simple though. 
> > > > The way this series documents (and implements in receive) > > BTRFS_SEND_C_SETFLAGS is that it's a simple call to FS_IOC_SETFLAGS with > > given flags. I don't think this is affected by the change to fileattr, > > unless I'm misunderstanding. > > The SETFLAGS ioctls are obsolete and I don't want to make them part of > the protocol defition because the bit namespace contains flags we don't > have implemented or are not releated to anything in btrfs. > > https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fs.h#L220 > > It's basically just naming and specifying what exactly is the value so > we should pick the most recent interface name that superseded SETFLAGS > and the XFLAGS. This is the situation with FS_IOC_SETFLAGS, FS_IOC_FSSETXATTR, and fileattr as I understand it. Please correct me if I'm wrong: - FS_IOC_SETFLAGS originally came from ext4 and was added to Btrfs very early on (commit 6cbff00f4632 ("Btrfs: implement FS_IOC_GETFLAGS/SETFLAGS/GETVERSION")). - FS_IOC_FSSETXATTR originally came from XFS and was added to Btrfs a few years ago (in commit 025f2121488e ("btrfs: add FS_IOC_FSSETXATTR ioctl")). - The two ioctls allow setting some of the same flags (e.g., IMMUTABLE, APPEND), but some are only supported by SETFLAGS (e.g., NOCOW) and some are only supported by FSSETXATTR (none of these are supported by Btrfs, however). - fileattr is a recent VFS interface that is used to implement those two ioctls. It basically passes through the arguments for whichever ioctl was called and translates the equivalent flags between the two ioctls. It is not a new UAPI and doesn't have its own set of flags. Is there another new UAPI that I'm missing that obsoletes SETFLAGS? I see your point about the irrelevant flags in SETFLAGS, however. Is your suggestion to have our own send protocol-specific set of flags that we translate to whatever ioctl we need to make? 
> > This is in line with the other commands being straightforward system > > calls, but it does mean that the sending side has to deal with the > > complexities of an immutable or append-only file being modified between > > incremental sends (by temporarily clearing the flag), and of inherited > > flags (e.g., a COW file inside of a NOCOW directory). > > Yeah the receiving side needs to understand the constraints of the > flags, it has only the information about the final state and not the > order in which the flags get applied. If the sender only tells the receiver what the final flags are, then yes, the receiver would need to deal with, e.g., temporarily clearing and resetting flags. The way I envisioned it was that the sender would instead send commands for those intermediate flag operations. E.g., if the incremental send requires writing some data to a file that is immutable in both the source and the parent subvolume, the sender could send commands to: clear the immutable flag, write the data, set the immutable flag. This is a lot like the orphan renaming that you mentioned. If we want to have receive handle the intermediate states instead, then I would like to postpone SETFLAGS (or whatever we call it) to send protocol v3, since it'll be very tricky to get right and we can't add it to the protocol without having an implementation in the receiver. On the other hand, if send takes care of the intermediate states and receive just has to blindly apply the flags, then we can add SETFLAGS to the protocol and receive now and implement it in send later. That is exactly what this patch series does. I'm fine with either of those paths forward, but I don't want to block the compressed send/receive on SETFLAGS or fallocate. Thanks, Omar
On Thu, May 19, 2022 at 03:31:56PM -0700, Omar Sandoval wrote: > On Thu, May 19, 2022 at 06:07:49PM +0200, David Sterba wrote: > > The SETFLAGS ioctls are obsolete and I don't want to make them part of > > the protocol defition because the bit namespace contains flags we don't > > have implemented or are not releated to anything in btrfs. > > > > https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fs.h#L220 > > > > It's basically just naming and specifying what exactly is the value so > > we should pick the most recent interface name that superseded SETFLAGS > > and the XFLAGS. > > This is the situation with FS_IOC_SETFLAGS, FS_IOC_FSSETXATTR, and > fileattr as I understand it. Please correct me if I'm wrong: > > - FS_IOC_SETFLAGS originally came from ext4 and was added to Btrfs very > early on (commit 6cbff00f4632 ("Btrfs: implement > FS_IOC_GETFLAGS/SETFLAGS/GETVERSION")). > - FS_IOC_FSSETXATTR originally came from XFS and was added to Btrfs a > few years ago (in commit 025f2121488e ("btrfs: add FS_IOC_FSSETXATTR > ioctl")). > - The two ioctls allow setting some of the same flags (e.g., IMMUTABLE, > APPEND), but some are only supported by SETFLAGS (e.g., NOCOW) and > some are only supported by FSSETXATTR (none of these are supported by > Btrfs, however). > - fileattr is a recent VFS interface that is used to implement those two > ioctls. It basically passes through the arguments for whichever ioctl > was called and translates the equivalent flags between the two ioctls. > It is not a new UAPI and doesn't have its own set of flags. > > Is there another new UAPI that I'm missing that obsoletes SETFLAGS? That was supposed to be FSSETXATTR, new flags have appeared there, the reason for btrfs was to allow the FS_XFLAG_DAX bit as people were working on the DAX support, and potentially other bits like FS_XFLAG_NOSYMLINKS or FS_XFLAG_NODEFRAG. Or new flags that we want to be able to set, NODATASUM for example. 
> I see your point about the irrelevant flags in SETFLAGS, however. Is > your suggestion to have our own send protocol-specific set of flags that > we translate to whatever ioctl we need to make? Yes, that's the idea, the flags are not protocol-specific but rather btrfs-specific, ie we want to support namely the bits that btrfs inodes can have. > > > This is in line with the other commands being straightforward system > > > calls, but it does mean that the sending side has to deal with the > > > complexities of an immutable or append-only file being modified between > > > incremental sends (by temporarily clearing the flag), and of inherited > > > flags (e.g., a COW file inside of a NOCOW directory). > > > > Yeah the receiving side needs to understand the constraints of the > > flags, it has only the information about the final state and not the > > order in which the flags get applied. > > If the sender only tells the receiver what the final flags are, then > yes, the receiver would need to deal with, e.g., temporarily clearing > and resetting flags. The way I envisioned it was that the sender would > instead send commands for those intermediate flag operations. E.g., if > the incremental send requires writing some data to a file that is > immutable in both the source and the parent subvolume, the sender could > send commands to: clear the immutable flag, write the data, set the > immutable flag. This is a lot like the orphan renaming that you > mentioned. I see, so the question is where do we want to put the logic. I'd go with userspace as lots of things are easier there, e.g. maintaining some intermediate state or delayed application of bits/flags. > If we want to have receive handle the intermediate states instead, then > I would like to postpone SETFLAGS (or whatever we call it) to send > protocol v3, since it'll be very tricky to get right and we can't add it > to the protocol without having an implementation in the receiver. 
Yeah it would be tricky to generate the sequence right, while if it's on the receiving side we can simply ignore/report it or implement a subset where we know how to apply it (e.g. immutable) and don't need to postpone it. > On the other hand, if send takes care of the intermediate states and > receive just has to blindly apply the flags, then we can add SETFLAGS to > the protocol and receive now and implement it in send later. That is > exactly what this patch series does. It adds a command to the protocol but does not outline a plan for how to use it, not counting this discussion. > I'm fine with either of those paths forward, but I don't want to block > the compressed send/receive on SETFLAGS or fallocate. I get that you care only about the encoded write, but I don't want to rev the protocol every few releases because we did not bother to implement something we know is missing in the protocol. Anyway, encoded write will be in v2 scheduled for 5.20 and I'll implement the rest plus will have a look at your fallocate patches.
On 20/05/2022 20:34, David Sterba wrote: > On Thu, May 19, 2022 at 03:31:56PM -0700, Omar Sandoval wrote: >> On Thu, May 19, 2022 at 06:07:49PM +0200, David Sterba wrote: >>> The SETFLAGS ioctls are obsolete and I don't want to make them part of >>> the protocol defition because the bit namespace contains flags we don't >>> have implemented or are not releated to anything in btrfs. >>> >>> https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fs.h#L220 >>> >>> It's basically just naming and specifying what exactly is the value so >>> we should pick the most recent interface name that superseded SETFLAGS >>> and the XFLAGS. >> >> This is the situation with FS_IOC_SETFLAGS, FS_IOC_FSSETXATTR, and >> fileattr as I understand it. Please correct me if I'm wrong: >> >> - FS_IOC_SETFLAGS originally came from ext4 and was added to Btrfs very >> early on (commit 6cbff00f4632 ("Btrfs: implement >> FS_IOC_GETFLAGS/SETFLAGS/GETVERSION")). >> - FS_IOC_FSSETXATTR originally came from XFS and was added to Btrfs a >> few years ago (in commit 025f2121488e ("btrfs: add FS_IOC_FSSETXATTR >> ioctl")). >> - The two ioctls allow setting some of the same flags (e.g., IMMUTABLE, >> APPEND), but some are only supported by SETFLAGS (e.g., NOCOW) and >> some are only supported by FSSETXATTR (none of these are supported by >> Btrfs, however). >> - fileattr is a recent VFS interface that is used to implement those two >> ioctls. It basically passes through the arguments for whichever ioctl >> was called and translates the equivalent flags between the two ioctls. >> It is not a new UAPI and doesn't have its own set of flags. >> >> Is there another new UAPI that I'm missing that obsoletes SETFLAGS? > > That was supposed to be FSSETXATTR, new flags have appeared there, the > reason for btrfs was to allow the FS_XFLAG_DAX bit as people are were > working on the DAX support, and potentially other bits like > FS_XFLAG_NOSYMLINKS or FS_XFLAG_NODEFRAG. 
Or new flags that we want to > be able to set, NODATASUM for example. > >> I see your point about the irrelevant flags in SETFLAGS, however. Is >> your suggestion to have our own send protocol-specific set of flags that >> we translate to whatever ioctl we need to make? > > Yes, that's the idea, the flags are not protocol-specific but rather > btrfs-specific, ie we want to support namely the bits that btrfs inodes > can have. > >>>> This is in line with the other commands being straightforward system >>>> calls, but it does mean that the sending side has to deal with the >>>> complexities of an immutable or append-only file being modified between >>>> incremental sends (by temporarily clearing the flag), and of inherited >>>> flags (e.g., a COW file inside of a NOCOW directory). >>> >>> Yeah the receiving side needs to understand the constraints of the >>> flags, it has only the information about the final state and not the >>> order in which the flags get applied. >> >> If the sender only tells the receiver what the final flags are, then >> yes, the receiver would need to deal with, e.g., temporarily clearing >> and resetting flags. The way I envisioned it was that the sender would >> instead send commands for those intermediate flag operations. E.g., if >> the incremental send requires writing some data to a file that is >> immutable in both the source and the parent subvolume, the sender could >> send commands to: clear the immutable flag, write the data, set the >> immutable flag. This is a lot like the orphan renaming that you >> mentioned. > > I see, so the question is where do we want to put the logic. I'd go with > userspace as lots of things are easier there, eg. maitaining some > intermediate state or delayed application of bits/flags. > We should remember that what you are designing here is a protocol for transmission of a snapshot. 
The protocol features are numbers that have to remain unchanged across all future software updates and all implementations of this version of the protocol. The values may or may not coincidentally match some constant we know today in the Linux 5.x ABI but it may have no relation to *anything* on the receiving side. Don't forget that some people are archiving send streams as a form of backup with the intention of playing them back in 10 or 20 years time on a btrfs implementation that might bear little resemblance to anything we would recognise today (not a good idea, but people are doing it). There is no way the sender has any idea what those future implementations might have to do to replicate the source data - that is up to those implementations. For example, it is very easy to imagine that some future OS might disallow the concept of "clearing the immutable flag". In that case, what the receiver needs to know is that this new data represents a new immutable file, based on the contents of the previous file with some specified differences - maybe it will handle this by turning off the immutable flag, or deleting the old file and writing a new one, or asking the system manager to authorize it, or using some versioning feature built into a future version of btrfs, or ... The right question isn't to ask "what would a Linux BTRFS receiver running the same software rev as the sender need to do", it needs to be "what information do I need to supply that gives a future receiver, on a completely different system, with a different I/O architecture and a different kernel, in 20 years, the best chance to implement it". Graham P.S. I'm an old network guy, not a file system guy. Send/receive is a network protocol, with the added problems that (i) there is no negotiation or feedback channel, and (ii) the data is probably mission critical to some people and they expect it to be usable in tens of years time. 
>> If we want to have receive handle the intermediate states instead, then >> I would like to postpone SETFLAGS (or whatever we call it) to send >> protocol v3, since it'll be very tricky to get right and we can't add it >> to the protocol without having an implementation in the receiver. > > Yeah it would be tricky to generate the sequence right, while if it's on > the receiving side we can simply ignore/report it or implement a subset > where we know how to apply (eg. immutable) and don't need to postpone > it. > >> On the other hand, if send takes care of the intermediate states and >> receive just has to blindly apply the flags, then we can add SETFLAGS to >> the protocol and receive now and implement it in send later. That is >> exactly what this patch series does. > > It adds a command to the protocol but does not outline the plan how to > use it, not counting this discussion. > >> I'm fine with either of those paths forward, but I don't want to block >> the compressed send/receive on SETFLAGS or fallocate. > > I get that you care only about the encoded write, but I don't want to > rev protocol every few releases because we did not bother to implement > something we know is missing in the protocol. Anyway, encoded write will > be in v2 scheduled for 5.20 and I'll implement the rest plus will have a > look at your fallocate patches.
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9363f625fa17..1f141de3a7d6 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7459,7 +7459,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) sctx->clone_roots_cnt = arg->clone_sources_count; - sctx->send_max_size = BTRFS_SEND_BUF_SIZE; + sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1; sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL); if (!sctx->send_buf) { ret = -ENOMEM; diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 67721e0281ba..805d8095209a 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -12,7 +12,11 @@ #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" #define BTRFS_SEND_STREAM_VERSION 1 -#define BTRFS_SEND_BUF_SIZE SZ_64K +/* + * In send stream v1, no command is larger than 64k. In send stream v2, no limit + * should be assumed. + */ +#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K enum btrfs_tlv_type { BTRFS_TLV_U8, @@ -80,16 +84,20 @@ enum btrfs_send_cmd { BTRFS_SEND_C_MAX_V1 = 22, /* Version 2 */ - BTRFS_SEND_C_MAX_V2 = 22, + BTRFS_SEND_C_FALLOCATE = 23, + BTRFS_SEND_C_SETFLAGS = 24, + BTRFS_SEND_C_ENCODED_WRITE = 25, + BTRFS_SEND_C_MAX_V2 = 25, /* End */ - BTRFS_SEND_C_MAX = 22, + BTRFS_SEND_C_MAX = 25, }; /* attributes in send stream */ enum { BTRFS_SEND_A_UNSPEC = 0, + /* Version 1 */ BTRFS_SEND_A_UUID = 1, BTRFS_SEND_A_CTRANSID = 2, @@ -112,6 +120,11 @@ enum { BTRFS_SEND_A_PATH_LINK = 17, BTRFS_SEND_A_FILE_OFFSET = 18, + /* + * As of send stream v2, this attribute is special: it must be the last + * attribute in a command, its header contains only the type, and its + * length is implicitly the remaining length of the command. 
+ */ BTRFS_SEND_A_DATA = 19, BTRFS_SEND_A_CLONE_UUID = 20, @@ -120,7 +133,26 @@ enum { BTRFS_SEND_A_CLONE_OFFSET = 23, BTRFS_SEND_A_CLONE_LEN = 24, - BTRFS_SEND_A_MAX = 24, + BTRFS_SEND_A_MAX_V1 = 24, + + /* Version 2 */ + BTRFS_SEND_A_FALLOCATE_MODE = 25, + + BTRFS_SEND_A_SETFLAGS_FLAGS = 26, + + BTRFS_SEND_A_UNENCODED_FILE_LEN = 27, + BTRFS_SEND_A_UNENCODED_LEN = 28, + BTRFS_SEND_A_UNENCODED_OFFSET = 29, + /* + * COMPRESSION and ENCRYPTION default to NONE (0) if omitted from + * BTRFS_SEND_C_ENCODED_WRITE. + */ + BTRFS_SEND_A_COMPRESSION = 30, + BTRFS_SEND_A_ENCRYPTION = 31, + BTRFS_SEND_A_MAX_V2 = 31, + + /* End */ + BTRFS_SEND_A_MAX = 31, }; #ifdef __KERNEL__ diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index d956b2993970..b6f26a434b10 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -777,6 +777,13 @@ struct btrfs_ioctl_received_subvol_args { */ #define BTRFS_SEND_FLAG_VERSION 0x8 +/* + * Send compressed data using the ENCODED_WRITE command instead of decompressing + * the data and sending it with the WRITE command. This requires protocol + * version >= 2. + */ +#define BTRFS_SEND_FLAG_COMPRESSED 0x10 + #define BTRFS_SEND_FLAG_MASK \ (BTRFS_SEND_FLAG_NO_FILE_DATA | \ BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \