diff mbox series

[v15,3/7] btrfs: add send stream v2 definitions

Message ID abea9f460c7341361e58cbba8af355654eb94b5b.1649092662.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series btrfs: add send/receive support for reading/writing compressed data | expand

Commit Message

Omar Sandoval April 4, 2022, 5:29 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

This adds the definitions of the new commands for send stream version 2
and their respective attributes: fallocate, FS_IOC_SETFLAGS (a.k.a.
chattr), and encoded writes. It also documents two changes to the send
stream format in v2: the receiver shouldn't assume a maximum command
size, and the DATA attribute is encoded differently to allow for writes
larger than 64k. These will be implemented in subsequent changes, and
then the ioctl will accept the new version and flag.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 fs/btrfs/send.c            |  2 +-
 fs/btrfs/send.h            | 40 ++++++++++++++++++++++++++++++++++----
 include/uapi/linux/btrfs.h |  7 +++++++
 3 files changed, 44 insertions(+), 5 deletions(-)

Comments

David Sterba May 18, 2022, 9 p.m. UTC | #1
On Mon, Apr 04, 2022 at 10:29:05AM -0700, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> This adds the definitions of the new commands for send stream version 2
> and their respective attributes: fallocate, FS_IOC_SETFLAGS (a.k.a.
> chattr), and encoded writes. It also documents two changes to the send
> stream format in v2: the receiver shouldn't assume a maximum command
> size, and the DATA attribute is encoded differently to allow for writes
> larger than 64k. These will be implemented in subsequent changes, and
> then the ioctl will accept the new version and flag.
> 
> Reviewed-by: Josef Bacik <josef@toxicpanda.com>
> Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
>  fs/btrfs/send.c            |  2 +-
>  fs/btrfs/send.h            | 40 ++++++++++++++++++++++++++++++++++----
>  include/uapi/linux/btrfs.h |  7 +++++++
>  3 files changed, 44 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> index 9363f625fa17..1f141de3a7d6 100644
> --- a/fs/btrfs/send.c
> +++ b/fs/btrfs/send.c
> @@ -7459,7 +7459,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
>  
>  	sctx->clone_roots_cnt = arg->clone_sources_count;
>  
> -	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
> +	sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
>  	sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
>  	if (!sctx->send_buf) {
>  		ret = -ENOMEM;
> diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
> index 67721e0281ba..805d8095209a 100644
> --- a/fs/btrfs/send.h
> +++ b/fs/btrfs/send.h
> @@ -12,7 +12,11 @@
>  #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
>  #define BTRFS_SEND_STREAM_VERSION 1
>  
> -#define BTRFS_SEND_BUF_SIZE SZ_64K
> +/*
> + * In send stream v1, no command is larger than 64k. In send stream v2, no limit
> + * should be assumed.
> + */
> +#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K
>  
>  enum btrfs_tlv_type {
>  	BTRFS_TLV_U8,
> @@ -80,16 +84,20 @@ enum btrfs_send_cmd {
>  	BTRFS_SEND_C_MAX_V1 = 22,
>  
>  	/* Version 2 */
> -	BTRFS_SEND_C_MAX_V2 = 22,
> +	BTRFS_SEND_C_FALLOCATE = 23,
> +	BTRFS_SEND_C_SETFLAGS = 24,

Do you have patches that implement the fallocate modes and setflags? I
don't see it in this patchset. The setflags should be switched to
something closer to the recent refactoring that unifies all the
flags/attrs to fileattr. I have a prototype patch for that, comparing
the inode flags in the same way as file mode, the tricky part is on the
receive side how to apply them correctly. On the sending side it's
simple though.

> +	BTRFS_SEND_C_ENCODED_WRITE = 25,
> +	BTRFS_SEND_C_MAX_V2 = 25,
>  
>  	/* End */
> -	BTRFS_SEND_C_MAX = 22,
> +	BTRFS_SEND_C_MAX = 25,
>  };
Omar Sandoval May 18, 2022, 10:25 p.m. UTC | #2
On Wed, May 18, 2022 at 11:00:03PM +0200, David Sterba wrote:
> On Mon, Apr 04, 2022 at 10:29:05AM -0700, Omar Sandoval wrote:
> > From: Omar Sandoval <osandov@fb.com>
> > 
> > This adds the definitions of the new commands for send stream version 2
> > and their respective attributes: fallocate, FS_IOC_SETFLAGS (a.k.a.
> > chattr), and encoded writes. It also documents two changes to the send
> > stream format in v2: the receiver shouldn't assume a maximum command
> > size, and the DATA attribute is encoded differently to allow for writes
> > larger than 64k. These will be implemented in subsequent changes, and
> > then the ioctl will accept the new version and flag.
> > 
> > Reviewed-by: Josef Bacik <josef@toxicpanda.com>
> > Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
> > Signed-off-by: Omar Sandoval <osandov@fb.com>
> > ---
> >  fs/btrfs/send.c            |  2 +-
> >  fs/btrfs/send.h            | 40 ++++++++++++++++++++++++++++++++++----
> >  include/uapi/linux/btrfs.h |  7 +++++++
> >  3 files changed, 44 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
> > index 9363f625fa17..1f141de3a7d6 100644
> > --- a/fs/btrfs/send.c
> > +++ b/fs/btrfs/send.c
> > @@ -7459,7 +7459,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
> >  
> >  	sctx->clone_roots_cnt = arg->clone_sources_count;
> >  
> > -	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
> > +	sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
> >  	sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
> >  	if (!sctx->send_buf) {
> >  		ret = -ENOMEM;
> > diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
> > index 67721e0281ba..805d8095209a 100644
> > --- a/fs/btrfs/send.h
> > +++ b/fs/btrfs/send.h
> > @@ -12,7 +12,11 @@
> >  #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
> >  #define BTRFS_SEND_STREAM_VERSION 1
> >  
> > -#define BTRFS_SEND_BUF_SIZE SZ_64K
> > +/*
> > + * In send stream v1, no command is larger than 64k. In send stream v2, no limit
> > + * should be assumed.
> > + */
> > +#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K
> >  
> >  enum btrfs_tlv_type {
> >  	BTRFS_TLV_U8,
> > @@ -80,16 +84,20 @@ enum btrfs_send_cmd {
> >  	BTRFS_SEND_C_MAX_V1 = 22,
> >  
> >  	/* Version 2 */
> > -	BTRFS_SEND_C_MAX_V2 = 22,
> > +	BTRFS_SEND_C_FALLOCATE = 23,
> > +	BTRFS_SEND_C_SETFLAGS = 24,
> 
> Do you have patches that implement the fallocate modes and setflags? I
> don't see it in this patchset.

Nope, as discussed before, in order to keep the patch series manageable,
this series adds the definitions and receive support for fallocate and
setflags, but leaves the send side to be implemented at a later time.

I implemented fallocate for send back in 2019:
https://github.com/osandov/linux/commits/btrfs-send-v2. It passed some
basic testing back then, but it'd need a big rebase and more testing.

> The setflags should be switched to
> something closer to the recent refactoring that unifies all the
> flags/attrs to fileattr. I have a prototype patch for that, comparing
> the inode flags in the same way as file mode, the tricky part is on the
> receive side how to apply them correctly. On the sending side it's
> simple though.

The way this series documents (and implements in receive)
BTRFS_SEND_C_SETFLAGS is that it's a simple call to FS_IOC_SETFLAGS with
given flags. I don't think this is affected by the change to fileattr,
unless I'm misunderstanding.

This is in line with the other commands being straightforward system
calls, but it does mean that the sending side has to deal with the
complexities of an immutable or append-only file being modified between
incremental sends (by temporarily clearing the flag), and of inherited
flags (e.g., a COW file inside of a NOCOW directory). I suppose it'd
also be possible to have SETFLAGS define the final flags and leave it up
to receive to make that happen by temporarily setting/clearing flags as
necessary, but that is a bit inconsistent with how we've handled other
commands.
David Sterba May 19, 2022, 4:07 p.m. UTC | #3
On Wed, May 18, 2022 at 03:25:34PM -0700, Omar Sandoval wrote:
> On Wed, May 18, 2022 at 11:00:03PM +0200, David Sterba wrote:
> > On Mon, Apr 04, 2022 at 10:29:05AM -0700, Omar Sandoval wrote:
> > > @@ -80,16 +84,20 @@ enum btrfs_send_cmd {
> > >  	BTRFS_SEND_C_MAX_V1 = 22,
> > >  
> > >  	/* Version 2 */
> > > -	BTRFS_SEND_C_MAX_V2 = 22,
> > > +	BTRFS_SEND_C_FALLOCATE = 23,
> > > +	BTRFS_SEND_C_SETFLAGS = 24,
> > 
> > Do you have patches that implement the fallocate modes and setflags? I
> > don't see it in this patchset.
> 
> Nope, as discussed before, in order to keep the patch series managable,
> this series adds the definitions and receive support for fallocate and
> setflags, but leaves the send side to be implemented at a later time.
> 
> I implemented fallocate for send back in 2019:
> https://github.com/osandov/linux/commits/btrfs-send-v2. It passed some
> basic testing back then, but it'd need a big rebase and more testing.

The patches in the branch are partially cleanups and preparatory work,
so at least avoiding sending the holes would be nice to have for v2 as
it was one of the first bugs reported. The fallocate modes seem to be
easy. The rest is about the versioning infrastructure that we already
have merged.

> > The setflags should be switched to
> > something closer to the recent refactoring that unifies all the
> > flags/attrs to fileattr. I have a prototype patch for that, comparing
> > the inode flags in the same way as file mode, the tricky part is on the
> > receive side how to apply them correctly. On the sending side it's
> > simple though.
> 
> The way this series documents (and implements in receive)
> BTRFS_SEND_C_SETFLAGS is that it's a simple call to FS_IOC_SETFLAGS with
> given flags. I don't think this is affected by the change to fileattr,
> unless I'm misunderstanding.

The SETFLAGS ioctls are obsolete and I don't want to make them part of
the protocol definition because the bit namespace contains flags we don't
have implemented or are not related to anything in btrfs.

https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fs.h#L220

It's basically just naming and specifying what exactly is the value so
we should pick the most recent interface name that superseded SETFLAGS
and the XFLAGS.

> This is in line with the other commands being straightforward system
> calls, but it does mean that the sending side has to deal with the
> complexities of an immutable or append-only file being modified between
> incremental sends (by temporarily clearing the flag), and of inherited
> flags (e.g., a COW file inside of a NOCOW directory).

Yeah the receiving side needs to understand the constraints of the
flags, it has only the information about the final state and not the
order in which the flags get applied.

> I suppose it'd
> also be possible to have SETFLAGS define the final flags and leave it up
> to receive to make that happen by temporarily setting/clearing flags as
> necessary, but that is a bit inconsistent with how we've handled other
> commands.

I'm not sure we can always stick to 1:1 mapping to syscalls or ioctls,
of course it's the best option, but the protocol can transfer eg.
more complete information and it's up to the receiving side to apply it
(like if a file has NODATASUM flag set).

From the other side there are multiple actions for something that could
be just one, like creating file first as an orphan and then renaming it.
So I'd like to look at it from the protocol perspective and not
necessarily blindly copy the OS interfaces.
Omar Sandoval May 19, 2022, 10:31 p.m. UTC | #4
On Thu, May 19, 2022 at 06:07:49PM +0200, David Sterba wrote:
> On Wed, May 18, 2022 at 03:25:34PM -0700, Omar Sandoval wrote:
> > On Wed, May 18, 2022 at 11:00:03PM +0200, David Sterba wrote:
> > > On Mon, Apr 04, 2022 at 10:29:05AM -0700, Omar Sandoval wrote:
> > > > @@ -80,16 +84,20 @@ enum btrfs_send_cmd {
> > > >  	BTRFS_SEND_C_MAX_V1 = 22,
> > > >  
> > > >  	/* Version 2 */
> > > > -	BTRFS_SEND_C_MAX_V2 = 22,
> > > > +	BTRFS_SEND_C_FALLOCATE = 23,
> > > > +	BTRFS_SEND_C_SETFLAGS = 24,
> > > 
> > > Do you have patches that implement the fallocate modes and setflags? I
> > > don't see it in this patchset.
> > 
> > Nope, as discussed before, in order to keep the patch series managable,
> > this series adds the definitions and receive support for fallocate and
> > setflags, but leaves the send side to be implemented at a later time.
> > 
> > I implemented fallocate for send back in 2019:
> > https://github.com/osandov/linux/commits/btrfs-send-v2. It passed some
> > basic testing back then, but it'd need a big rebase and more testing.
> 
> The patches in the branch are partially cleanups and preparatory work,
> so at least avoiding sending the holes would be nice to have for v2 as
> it was one of the first bugs reported. The falllocate modes seem to be
> easy. The rest is about the versioning infrastructure that we already
> have merged.

I rebased the patches on this series:
https://github.com/osandov/linux/commits/btrfs-send-v2-redux. It passes
some basic testing, but it'll definitely need a lot of fstests.

> > > The setflags should be switched to
> > > something closer to the recent refactoring that unifies all the
> > > flags/attrs to fileattr. I have a prototype patch for that, comparing
> > > the inode flags in the same way as file mode, the tricky part is on the
> > > receive side how to apply them correctly. On the sending side it's
> > > simple though.
> > 
> > The way this series documents (and implements in receive)
> > BTRFS_SEND_C_SETFLAGS is that it's a simple call to FS_IOC_SETFLAGS with
> > given flags. I don't think this is affected by the change to fileattr,
> > unless I'm misunderstanding.
> 
> The SETFLAGS ioctls are obsolete and I don't want to make them part of
> the protocol defition because the bit namespace contains flags we don't
> have implemented or are not releated to anything in btrfs.
> 
> https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fs.h#L220
> 
> It's basically just naming and specifying what exactly is the value so
> we should pick the most recent interface name that superseded SETFLAGS
> and the XFLAGS.

This is the situation with FS_IOC_SETFLAGS, FS_IOC_FSSETXATTR, and
fileattr as I understand it. Please correct me if I'm wrong:

- FS_IOC_SETFLAGS originally came from ext4 and was added to Btrfs very
  early on (commit 6cbff00f4632 ("Btrfs: implement
  FS_IOC_GETFLAGS/SETFLAGS/GETVERSION")).
- FS_IOC_FSSETXATTR originally came from XFS and was added to Btrfs a
  few years ago (in commit 025f2121488e ("btrfs: add FS_IOC_FSSETXATTR
  ioctl")).
- The two ioctls allow setting some of the same flags (e.g., IMMUTABLE,
  APPEND), but some are only supported by SETFLAGS (e.g., NOCOW) and
  some are only supported by FSSETXATTR (none of these are supported by
  Btrfs, however).
- fileattr is a recent VFS interface that is used to implement those two
  ioctls. It basically passes through the arguments for whichever ioctl
  was called and translates the equivalent flags between the two ioctls.
  It is not a new UAPI and doesn't have its own set of flags.

Is there another new UAPI that I'm missing that obsoletes SETFLAGS?

I see your point about the irrelevant flags in SETFLAGS, however. Is
your suggestion to have our own send protocol-specific set of flags that
we translate to whatever ioctl we need to make?

> > This is in line with the other commands being straightforward system
> > calls, but it does mean that the sending side has to deal with the
> > complexities of an immutable or append-only file being modified between
> > incremental sends (by temporarily clearing the flag), and of inherited
> > flags (e.g., a COW file inside of a NOCOW directory).
> 
> Yeah the receiving side needs to understand the constraints of the
> flags, it has only the information about the final state and not the
> order in which the flags get applied.

If the sender only tells the receiver what the final flags are, then
yes, the receiver would need to deal with, e.g., temporarily clearing
and resetting flags. The way I envisioned it was that the sender would
instead send commands for those intermediate flag operations. E.g., if
the incremental send requires writing some data to a file that is
immutable in both the source and the parent subvolume, the sender could
send commands to: clear the immutable flag, write the data, set the
immutable flag. This is a lot like the orphan renaming that you
mentioned.

If we want to have receive handle the intermediate states instead, then
I would like to postpone SETFLAGS (or whatever we call it) to send
protocol v3, since it'll be very tricky to get right and we can't add it
to the protocol without having an implementation in the receiver.

On the other hand, if send takes care of the intermediate states and
receive just has to blindly apply the flags, then we can add SETFLAGS to
the protocol and receive now and implement it in send later. That is
exactly what this patch series does.

I'm fine with either of those paths forward, but I don't want to block
the compressed send/receive on SETFLAGS or fallocate.

Thanks,
Omar
David Sterba May 20, 2022, 7:34 p.m. UTC | #5
On Thu, May 19, 2022 at 03:31:56PM -0700, Omar Sandoval wrote:
> On Thu, May 19, 2022 at 06:07:49PM +0200, David Sterba wrote:
> > The SETFLAGS ioctls are obsolete and I don't want to make them part of
> > the protocol defition because the bit namespace contains flags we don't
> > have implemented or are not releated to anything in btrfs.
> > 
> > https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fs.h#L220
> > 
> > It's basically just naming and specifying what exactly is the value so
> > we should pick the most recent interface name that superseded SETFLAGS
> > and the XFLAGS.
> 
> This is the situation with FS_IOC_SETFLAGS, FS_IOC_FSSETXATTR, and
> fileattr as I understand it. Please correct me if I'm wrong:
> 
> - FS_IOC_SETFLAGS originally came from ext4 and was added to Btrfs very
>   early on (commit 6cbff00f4632 ("Btrfs: implement
>   FS_IOC_GETFLAGS/SETFLAGS/GETVERSION")).
> - FS_IOC_FSSETXATTR originally came from XFS and was added to Btrfs a
>   few years ago (in commit 025f2121488e ("btrfs: add FS_IOC_FSSETXATTR
>   ioctl")).
> - The two ioctls allow setting some of the same flags (e.g., IMMUTABLE,
>   APPEND), but some are only supported by SETFLAGS (e.g., NOCOW) and
>   some are only supported by FSSETXATTR (none of these are supported by
>   Btrfs, however).
> - fileattr is a recent VFS interface that is used to implement those two
>   ioctls. It basically passes through the arguments for whichever ioctl
>   was called and translates the equivalent flags between the two ioctls.
>   It is not a new UAPI and doesn't have its own set of flags.
> 
> Is there another new UAPI that I'm missing that obsoletes SETFLAGS?

That was supposed to be FSSETXATTR, new flags have appeared there, the
reason for btrfs was to allow the FS_XFLAG_DAX bit as people were
working on the DAX support, and potentially other bits like
FS_XFLAG_NOSYMLINKS or FS_XFLAG_NODEFRAG. Or new flags that we want to
be able to set, NODATASUM for example.

> I see your point about the irrelevant flags in SETFLAGS, however. Is
> your suggestion to have our own send protocol-specific set of flags that
> we translate to whatever ioctl we need to make?

Yes, that's the idea, the flags are not protocol-specific but rather
btrfs-specific, ie we want to support namely the bits that btrfs inodes
can have.

> > > This is in line with the other commands being straightforward system
> > > calls, but it does mean that the sending side has to deal with the
> > > complexities of an immutable or append-only file being modified between
> > > incremental sends (by temporarily clearing the flag), and of inherited
> > > flags (e.g., a COW file inside of a NOCOW directory).
> > 
> > Yeah the receiving side needs to understand the constraints of the
> > flags, it has only the information about the final state and not the
> > order in which the flags get applied.
> 
> If the sender only tells the receiver what the final flags are, then
> yes, the receiver would need to deal with, e.g., temporarily clearing
> and resetting flags. The way I envisioned it was that the sender would
> instead send commands for those intermediate flag operations. E.g., if
> the incremental send requires writing some data to a file that is
> immutable in both the source and the parent subvolume, the sender could
> send commands to: clear the immutable flag, write the data, set the
> immutable flag. This is a lot like the orphan renaming that you
> mentioned.

I see, so the question is where do we want to put the logic. I'd go with
userspace as lots of things are easier there, eg. maintaining some
intermediate state or delayed application of bits/flags.

> If we want to have receive handle the intermediate states instead, then
> I would like to postpone SETFLAGS (or whatever we call it) to send
> protocol v3, since it'll be very tricky to get right and we can't add it
> to the protocol without having an implementation in the receiver.

Yeah it would be tricky to generate the sequence right, while if it's on
the receiving side we can simply ignore/report it or implement a subset
where we know how to apply (eg. immutable) and don't need to postpone
it.

> On the other hand, if send takes care of the intermediate states and
> receive just has to blindly apply the flags, then we can add SETFLAGS to
> the protocol and receive now and implement it in send later. That is
> exactly what this patch series does.

It adds a command to the protocol but does not outline the plan how to
use it, not counting this discussion.

> I'm fine with either of those paths forward, but I don't want to block
> the compressed send/receive on SETFLAGS or fallocate.

I get that you care only about the encoded write, but I don't want to
rev protocol every few releases because we did not bother to implement
something we know is missing in the protocol. Anyway, encoded write will
be in v2 scheduled for 5.20 and I'll implement the rest plus will have a
look at your fallocate patches.
Graham Cobb May 20, 2022, 8:58 p.m. UTC | #6
On 20/05/2022 20:34, David Sterba wrote:
> On Thu, May 19, 2022 at 03:31:56PM -0700, Omar Sandoval wrote:
>> On Thu, May 19, 2022 at 06:07:49PM +0200, David Sterba wrote:
>>> The SETFLAGS ioctls are obsolete and I don't want to make them part of
>>> the protocol defition because the bit namespace contains flags we don't
>>> have implemented or are not releated to anything in btrfs.
>>>
>>> https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/fs.h#L220
>>>
>>> It's basically just naming and specifying what exactly is the value so
>>> we should pick the most recent interface name that superseded SETFLAGS
>>> and the XFLAGS.
>>
>> This is the situation with FS_IOC_SETFLAGS, FS_IOC_FSSETXATTR, and
>> fileattr as I understand it. Please correct me if I'm wrong:
>>
>> - FS_IOC_SETFLAGS originally came from ext4 and was added to Btrfs very
>>   early on (commit 6cbff00f4632 ("Btrfs: implement
>>   FS_IOC_GETFLAGS/SETFLAGS/GETVERSION")).
>> - FS_IOC_FSSETXATTR originally came from XFS and was added to Btrfs a
>>   few years ago (in commit 025f2121488e ("btrfs: add FS_IOC_FSSETXATTR
>>   ioctl")).
>> - The two ioctls allow setting some of the same flags (e.g., IMMUTABLE,
>>   APPEND), but some are only supported by SETFLAGS (e.g., NOCOW) and
>>   some are only supported by FSSETXATTR (none of these are supported by
>>   Btrfs, however).
>> - fileattr is a recent VFS interface that is used to implement those two
>>   ioctls. It basically passes through the arguments for whichever ioctl
>>   was called and translates the equivalent flags between the two ioctls.
>>   It is not a new UAPI and doesn't have its own set of flags.
>>
>> Is there another new UAPI that I'm missing that obsoletes SETFLAGS?
> 
> That was supposed to be FSSETXATTR, new flags have appeared there, the
> reason for btrfs was to allow the FS_XFLAG_DAX bit as people are were
> working on the DAX support, and potentially other bits like
> FS_XFLAG_NOSYMLINKS or FS_XFLAG_NODEFRAG. Or new flags that we want to
> be able to set, NODATASUM for example.
> 
>> I see your point about the irrelevant flags in SETFLAGS, however. Is
>> your suggestion to have our own send protocol-specific set of flags that
>> we translate to whatever ioctl we need to make?
> 
> Yes, that's the idea, the flags are not protocol-specific but rather
> btrfs-specific, ie we want to support namely the bits that btrfs inodes
> can have.
> 

>>>> This is in line with the other commands being straightforward system
>>>> calls, but it does mean that the sending side has to deal with the
>>>> complexities of an immutable or append-only file being modified between
>>>> incremental sends (by temporarily clearing the flag), and of inherited
>>>> flags (e.g., a COW file inside of a NOCOW directory).
>>>
>>> Yeah the receiving side needs to understand the constraints of the
>>> flags, it has only the information about the final state and not the
>>> order in which the flags get applied.
>>
>> If the sender only tells the receiver what the final flags are, then
>> yes, the receiver would need to deal with, e.g., temporarily clearing
>> and resetting flags. The way I envisioned it was that the sender would
>> instead send commands for those intermediate flag operations. E.g., if
>> the incremental send requires writing some data to a file that is
>> immutable in both the source and the parent subvolume, the sender could
>> send commands to: clear the immutable flag, write the data, set the
>> immutable flag. This is a lot like the orphan renaming that you
>> mentioned.
> 
> I see, so the question is where do we want to put the logic. I'd go with
> userspace as lots of things are easier there, eg. maitaining some
> intermediate state or delayed application of bits/flags.
>
We should remember that what you are designing here is a protocol for
transmission of a snapshot. The protocol features are numbers that have
to remain unchanged across all future software updates and all
implementations of this version of the protocol. The values may or may
not coincidentally match some constant we know today in the Linux 5.x
ABI but they may have no relation to *anything* on the receiving side.
Don't forget that some people are archiving send streams as a form of
backup with the intention of playing them back in 10 or 20 years time on
a btrfs implementation that might bear little resemblance to anything we
would recognise today (not a good idea, but people are doing it).

There is no way the sender has any idea what those future
implementations might have to do to replicate the source data - that is
up to those implementations. For example, it is very easy to imagine
that some future OS might disallow the concept of "clearing the
immutable flag". In that case, what the receiver needs to know is that
this new data represents a new immutable file, based on the contents of
the previous file with some specified differences - maybe it will handle
this by turning off the immutable flag, or deleting the old file and
writing a new one, or asking the system manager to authorize it, or
using some versioning feature built into a future version of btrfs, or ...

The right question isn't to ask "what would a Linux BTRFS receiver
running the same software rev as the sender need to do", it needs to be
"what information do I need to supply that gives a future receiver, on a
completely different system, with a different I/O architecture and a
different kernel, in 20 years, the best chance to implement it".

Graham

P.S. I'm an old network guy, not a file system guy. Send/receive is a
network protocol, with the added problems that (i) there is no
negotiation or feedback channel, and (ii) the data is probably mission
critical to some people and they expect it to be usable in 10's of years
time.

>> If we want to have receive handle the intermediate states instead, then
>> I would like to postpone SETFLAGS (or whatever we call it) to send
>> protocol v3, since it'll be very tricky to get right and we can't add it
>> to the protocol without having an implementation in the receiver.
> 
> Yeah it would be tricky to generate the sequence right, while if it's on
> the receiving side we can simply ignore/report it or implement a subset
> where we know how to apply (eg. immutable) and don't need to postpone
> it.
> 
>> On the other hand, if send takes care of the intermediate states and
>> receive just has to blindly apply the flags, then we can add SETFLAGS to
>> the protocol and receive now and implement it in send later. That is
>> exactly what this patch series does.
> 
> It adds a command to the protocol but does not outline the plan how to
> use it, not counting this discussion.
> 
>> I'm fine with either of those paths forward, but I don't want to block
>> the compressed send/receive on SETFLAGS or fallocate.
> 
> I get that you care only about the encoded write, but I don't want to
> rev protocol every few releases because we did not bother to implement
> something we know is missing in the protocol. Anyway, encoded write will
> be in v2 scheduled for 5.20 and I'll implement the rest plus will have a
> look at your fallocate patches.
diff mbox series

Patch

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 9363f625fa17..1f141de3a7d6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7459,7 +7459,7 @@  long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
 
 	sctx->clone_roots_cnt = arg->clone_sources_count;
 
-	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
+	sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
 	sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
 	if (!sctx->send_buf) {
 		ret = -ENOMEM;
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 67721e0281ba..805d8095209a 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -12,7 +12,11 @@ 
 #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
 #define BTRFS_SEND_STREAM_VERSION 1
 
-#define BTRFS_SEND_BUF_SIZE SZ_64K
+/*
+ * In send stream v1, no command is larger than 64k. In send stream v2, no limit
+ * should be assumed.
+ */
+#define BTRFS_SEND_BUF_SIZE_V1 SZ_64K
 
 enum btrfs_tlv_type {
 	BTRFS_TLV_U8,
@@ -80,16 +84,20 @@  enum btrfs_send_cmd {
 	BTRFS_SEND_C_MAX_V1 = 22,
 
 	/* Version 2 */
-	BTRFS_SEND_C_MAX_V2 = 22,
+	BTRFS_SEND_C_FALLOCATE = 23,
+	BTRFS_SEND_C_SETFLAGS = 24,
+	BTRFS_SEND_C_ENCODED_WRITE = 25,
+	BTRFS_SEND_C_MAX_V2 = 25,
 
 	/* End */
-	BTRFS_SEND_C_MAX = 22,
+	BTRFS_SEND_C_MAX = 25,
 };
 
 /* attributes in send stream */
 enum {
 	BTRFS_SEND_A_UNSPEC = 0,
 
+	/* Version 1 */
 	BTRFS_SEND_A_UUID = 1,
 	BTRFS_SEND_A_CTRANSID = 2,
 
@@ -112,6 +120,11 @@  enum {
 	BTRFS_SEND_A_PATH_LINK = 17,
 
 	BTRFS_SEND_A_FILE_OFFSET = 18,
+	/*
+	 * As of send stream v2, this attribute is special: it must be the last
+	 * attribute in a command, its header contains only the type, and its
+	 * length is implicitly the remaining length of the command.
+	 */
 	BTRFS_SEND_A_DATA = 19,
 
 	BTRFS_SEND_A_CLONE_UUID = 20,
@@ -120,7 +133,26 @@  enum {
 	BTRFS_SEND_A_CLONE_OFFSET = 23,
 	BTRFS_SEND_A_CLONE_LEN = 24,
 
-	BTRFS_SEND_A_MAX = 24,
+	BTRFS_SEND_A_MAX_V1 = 24,
+
+	/* Version 2 */
+	BTRFS_SEND_A_FALLOCATE_MODE = 25,
+
+	BTRFS_SEND_A_SETFLAGS_FLAGS = 26,
+
+	BTRFS_SEND_A_UNENCODED_FILE_LEN = 27,
+	BTRFS_SEND_A_UNENCODED_LEN = 28,
+	BTRFS_SEND_A_UNENCODED_OFFSET = 29,
+	/*
+	 * COMPRESSION and ENCRYPTION default to NONE (0) if omitted from
+	 * BTRFS_SEND_C_ENCODED_WRITE.
+	 */
+	BTRFS_SEND_A_COMPRESSION = 30,
+	BTRFS_SEND_A_ENCRYPTION = 31,
+	BTRFS_SEND_A_MAX_V2 = 31,
+
+	/* End */
+	BTRFS_SEND_A_MAX = 31,
 };
 
 #ifdef __KERNEL__
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index d956b2993970..b6f26a434b10 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -777,6 +777,13 @@  struct btrfs_ioctl_received_subvol_args {
  */
 #define BTRFS_SEND_FLAG_VERSION			0x8
 
+/*
+ * Send compressed data using the ENCODED_WRITE command instead of decompressing
+ * the data and sending it with the WRITE command. This requires protocol
+ * version >= 2.
+ */
+#define BTRFS_SEND_FLAG_COMPRESSED		0x10
+
 #define BTRFS_SEND_FLAG_MASK \
 	(BTRFS_SEND_FLAG_NO_FILE_DATA | \
 	 BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \