diff mbox series

[v10,07/14] btrfs: add definitions + documentation for encoded I/O ioctls

Message ID 9bd601f8c5494342d8c7d8aaa86aa815c2118173.1629234193.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series btrfs: add ioctls and send/receive support for reading/writing compressed data | expand

Commit Message

Omar Sandoval Aug. 17, 2021, 9:06 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

In order to allow sending and receiving compressed data without
decompressing it, we need an interface to write pre-compressed data
directly to the filesystem and the matching interface to read compressed
data without decompressing it. This adds the definitions for ioctls to
do that and detailed explanations of how to use them.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

Comments

Nikolay Borisov Aug. 20, 2021, 8:56 a.m. UTC | #1
On 18.08.21 г. 0:06, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> In order to allow sending and receiving compressed data without
> decompressing it, we need an interface to write pre-compressed data
> directly to the filesystem and the matching interface to read compressed
> data without decompressing it. This adds the definitions for ioctls to
> do that and detailed explanations of how to use them.
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
>  include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 132 insertions(+)
> 
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index d7d3cfead056..95da52955894 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -861,6 +861,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
>  		__u8 align[7];
>  };
>  
> +/*
> + * Data and metadata for an encoded read or write.
> + *
> + * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
> + * compression). This can be used to read the compressed contents of a file or
> + * write pre-compressed data directly to a file.
> + *
> + * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
> + * preadv/pwritev with additional metadata about how the data is encoded and the
> + * size of the unencoded data.
> + *
> + * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
> + * the metadata fields, and returns the size of the encoded data. It reads one
> + * extent per call. It can also read data which is not encoded.
> + *
> + * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
> + * from the iovecs, and returns the size of the encoded data. Note that the
> + * encoded data is not validated when it is written; if it is not valid (e.g.,
> + * it cannot be decompressed), then a subsequent read may return an error.
> + *
> + * Since the filesystem page cache contains decoded data, encoded I/O bypasses
> + * the page cache. Encoded I/O requires CAP_SYS_ADMIN.
> + */
> +struct btrfs_ioctl_encoded_io_args {
> +	/* Input parameters for both reads and writes. */
> +
> +	/*
> +	 * iovecs containing encoded data.
> +	 *
> +	 * For reads, if the size of the encoded data is larger than the sum of
> +	 * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
> +	 * ENOBUFS.
> +	 *
> +	 * For writes, the size of the encoded data is the sum of iov[n].iov_len
> +	 * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
> +	 * increase in the future). This must also be less than or equal to
> +	 * unencoded_len.
> +	 */
> +	const struct iovec __user *iov;
> +	/* Number of iovecs. */
> +	unsigned long iovcnt;
> +	/*
> +	 * Offset in file.
> +	 *
> +	 * For writes, must be aligned to the sector size of the filesystem.
> +	 */
> +	__s64 offset;
> +	/* Currently must be zero. */
> +	__u64 flags;
> +
> +	/*
> +	 * For reads, the following members are filled in with the metadata for
> +	 * the encoded data.
> +	 * For writes, the following members must be set to the metadata for the
> +	 * encoded data.
> +	 */
> +
> +	/*
> +	 * Length of the data in the file.
> +	 *
> +	 * Must be less than or equal to unencoded_len - unencoded_offset. For
> +	 * writes, must be aligned to the sector size of the filesystem unless
> +	 * the data ends at or beyond the current end of the file.
> +	 */
> +	__u64 len;
> +	/*
> +	 * Length of the unencoded (i.e., decrypted and decompressed) data.
> +	 *
> +	 * For writes, must be no more than 128 KiB (this limit may increase in
> +	 * the future). If the unencoded data is actually longer than
> +	 * unencoded_len, then it is truncated; if it is shorter, then it is
> +	 * extended with zeroes.
> +	 */
> +	__u64 unencoded_len;
> +	/*
> +	 * Offset from the first byte of the unencoded data to the first byte of
> +	 * logical data in the file.
> +	 *
> +	 * Must be less than unencoded_len.
> +	 */
> +	__u64 unencoded_offset;
> +	/*
> +	 * BTRFS_ENCODED_IO_COMPRESSION_* type.
> +	 *
> +	 * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
> +	 */
> +	__u32 compression;
> +	/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
> +	__u32 encryption;
> +	/*
> +	 * Reserved for future expansion.
> +	 *
> +	 * For reads, always returned as zero. Users should check for non-zero
> +	 * bytes. If there are any, then the kernel has a newer version of this
> +	 * structure with additional information that the user definition is
> +	 * missing.
> +	 *
> +	 * For writes, must be zeroed.
> +	 */
> +	__u8 reserved[32];
> +};
> +
> +/* Data is not compressed. */
> +#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
> +/* Data is compressed as a single zlib stream. */
> +#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
> +/*
> + * Data is compressed as a single zstd frame with the windowLog compression
> + * parameter set to no more than 17.
> + */
> +#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
> +/*
> + * Data is compressed page by page (using the page size indicated by the name of
> + * the constant) with LZO1X and wrapped in the format documented in
> + * fs/btrfs/lzo.c. For writes, the compression page size must match the
> + * filesystem page size.
> + */
> +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
> +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
> +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
> +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
> +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
> +#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
> +
> +/* Data is not encrypted. */
> +#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
> +#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1

How about using enums for encryption/compression?


> +
>  /* Error codes as returned by the kernel */
>  enum btrfs_err_code {
>  	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
> @@ -989,5 +1117,9 @@ enum btrfs_err_code {
>  				struct btrfs_ioctl_ino_lookup_user_args)
>  #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
>  				struct btrfs_ioctl_vol_args_v2)
> +#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
> +				    struct btrfs_ioctl_encoded_io_args)
> +#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
> +				     struct btrfs_ioctl_encoded_io_args)
>  
>  #endif /* _UAPI_LINUX_BTRFS_H */
>
Omar Sandoval Aug. 20, 2021, 5:48 p.m. UTC | #2
On Fri, Aug 20, 2021 at 11:56:37AM +0300, Nikolay Borisov wrote:
> 
> 
> On 18.08.21 г. 0:06, Omar Sandoval wrote:
> > From: Omar Sandoval <osandov@fb.com>
> > 
> > In order to allow sending and receiving compressed data without
> > decompressing it, we need an interface to write pre-compressed data
> > directly to the filesystem and the matching interface to read compressed
> > data without decompressing it. This adds the definitions for ioctls to
> > do that and detailed explanations of how to use them.
> > 
> > Signed-off-by: Omar Sandoval <osandov@fb.com>
> > ---
> >  include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++
> >  1 file changed, 132 insertions(+)
> > 
> > diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> > index d7d3cfead056..95da52955894 100644
> > --- a/include/uapi/linux/btrfs.h
> > +++ b/include/uapi/linux/btrfs.h
> > @@ -861,6 +861,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
> >  		__u8 align[7];
> >  };
> >  
> > +/*
> > + * Data and metadata for an encoded read or write.
> > + *
> > + * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
> > + * compression). This can be used to read the compressed contents of a file or
> > + * write pre-compressed data directly to a file.
> > + *
> > + * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
> > + * preadv/pwritev with additional metadata about how the data is encoded and the
> > + * size of the unencoded data.
> > + *
> > + * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
> > + * the metadata fields, and returns the size of the encoded data. It reads one
> > + * extent per call. It can also read data which is not encoded.
> > + *
> > + * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
> > + * from the iovecs, and returns the size of the encoded data. Note that the
> > + * encoded data is not validated when it is written; if it is not valid (e.g.,
> > + * it cannot be decompressed), then a subsequent read may return an error.
> > + *
> > + * Since the filesystem page cache contains decoded data, encoded I/O bypasses
> > + * the page cache. Encoded I/O requires CAP_SYS_ADMIN.
> > + */
> > +struct btrfs_ioctl_encoded_io_args {
> > +	/* Input parameters for both reads and writes. */
> > +
> > +	/*
> > +	 * iovecs containing encoded data.
> > +	 *
> > +	 * For reads, if the size of the encoded data is larger than the sum of
> > +	 * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
> > +	 * ENOBUFS.
> > +	 *
> > +	 * For writes, the size of the encoded data is the sum of iov[n].iov_len
> > +	 * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
> > +	 * increase in the future). This must also be less than or equal to
> > +	 * unencoded_len.
> > +	 */
> > +	const struct iovec __user *iov;
> > +	/* Number of iovecs. */
> > +	unsigned long iovcnt;
> > +	/*
> > +	 * Offset in file.
> > +	 *
> > +	 * For writes, must be aligned to the sector size of the filesystem.
> > +	 */
> > +	__s64 offset;
> > +	/* Currently must be zero. */
> > +	__u64 flags;
> > +
> > +	/*
> > +	 * For reads, the following members are filled in with the metadata for
> > +	 * the encoded data.
> > +	 * For writes, the following members must be set to the metadata for the
> > +	 * encoded data.
> > +	 */
> > +
> > +	/*
> > +	 * Length of the data in the file.
> > +	 *
> > +	 * Must be less than or equal to unencoded_len - unencoded_offset. For
> > +	 * writes, must be aligned to the sector size of the filesystem unless
> > +	 * the data ends at or beyond the current end of the file.
> > +	 */
> > +	__u64 len;
> > +	/*
> > +	 * Length of the unencoded (i.e., decrypted and decompressed) data.
> > +	 *
> > +	 * For writes, must be no more than 128 KiB (this limit may increase in
> > +	 * the future). If the unencoded data is actually longer than
> > +	 * unencoded_len, then it is truncated; if it is shorter, then it is
> > +	 * extended with zeroes.
> > +	 */
> > +	__u64 unencoded_len;
> > +	/*
> > +	 * Offset from the first byte of the unencoded data to the first byte of
> > +	 * logical data in the file.
> > +	 *
> > +	 * Must be less than unencoded_len.
> > +	 */
> > +	__u64 unencoded_offset;
> > +	/*
> > +	 * BTRFS_ENCODED_IO_COMPRESSION_* type.
> > +	 *
> > +	 * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
> > +	 */
> > +	__u32 compression;
> > +	/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
> > +	__u32 encryption;
> > +	/*
> > +	 * Reserved for future expansion.
> > +	 *
> > +	 * For reads, always returned as zero. Users should check for non-zero
> > +	 * bytes. If there are any, then the kernel has a newer version of this
> > +	 * structure with additional information that the user definition is
> > +	 * missing.
> > +	 *
> > +	 * For writes, must be zeroed.
> > +	 */
> > +	__u8 reserved[32];
> > +};
> > +
> > +/* Data is not compressed. */
> > +#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
> > +/* Data is compressed as a single zlib stream. */
> > +#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
> > +/*
> > + * Data is compressed as a single zstd frame with the windowLog compression
> > + * parameter set to no more than 17.
> > + */
> > +#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
> > +/*
> > + * Data is compressed page by page (using the page size indicated by the name of
> > + * the constant) with LZO1X and wrapped in the format documented in
> > + * fs/btrfs/lzo.c. For writes, the compression page size must match the
> > + * filesystem page size.
> > + */
> > +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
> > +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
> > +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
> > +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
> > +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
> > +#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
> > +
> > +/* Data is not encrypted. */
> > +#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
> > +#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
> 
> > How about using enums for encryption/compression?

With #define, the user can use #ifdef to check if the constants are
defined and provide their own definitions if not (that's what I did in
the xfstests example programs). Another option is the enum+#define
pattern:

/*
 * Compression types for encoded I/O, written with the enum+#define pattern:
 * every enumerator is paired with a macro of the same name that expands to
 * itself, so user code can still test for a constant with #ifdef while the
 * values themselves are real enum constants.
 *
 * The numeric values are spelled out so they do not depend on the order of
 * the enumerators.
 */
enum {
	/* Data is not compressed. */
	BTRFS_ENCODED_IO_COMPRESSION_NONE = 0,
#define BTRFS_ENCODED_IO_COMPRESSION_NONE BTRFS_ENCODED_IO_COMPRESSION_NONE
	/* Data is compressed as a single zlib stream. */
	BTRFS_ENCODED_IO_COMPRESSION_ZLIB = 1,
#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB BTRFS_ENCODED_IO_COMPRESSION_ZLIB
	/* Data is compressed as a single zstd frame. */
	BTRFS_ENCODED_IO_COMPRESSION_ZSTD = 2,
#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD BTRFS_ENCODED_IO_COMPRESSION_ZSTD
	/* LZO variants; the suffix names the compression page size. */
	BTRFS_ENCODED_IO_COMPRESSION_LZO_4K = 3,
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K BTRFS_ENCODED_IO_COMPRESSION_LZO_4K
	BTRFS_ENCODED_IO_COMPRESSION_LZO_8K = 4,
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K BTRFS_ENCODED_IO_COMPRESSION_LZO_8K
	BTRFS_ENCODED_IO_COMPRESSION_LZO_16K = 5,
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K BTRFS_ENCODED_IO_COMPRESSION_LZO_16K
	BTRFS_ENCODED_IO_COMPRESSION_LZO_32K = 6,
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K BTRFS_ENCODED_IO_COMPRESSION_LZO_32K
	BTRFS_ENCODED_IO_COMPRESSION_LZO_64K = 7,
#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K BTRFS_ENCODED_IO_COMPRESSION_LZO_64K
	/* One past the last type above; deliberately has no companion macro. */
	BTRFS_ENCODED_IO_COMPRESSION_TYPES = 8,
};

But that seems to confuse people. I don't feel strongly one way or
another.
diff mbox series

Patch

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index d7d3cfead056..95da52955894 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -861,6 +861,134 @@  struct btrfs_ioctl_get_subvol_rootref_args {
 		__u8 align[7];
 };
 
+/*
+ * Data and metadata for an encoded read or write.
+ *
+ * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
+ * compression). This can be used to read the compressed contents of a file or
+ * write pre-compressed data directly to a file.
+ *
+ * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
+ * preadv/pwritev with additional metadata about how the data is encoded and the
+ * size of the unencoded data.
+ *
+ * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
+ * the metadata fields, and returns the size of the encoded data. It reads one
+ * extent per call. It can also read data which is not encoded.
+ *
+ * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
+ * from the iovecs, and returns the size of the encoded data. Note that the
+ * encoded data is not validated when it is written; if it is not valid (e.g.,
+ * it cannot be decompressed), then a subsequent read may return an error.
+ *
+ * Since the filesystem page cache contains decoded data, encoded I/O bypasses
+ * the page cache. Encoded I/O requires CAP_SYS_ADMIN.
+ */
+struct btrfs_ioctl_encoded_io_args {
+	/* Input parameters for both reads and writes. */
+
+	/*
+	 * iovecs containing encoded data.
+	 *
+	 * For reads, if the size of the encoded data is larger than the sum of
+	 * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
+	 * ENOBUFS.
+	 *
+	 * For writes, the size of the encoded data is the sum of iov[n].iov_len
+	 * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
+	 * increase in the future). This must also be less than or equal to
+	 * unencoded_len.
+	 */
+	const struct iovec __user *iov;
+	/* Number of iovecs. */
+	unsigned long iovcnt;
+	/*
+	 * Offset in file.
+	 *
+	 * For writes, must be aligned to the sector size of the filesystem.
+	 */
+	__s64 offset;
+	/* Currently must be zero. */
+	__u64 flags;
+
+	/*
+	 * For reads, the following members are filled in with the metadata for
+	 * the encoded data.
+	 * For writes, the following members must be set to the metadata for the
+	 * encoded data.
+	 */
+
+	/*
+	 * Length of the data in the file.
+	 *
+	 * Must be less than or equal to unencoded_len - unencoded_offset. For
+	 * writes, must be aligned to the sector size of the filesystem unless
+	 * the data ends at or beyond the current end of the file.
+	 */
+	__u64 len;
+	/*
+	 * Length of the unencoded (i.e., decrypted and decompressed) data.
+	 *
+	 * For writes, must be no more than 128 KiB (this limit may increase in
+	 * the future). If the unencoded data is actually longer than
+	 * unencoded_len, then it is truncated; if it is shorter, then it is
+	 * extended with zeroes.
+	 */
+	__u64 unencoded_len;
+	/*
+	 * Offset from the first byte of the unencoded data to the first byte of
+	 * logical data in the file.
+	 *
+	 * Must be less than unencoded_len.
+	 */
+	__u64 unencoded_offset;
+	/*
+	 * BTRFS_ENCODED_IO_COMPRESSION_* type.
+	 *
+	 * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
+	 */
+	__u32 compression;
+	/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
+	__u32 encryption;
+	/*
+	 * Reserved for future expansion.
+	 *
+	 * For reads, always returned as zero. Users should check for non-zero
+	 * bytes. If there are any, then the kernel has a newer version of this
+	 * structure with additional information that the user definition is
+	 * missing.
+	 *
+	 * For writes, must be zeroed.
+	 */
+	__u8 reserved[32];
+};
+
+/* Data is not compressed. */
+#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
+/* Data is compressed as a single zlib stream. */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
+/*
+ * Data is compressed as a single zstd frame with the windowLog compression
+ * parameter set to no more than 17.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
+/*
+ * Data is compressed page by page (using the page size indicated by the name of
+ * the constant) with LZO1X and wrapped in the format documented in
+ * fs/btrfs/lzo.c. For writes, the compression page size must match the
+ * filesystem page size.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
+#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
+
+/* Data is not encrypted. */
+#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
+#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -989,5 +1117,9 @@  enum btrfs_err_code {
 				struct btrfs_ioctl_ino_lookup_user_args)
 #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
 				struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
+				    struct btrfs_ioctl_encoded_io_args)
+#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
+				     struct btrfs_ioctl_encoded_io_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */