diff mbox series

[v5,15/40] netfs: Add support for DIO buffering

Message ID 20231221132400.1601991-16-dhowells@redhat.com (mailing list archive)
State New, archived
Headers show
Series netfs, afs, 9p: Delegate high-level I/O to netfslib | expand

Commit Message

David Howells Dec. 21, 2023, 1:23 p.m. UTC
Add a bvec array pointer and an iterator to netfs_io_request for either
holding a copy of a DIO iterator or a list of all the bits of buffer
pointed to by a DIO iterator.

There are two problems:  Firstly, if an iovec-class iov_iter is passed to
->read_iter() or ->write_iter(), this cannot be passed directly to
kernel_sendmsg() or kernel_recvmsg() as that may cause locking recursion if
a fault is generated, so we need to keep track of the pages involved
separately.

Secondly, if the I/O is asynchronous, we must copy the iov_iter describing
the buffer before returning to the caller as it may be immediately
deallocated.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
---
 fs/netfs/objects.c    | 10 ++++++++++
 include/linux/netfs.h |  4 ++++
 2 files changed, 14 insertions(+)

Comments

Nathan Chancellor Dec. 26, 2023, 4:54 p.m. UTC | #1
On Thu, Dec 21, 2023 at 01:23:10PM +0000, David Howells wrote:
> Add a bvec array pointer and an iterator to netfs_io_request for either
> holding a copy of a DIO iterator or a list of all the bits of buffer
> pointed to by a DIO iterator.
> 
> There are two problems:  Firstly, if an iovec-class iov_iter is passed to
> ->read_iter() or ->write_iter(), this cannot be passed directly to
> kernel_sendmsg() or kernel_recvmsg() as that may cause locking recursion if
> a fault is generated, so we need to keep track of the pages involved
> separately.
> 
> Secondly, if the I/O is asynchronous, we must copy the iov_iter describing
> the buffer before returning to the caller as it may be immediately
> deallocated.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> cc: linux-cachefs@redhat.com
> cc: linux-fsdevel@vger.kernel.org
> cc: linux-mm@kvack.org
> ---
>  fs/netfs/objects.c    | 10 ++++++++++
>  include/linux/netfs.h |  4 ++++
>  2 files changed, 14 insertions(+)
> 
> diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
> index 1bd20bdad983..4df5e5eeada6 100644
> --- a/fs/netfs/objects.c
> +++ b/fs/netfs/objects.c
> @@ -76,6 +76,7 @@ static void netfs_free_request(struct work_struct *work)
>  {
>  	struct netfs_io_request *rreq =
>  		container_of(work, struct netfs_io_request, work);
> +	unsigned int i;
>  
>  	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
>  	netfs_proc_del_rreq(rreq);
> @@ -84,6 +85,15 @@ static void netfs_free_request(struct work_struct *work)
>  		rreq->netfs_ops->free_request(rreq);
>  	if (rreq->cache_resources.ops)
>  		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
> +	if (rreq->direct_bv) {
> +		for (i = 0; i < rreq->direct_bv_count; i++) {
> +			if (rreq->direct_bv[i].bv_page) {
> +				if (rreq->direct_bv_unpin)
> +					unpin_user_page(rreq->direct_bv[i].bv_page);
> +			}
> +		}
> +		kvfree(rreq->direct_bv);
> +	}
>  	kfree_rcu(rreq, rcu);
>  	netfs_stat_d(&netfs_n_rh_rreq);
>  }
> diff --git a/include/linux/netfs.h b/include/linux/netfs.h
> index 3da962e977f5..bbb33ccbf719 100644
> --- a/include/linux/netfs.h
> +++ b/include/linux/netfs.h
> @@ -190,6 +190,9 @@ struct netfs_io_request {
>  	struct iov_iter		iter;		/* Unencrypted-side iterator */
>  	struct iov_iter		io_iter;	/* I/O (Encrypted-side) iterator */
>  	void			*netfs_priv;	/* Private data for the netfs */
> +	struct bio_vec		*direct_bv	/* DIO buffer list (when handling iovec-iter) */
> +	__counted_by(direct_bv_count);

This will break the build with versions of clang that have support for
counted_by (as it has been reverted in main but reapplication to main is
being actively worked on) because while annotating pointers with this
attribute is a goal of the counted_by attribute, it is not ready yet.
Please consider removing this and adding a TODO to annotate it when
support is available.

  In file included from /builds/linux/fs/ceph/crypto.c:14:
  In file included from /builds/linux/fs/ceph/super.h:20:
  /builds/linux/include/linux/netfs.h:259:19: error: 'counted_by' only applies to C99 flexible array members
    259 |         struct bio_vec          *direct_bv      /* DIO buffer list (when handling iovec-iter) */
        |                                  ^~~~~~~~~
  1 error generated.

Some additional context from another recent error like this:

https://lore.kernel.org/CAKwvOdkvGTGiWzqEFq=kzqvxSYP5vUj3g9Z-=MZSQROzzSa_dg@mail.gmail.com/

Cheers,
Nathan

> +	unsigned int		direct_bv_count; /* Number of elements in direct_bv[] */
>  	unsigned int		debug_id;
>  	atomic_t		nr_outstanding;	/* Number of ops in progress */
>  	atomic_t		nr_copy_ops;	/* Number of copy-to-cache ops in progress */
> @@ -197,6 +200,7 @@ struct netfs_io_request {
>  	size_t			len;		/* Length of the request */
>  	short			error;		/* 0 or error that occurred */
>  	enum netfs_io_origin	origin;		/* Origin of the request */
> +	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
>  	loff_t			i_size;		/* Size of the file */
>  	loff_t			start;		/* Start position */
>  	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
>
Christian Brauner Dec. 28, 2023, 10:47 a.m. UTC | #2
> This will break the build with versions of clang that have support for
> counted_by (as it has been reverted in main but reapplication to main is
> being actively worked on) because while annotating pointers with this
> attribute is a goal of the counted_by attribute, it is not ready yet.
> Please consider removing this and adding a TODO to annotate it when
> support is available.

It's really unpleasant that we keep getting new attributes that we
seemingly are encouraged to use and get sent patches for it. And then we
learn a little later that that stuff isn't ready yet. It's annoying. I
know it isn't your fault but it would be wise to be a little more
careful. IOW, unless both clang and gcc do support that thing
appropriately don't send patches to various subsystems for this.

In any case, this is now fixed. I pulled an updated version from David.
Nathan Chancellor Dec. 28, 2023, 4:58 p.m. UTC | #3
On Thu, Dec 28, 2023 at 11:47:45AM +0100, Christian Brauner wrote:
> > This will break the build with versions of clang that have support for
> > counted_by (as it has been reverted in main but reapplication to main is
> > being actively worked on) because while annotating pointers with this
> > attribute is a goal of the counted_by attribute, it is not ready yet.
> > Please consider removing this and adding a TODO to annotate it when
> > support is available.
> 
> It's really unpleasant that we keep getting new attributes that we
> seemingly are encouraged to use and get sent patches for it. And then we
> learn a little later that that stuff isn't ready yet. It's annoying. I

I will assume the "get sent patches for it" is referring to the patches
that Kees has been sending out to add this attribute to flexible array
members. In his defense, that part of the attribute is very nearly ready
(it is only the pointer annotations that are not ready, as in not worked
on at all as far as I am aware). In fact, it was merged in clang's main
branch for some time and the only reason that it was backed out was
because adoption in the kernel had pointed out bugs in the original
implementation that were harder to fix than initially thought; in other
words, only because we started adding this attribute to the kernel were
we able to realize that the initial implementation in clang needed to be
improved, otherwise this feature may have shipped completely broken in
clang 18.1.0 because it had not been stress tested yet. Now we can get
it right.

However, I do not necessarily disagree that it is annoying for
maintainers who are not following this saga but are just receiving
patches to add these annotatations because adds additional things to
check for. Perhaps there should be some guidance added to the
__counted_by definition or Documentation around how it is expected to be
used so that there is clear advice for both developers and maintainers?

> know it isn't your fault but it would be wise to be a little more
> careful. IOW, unless both clang and gcc do support that thing
> appropriately don't send patches to various subsystems for this.

I will assume this was not necessarily directed at me because I have not
sent any patches for __counted_by.

> In any case, this is now fixed. I pulled an updated version from David.

Thanks a lot.

Cheers,
Nathan
diff mbox series

Patch

diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 1bd20bdad983..4df5e5eeada6 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -76,6 +76,7 @@  static void netfs_free_request(struct work_struct *work)
 {
 	struct netfs_io_request *rreq =
 		container_of(work, struct netfs_io_request, work);
+	unsigned int i;
 
 	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
 	netfs_proc_del_rreq(rreq);
@@ -84,6 +85,15 @@  static void netfs_free_request(struct work_struct *work)
 		rreq->netfs_ops->free_request(rreq);
 	if (rreq->cache_resources.ops)
 		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
+	if (rreq->direct_bv) {
+		for (i = 0; i < rreq->direct_bv_count; i++) {
+			if (rreq->direct_bv[i].bv_page) {
+				if (rreq->direct_bv_unpin)
+					unpin_user_page(rreq->direct_bv[i].bv_page);
+			}
+		}
+		kvfree(rreq->direct_bv);
+	}
 	kfree_rcu(rreq, rcu);
 	netfs_stat_d(&netfs_n_rh_rreq);
 }
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 3da962e977f5..bbb33ccbf719 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -190,6 +190,9 @@  struct netfs_io_request {
 	struct iov_iter		iter;		/* Unencrypted-side iterator */
 	struct iov_iter		io_iter;	/* I/O (Encrypted-side) iterator */
 	void			*netfs_priv;	/* Private data for the netfs */
+	struct bio_vec		*direct_bv	/* DIO buffer list (when handling iovec-iter) */
+	__counted_by(direct_bv_count);
+	unsigned int		direct_bv_count; /* Number of elements in direct_bv[] */
 	unsigned int		debug_id;
 	atomic_t		nr_outstanding;	/* Number of ops in progress */
 	atomic_t		nr_copy_ops;	/* Number of copy-to-cache ops in progress */
@@ -197,6 +200,7 @@  struct netfs_io_request {
 	size_t			len;		/* Length of the request */
 	short			error;		/* 0 or error that occurred */
 	enum netfs_io_origin	origin;		/* Origin of the request */
+	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
 	loff_t			i_size;		/* Size of the file */
 	loff_t			start;		/* Start position */
 	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */