diff mbox series

[05/12] mm: support async buffered reads in generic_file_buffered_read()

Message ID 20200523185755.8494-6-axboe@kernel.dk (mailing list archive)
State New, archived
Headers show
Series Add support for async buffered reads | expand

Commit Message

Jens Axboe May 23, 2020, 6:57 p.m. UTC
Use the async page locking infrastructure, if IOCB_WAITQ is set in the
passed in iocb. The caller must expect an -EIOCBQUEUED return value,
which means that IO is started but not done yet. This is similar to how
O_DIRECT signals the same operation. Once the callback is received by
the caller for IO completion, the caller must retry the operation.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 mm/filemap.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

Comments

Trond Myklebust May 24, 2020, 2:05 p.m. UTC | #1
On Sat, 2020-05-23 at 12:57 -0600, Jens Axboe wrote:
> Use the async page locking infrastructure, if IOCB_WAITQ is set in
> the
> passed in iocb. The caller must expect an -EIOCBQUEUED return value,
> which means that IO is started but not done yet. This is similar to
> how
> O_DIRECT signals the same operation. Once the callback is received by
> the caller for IO completion, the caller must retry the operation.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  mm/filemap.c | 33 ++++++++++++++++++++++++++-------
>  1 file changed, 26 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index c746541b1d49..a3b86c9acdc8 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1219,6 +1219,14 @@ static int __wait_on_page_locked_async(struct
> page *page,
>  	return ret;
>  }
>  
> +static int wait_on_page_locked_async(struct page *page,
> +				     struct wait_page_queue *wait)
> +{
> +	if (!PageLocked(page))
> +		return 0;
> +	return __wait_on_page_locked_async(compound_head(page), wait,
> false);
> +}
> +
>  /**
>   * put_and_wait_on_page_locked - Drop a reference and wait for it to
> be unlocked
>   * @page: The page to wait for.
> @@ -2058,17 +2066,25 @@ static ssize_t
> generic_file_buffered_read(struct kiocb *iocb,
>  					index, last_index - index);
>  		}
>  		if (!PageUptodate(page)) {
> -			if (iocb->ki_flags & IOCB_NOWAIT) {
> -				put_page(page);
> -				goto would_block;
> -			}
> -
>  			/*
>  			 * See comment in do_read_cache_page on why
>  			 * wait_on_page_locked is used to avoid
> unnecessarily
>  			 * serialisations and why it's safe.
>  			 */
> -			error = wait_on_page_locked_killable(page);
> +			if (iocb->ki_flags & IOCB_WAITQ) {
> +				if (written) {
> +					put_page(page);
> +					goto out;
> +				}
> +				error = wait_on_page_locked_async(page,
> +								iocb-
> >private);

If it is being used in 'generic_file_buffered_read()' as storage for a
wait queue, then it is hard to consider this a 'private' field.

Perhaps either rename and add type checking, or else add a separate
field altogether to struct kiocb?

> +			} else {
> +				if (iocb->ki_flags & IOCB_NOWAIT) {
> +					put_page(page);
> +					goto would_block;
> +				}
> +				error =
> wait_on_page_locked_killable(page);
> +			}
>  			if (unlikely(error))
>  				goto readpage_error;
>  			if (PageUptodate(page))
> @@ -2156,7 +2172,10 @@ static ssize_t
> generic_file_buffered_read(struct kiocb *iocb,
>  
>  page_not_up_to_date:
>  		/* Get exclusive access to the page ... */
> -		error = lock_page_killable(page);
> +		if (iocb->ki_flags & IOCB_WAITQ)
> +			error = lock_page_async(page, iocb->private);
> +		else
> +			error = lock_page_killable(page);
>  		if (unlikely(error))
>  			goto readpage_error;
>
Jens Axboe May 24, 2020, 4:30 p.m. UTC | #2
On 5/24/20 8:05 AM, Trond Myklebust wrote:
> On Sat, 2020-05-23 at 12:57 -0600, Jens Axboe wrote:
>> Use the async page locking infrastructure, if IOCB_WAITQ is set in
>> the
>> passed in iocb. The caller must expect an -EIOCBQUEUED return value,
>> which means that IO is started but not done yet. This is similar to
>> how
>> O_DIRECT signals the same operation. Once the callback is received by
>> the caller for IO completion, the caller must retry the operation.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  mm/filemap.c | 33 ++++++++++++++++++++++++++-------
>>  1 file changed, 26 insertions(+), 7 deletions(-)
>>
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index c746541b1d49..a3b86c9acdc8 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -1219,6 +1219,14 @@ static int __wait_on_page_locked_async(struct
>> page *page,
>>  	return ret;
>>  }
>>  
>> +static int wait_on_page_locked_async(struct page *page,
>> +				     struct wait_page_queue *wait)
>> +{
>> +	if (!PageLocked(page))
>> +		return 0;
>> +	return __wait_on_page_locked_async(compound_head(page), wait,
>> false);
>> +}
>> +
>>  /**
>>   * put_and_wait_on_page_locked - Drop a reference and wait for it to
>> be unlocked
>>   * @page: The page to wait for.
>> @@ -2058,17 +2066,25 @@ static ssize_t
>> generic_file_buffered_read(struct kiocb *iocb,
>>  					index, last_index - index);
>>  		}
>>  		if (!PageUptodate(page)) {
>> -			if (iocb->ki_flags & IOCB_NOWAIT) {
>> -				put_page(page);
>> -				goto would_block;
>> -			}
>> -
>>  			/*
>>  			 * See comment in do_read_cache_page on why
>>  			 * wait_on_page_locked is used to avoid
>> unnecessarily
>>  			 * serialisations and why it's safe.
>>  			 */
>> -			error = wait_on_page_locked_killable(page);
>> +			if (iocb->ki_flags & IOCB_WAITQ) {
>> +				if (written) {
>> +					put_page(page);
>> +					goto out;
>> +				}
>> +				error = wait_on_page_locked_async(page,
>> +								iocb-
>>> private);
> 
> If it is being used in 'generic_file_buffered_read()' as storage for a
> wait queue, then it is hard to consider this a 'private' field.

private isn't the prettiest, and in fact this one in particular is a bit
of a mess. It's not clear if it's caller or callee owned. It's generally
not used, outside of the old usb gadget code, iomap O_DIRECT, and ocfs2.
With FMODE_BUF_RASYNC, the fs obviously can't set it if it uses
->private for buffered IO.

> Perhaps either rename and add type checking, or else add a separate
> field altogether to struct kiocb?

I'd hate to add a new field and increase the size of the kiocb... One
alternative is to do:

	union {
		void *private;
		struct wait_page_queue *ki_waitq;
	};

and still use IOCB_WAITQ to say that ->ki_waitq is valid.

There's also 4 bytes of padding in the kiocb struct. And some fields are
only used for O_DIRECT as well, eg ->ki_cookie which is just used for
polled O_DIRECT. So we could also do:

	union {
		unsigned int ki_cookie;
		struct wait_page_queue *ki_waitq;
	};

and still not grow the kiocb. How about we go with this approach, and
also add:

	if (kiocb->ki_flags & IOCB_HIPRI)
		return -EOPNOTSUPP;

to kiocb_wait_page_queue_init() to make sure that this combination isn't
valid?
Jens Axboe May 24, 2020, 4:40 p.m. UTC | #3
On 5/24/20 10:30 AM, Jens Axboe wrote:
> On 5/24/20 8:05 AM, Trond Myklebust wrote:
>> On Sat, 2020-05-23 at 12:57 -0600, Jens Axboe wrote:
>>> Use the async page locking infrastructure, if IOCB_WAITQ is set in
>>> the
>>> passed in iocb. The caller must expect an -EIOCBQUEUED return value,
>>> which means that IO is started but not done yet. This is similar to
>>> how
>>> O_DIRECT signals the same operation. Once the callback is received by
>>> the caller for IO completion, the caller must retry the operation.
>>>
>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>> ---
>>>  mm/filemap.c | 33 ++++++++++++++++++++++++++-------
>>>  1 file changed, 26 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/mm/filemap.c b/mm/filemap.c
>>> index c746541b1d49..a3b86c9acdc8 100644
>>> --- a/mm/filemap.c
>>> +++ b/mm/filemap.c
>>> @@ -1219,6 +1219,14 @@ static int __wait_on_page_locked_async(struct
>>> page *page,
>>>  	return ret;
>>>  }
>>>  
>>> +static int wait_on_page_locked_async(struct page *page,
>>> +				     struct wait_page_queue *wait)
>>> +{
>>> +	if (!PageLocked(page))
>>> +		return 0;
>>> +	return __wait_on_page_locked_async(compound_head(page), wait,
>>> false);
>>> +}
>>> +
>>>  /**
>>>   * put_and_wait_on_page_locked - Drop a reference and wait for it to
>>> be unlocked
>>>   * @page: The page to wait for.
>>> @@ -2058,17 +2066,25 @@ static ssize_t
>>> generic_file_buffered_read(struct kiocb *iocb,
>>>  					index, last_index - index);
>>>  		}
>>>  		if (!PageUptodate(page)) {
>>> -			if (iocb->ki_flags & IOCB_NOWAIT) {
>>> -				put_page(page);
>>> -				goto would_block;
>>> -			}
>>> -
>>>  			/*
>>>  			 * See comment in do_read_cache_page on why
>>>  			 * wait_on_page_locked is used to avoid
>>> unnecessarily
>>>  			 * serialisations and why it's safe.
>>>  			 */
>>> -			error = wait_on_page_locked_killable(page);
>>> +			if (iocb->ki_flags & IOCB_WAITQ) {
>>> +				if (written) {
>>> +					put_page(page);
>>> +					goto out;
>>> +				}
>>> +				error = wait_on_page_locked_async(page,
>>> +								iocb-
>>>> private);
>>
>> If it is being used in 'generic_file_buffered_read()' as storage for a
>> wait queue, then it is hard to consider this a 'private' field.
> 
> private isn't the prettiest, and in fact this one in particular is a bit
> of a mess. It's not clear if it's caller or callee owned. It's generally
> not used, outside of the old usb gadget code, iomap O_DIRECT, and ocfs2.
> With FMODE_BUF_RASYNC, the fs obviously can't set it if it uses
> ->private for buffered IO.
> 
>> Perhaps either rename and add type checking, or else add a separate
>> field altogether to struct kiocb?
> 
> I'd hate to add a new field and increase the size of the kiocb... One
> alternative is to do:
> 
> 	union {
> 		void *private;
> 		struct wait_page_queue *ki_waitq;
> 	};
> 
> and still use IOCB_WAITQ to say that ->ki_waitq is valid.
> 
> There's also 4 bytes of padding in the kiocb struct. And some fields are
> only used for O_DIRECT as well, eg ->ki_cookie which is just used for
> polled O_DIRECT. So we could also do:
> 
> 	union {
> 		unsigned int ki_cookie;
> 		struct wait_page_queue *ki_waitq;
> 	};
> 
> and still not grow the kiocb. How about we go with this approach, and
> also add:
> 
> 	if (kiocb->ki_flags & IOCB_HIPRI)
> 		return -EOPNOTSUPP;
> 
> to kiocb_wait_page_queue_init() to make sure that this combination isn't
> valid?

Here's the incremental, which is spread over 3 patches. I think this one
makes sense, as polled IO doesn't support buffered IO. And because doing
an async callback for completion is not how polled IO operates anyway,
so even if buffered IO supported it, we'd not use the callback for
polled IO anyway. kiocb_wait_page_queue_init() checks and backs this
up.


diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0ef5f5973b1c..f7b1eb765c6e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -317,7 +317,7 @@ enum rw_hint {
 #define IOCB_SYNC		(1 << 5)
 #define IOCB_WRITE		(1 << 6)
 #define IOCB_NOWAIT		(1 << 7)
-/* iocb->private holds wait_page_async struct */
+/* iocb->ki_waitq is valid */
 #define IOCB_WAITQ		(1 << 8)
 
 struct kiocb {
@@ -332,7 +332,10 @@ struct kiocb {
 	int			ki_flags;
 	u16			ki_hint;
 	u16			ki_ioprio; /* See linux/ioprio.h */
-	unsigned int		ki_cookie; /* for ->iopoll */
+	union {
+		unsigned int		ki_cookie; /* for ->iopoll */
+		struct wait_page_queue	*ki_waitq; /* for async buffered IO */
+	};
 
 	randomized_struct_fields_end
 };
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index def58de92053..8b65420410ee 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -498,13 +498,16 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
 					     wait_queue_func_t func,
 					     void *data)
 {
+	/* Can't support async wakeup with polled IO */
+	if (kiocb->ki_flags & IOCB_HIPRI)
+		return -EINVAL;
 	if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
 		wait->wait.func = func;
 		wait->wait.private = data;
 		wait->wait.flags = 0;
 		INIT_LIST_HEAD(&wait->wait.entry);
 		kiocb->ki_flags |= IOCB_WAITQ;
-		kiocb->private = wait;
+		kiocb->ki_waitq = wait;
 		return 0;
 	}
 
diff --git a/mm/filemap.c b/mm/filemap.c
index a3b86c9acdc8..18022de7dc33 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2077,7 +2077,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 					goto out;
 				}
 				error = wait_on_page_locked_async(page,
-								iocb->private);
+								iocb->ki_waitq);
 			} else {
 				if (iocb->ki_flags & IOCB_NOWAIT) {
 					put_page(page);
@@ -2173,7 +2173,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 page_not_up_to_date:
 		/* Get exclusive access to the page ... */
 		if (iocb->ki_flags & IOCB_WAITQ)
-			error = lock_page_async(page, iocb->private);
+			error = lock_page_async(page, iocb->ki_waitq);
 		else
 			error = lock_page_killable(page);
 		if (unlikely(error))
Trond Myklebust May 24, 2020, 5:11 p.m. UTC | #4
On Sun, 2020-05-24 at 10:40 -0600, Jens Axboe wrote:
> On 5/24/20 10:30 AM, Jens Axboe wrote:
> > On 5/24/20 8:05 AM, Trond Myklebust wrote:
> > > On Sat, 2020-05-23 at 12:57 -0600, Jens Axboe wrote:
> > > > Use the async page locking infrastructure, if IOCB_WAITQ is set
> > > > in
> > > > the
> > > > passed in iocb. The caller must expect an -EIOCBQUEUED return
> > > > value,
> > > > which means that IO is started but not done yet. This is
> > > > similar to
> > > > how
> > > > O_DIRECT signals the same operation. Once the callback is
> > > > received by
> > > > the caller for IO completion, the caller must retry the
> > > > operation.
> > > > 
> > > > Signed-off-by: Jens Axboe <axboe@kernel.dk>
> > > > ---
> > > >  mm/filemap.c | 33 ++++++++++++++++++++++++++-------
> > > >  1 file changed, 26 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/mm/filemap.c b/mm/filemap.c
> > > > index c746541b1d49..a3b86c9acdc8 100644
> > > > --- a/mm/filemap.c
> > > > +++ b/mm/filemap.c
> > > > @@ -1219,6 +1219,14 @@ static int
> > > > __wait_on_page_locked_async(struct
> > > > page *page,
> > > >  	return ret;
> > > >  }
> > > >  
> > > > +static int wait_on_page_locked_async(struct page *page,
> > > > +				     struct wait_page_queue
> > > > *wait)
> > > > +{
> > > > +	if (!PageLocked(page))
> > > > +		return 0;
> > > > +	return __wait_on_page_locked_async(compound_head(page),
> > > > wait,
> > > > false);
> > > > +}
> > > > +
> > > >  /**
> > > >   * put_and_wait_on_page_locked - Drop a reference and wait for
> > > > it to
> > > > be unlocked
> > > >   * @page: The page to wait for.
> > > > @@ -2058,17 +2066,25 @@ static ssize_t
> > > > generic_file_buffered_read(struct kiocb *iocb,
> > > >  					index, last_index -
> > > > index);
> > > >  		}
> > > >  		if (!PageUptodate(page)) {
> > > > -			if (iocb->ki_flags & IOCB_NOWAIT) {
> > > > -				put_page(page);
> > > > -				goto would_block;
> > > > -			}
> > > > -
> > > >  			/*
> > > >  			 * See comment in do_read_cache_page on
> > > > why
> > > >  			 * wait_on_page_locked is used to avoid
> > > > unnecessarily
> > > >  			 * serialisations and why it's safe.
> > > >  			 */
> > > > -			error =
> > > > wait_on_page_locked_killable(page);
> > > > +			if (iocb->ki_flags & IOCB_WAITQ) {
> > > > +				if (written) {
> > > > +					put_page(page);
> > > > +					goto out;
> > > > +				}
> > > > +				error =
> > > > wait_on_page_locked_async(page,
> > > > +								
> > > > iocb-
> > > > > private);
> > > 
> > > If it is being used in 'generic_file_buffered_read()' as storage
> > > for a
> > > wait queue, then it is hard to consider this a 'private' field.
> > 
> > private isn't the prettiest, and in fact this one in particular is
> > a bit
> > of a mess. It's not clear if it's caller or callee owned. It's
> > generally
> > not used, outside of the old usb gadget code, iomap O_DIRECT, and
> > ocfs2.
> > With FMODE_BUF_RASYNC, the fs obviously can't set it if it uses
> > ->private for buffered IO.
> > 
> > > Perhaps either rename and add type checking, or else add a
> > > separate
> > > field altogether to struct kiocb?
> > 
> > I'd hate to add a new field and increase the size of the kiocb...
> > One
> > alternative is to do:
> > 
> > 	union {
> > 		void *private;
> > 		struct wait_page_queue *ki_waitq;
> > 	};
> > 
> > and still use IOCB_WAITQ to say that ->ki_waitq is valid.
> > 
> > There's also 4 bytes of padding in the kiocb struct. And some
> > fields are
> > only used for O_DIRECT as well, eg ->ki_cookie which is just used
> > for
> > polled O_DIRECT. So we could also do:
> > 
> > 	union {
> > 		unsigned int ki_cookie;
> > 		struct wait_page_queue *ki_waitq;
> > 	};
> > 
> > and still not grow the kiocb. How about we go with this approach,
> > and
> > also add:
> > 
> > 	if (kiocb->ki_flags & IOCB_HIPRI)
> > 		return -EOPNOTSUPP;
> > 
> > to kiocb_wait_page_queue_init() to make sure that this combination
> > isn't
> > valid?
> 
> Here's the incremental, which is spread over 3 patches. I think this
> one
> makes sense, as polled IO doesn't support buffered IO. And because
> doing
> an async callback for completion is not how polled IO operates
> anyway,
> so even if buffered IO supported it, we'd not use the callback for
> polled IO anyway. kiocb_wait_page_queue_init() checks and backs this
> up.
> 
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 0ef5f5973b1c..f7b1eb765c6e 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -317,7 +317,7 @@ enum rw_hint {
>  #define IOCB_SYNC		(1 << 5)
>  #define IOCB_WRITE		(1 << 6)
>  #define IOCB_NOWAIT		(1 << 7)
> -/* iocb->private holds wait_page_async struct */
> +/* iocb->ki_waitq is valid */
>  #define IOCB_WAITQ		(1 << 8)
>  
>  struct kiocb {
> @@ -332,7 +332,10 @@ struct kiocb {
>  	int			ki_flags;
>  	u16			ki_hint;
>  	u16			ki_ioprio; /* See linux/ioprio.h */
> -	unsigned int		ki_cookie; /* for ->iopoll */
> +	union {
> +		unsigned int		ki_cookie; /* for ->iopoll */
> +		struct wait_page_queue	*ki_waitq; /* for async
> buffered IO */
> +	};
>  
>  	randomized_struct_fields_end
>  };
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index def58de92053..8b65420410ee 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -498,13 +498,16 @@ static inline int
> kiocb_wait_page_queue_init(struct kiocb *kiocb,
>  					     wait_queue_func_t func,
>  					     void *data)
>  {
> +	/* Can't support async wakeup with polled IO */
> +	if (kiocb->ki_flags & IOCB_HIPRI)
> +		return -EINVAL;
>  	if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
>  		wait->wait.func = func;
>  		wait->wait.private = data;
>  		wait->wait.flags = 0;
>  		INIT_LIST_HEAD(&wait->wait.entry);
>  		kiocb->ki_flags |= IOCB_WAITQ;
> -		kiocb->private = wait;
> +		kiocb->ki_waitq = wait;
>  		return 0;
>  	}
>  
> diff --git a/mm/filemap.c b/mm/filemap.c
> index a3b86c9acdc8..18022de7dc33 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2077,7 +2077,7 @@ static ssize_t
> generic_file_buffered_read(struct kiocb *iocb,
>  					goto out;
>  				}
>  				error = wait_on_page_locked_async(page,
> -								iocb-
> >private);
> +								iocb-
> >ki_waitq);
>  			} else {
>  				if (iocb->ki_flags & IOCB_NOWAIT) {
>  					put_page(page);
> @@ -2173,7 +2173,7 @@ static ssize_t
> generic_file_buffered_read(struct kiocb *iocb,
>  page_not_up_to_date:
>  		/* Get exclusive access to the page ... */
>  		if (iocb->ki_flags & IOCB_WAITQ)
> -			error = lock_page_async(page, iocb->private);
> +			error = lock_page_async(page, iocb->ki_waitq);
>  		else
>  			error = lock_page_killable(page);
>  		if (unlikely(error))

Ack. That seems cleaner to me.
Jens Axboe May 24, 2020, 5:12 p.m. UTC | #5
On 5/24/20 11:11 AM, Trond Myklebust wrote:
> On Sun, 2020-05-24 at 10:40 -0600, Jens Axboe wrote:
>> On 5/24/20 10:30 AM, Jens Axboe wrote:
>>> On 5/24/20 8:05 AM, Trond Myklebust wrote:
>>>> On Sat, 2020-05-23 at 12:57 -0600, Jens Axboe wrote:
>>>>> Use the async page locking infrastructure, if IOCB_WAITQ is set
>>>>> in
>>>>> the
>>>>> passed in iocb. The caller must expect an -EIOCBQUEUED return
>>>>> value,
>>>>> which means that IO is started but not done yet. This is
>>>>> similar to
>>>>> how
>>>>> O_DIRECT signals the same operation. Once the callback is
>>>>> received by
>>>>> the caller for IO completion, the caller must retry the
>>>>> operation.
>>>>>
>>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>>>> ---
>>>>>  mm/filemap.c | 33 ++++++++++++++++++++++++++-------
>>>>>  1 file changed, 26 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/mm/filemap.c b/mm/filemap.c
>>>>> index c746541b1d49..a3b86c9acdc8 100644
>>>>> --- a/mm/filemap.c
>>>>> +++ b/mm/filemap.c
>>>>> @@ -1219,6 +1219,14 @@ static int
>>>>> __wait_on_page_locked_async(struct
>>>>> page *page,
>>>>>  	return ret;
>>>>>  }
>>>>>  
>>>>> +static int wait_on_page_locked_async(struct page *page,
>>>>> +				     struct wait_page_queue
>>>>> *wait)
>>>>> +{
>>>>> +	if (!PageLocked(page))
>>>>> +		return 0;
>>>>> +	return __wait_on_page_locked_async(compound_head(page),
>>>>> wait,
>>>>> false);
>>>>> +}
>>>>> +
>>>>>  /**
>>>>>   * put_and_wait_on_page_locked - Drop a reference and wait for
>>>>> it to
>>>>> be unlocked
>>>>>   * @page: The page to wait for.
>>>>> @@ -2058,17 +2066,25 @@ static ssize_t
>>>>> generic_file_buffered_read(struct kiocb *iocb,
>>>>>  					index, last_index -
>>>>> index);
>>>>>  		}
>>>>>  		if (!PageUptodate(page)) {
>>>>> -			if (iocb->ki_flags & IOCB_NOWAIT) {
>>>>> -				put_page(page);
>>>>> -				goto would_block;
>>>>> -			}
>>>>> -
>>>>>  			/*
>>>>>  			 * See comment in do_read_cache_page on
>>>>> why
>>>>>  			 * wait_on_page_locked is used to avoid
>>>>> unnecessarily
>>>>>  			 * serialisations and why it's safe.
>>>>>  			 */
>>>>> -			error =
>>>>> wait_on_page_locked_killable(page);
>>>>> +			if (iocb->ki_flags & IOCB_WAITQ) {
>>>>> +				if (written) {
>>>>> +					put_page(page);
>>>>> +					goto out;
>>>>> +				}
>>>>> +				error =
>>>>> wait_on_page_locked_async(page,
>>>>> +								
>>>>> iocb-
>>>>>> private);
>>>>
>>>> If it is being used in 'generic_file_buffered_read()' as storage
>>>> for a
>>>> wait queue, then it is hard to consider this a 'private' field.
>>>
>>> private isn't the prettiest, and in fact this one in particular is
>>> a bit
>>> of a mess. It's not clear if it's caller or callee owned. It's
>>> generally
>>> not used, outside of the old usb gadget code, iomap O_DIRECT, and
>>> ocfs2.
>>> With FMODE_BUF_RASYNC, the fs obviously can't set it if it uses
>>> ->private for buffered IO.
>>>
>>>> Perhaps either rename and add type checking, or else add a
>>>> separate
>>>> field altogether to struct kiocb?
>>>
>>> I'd hate to add a new field and increase the size of the kiocb...
>>> One
>>> alternative is to do:
>>>
>>> 	union {
>>> 		void *private;
>>> 		struct wait_page_queue *ki_waitq;
>>> 	};
>>>
>>> and still use IOCB_WAITQ to say that ->ki_waitq is valid.
>>>
>>> There's also 4 bytes of padding in the kiocb struct. And some
>>> fields are
>>> only used for O_DIRECT as well, eg ->ki_cookie which is just used
>>> for
>>> polled O_DIRECT. So we could also do:
>>>
>>> 	union {
>>> 		unsigned int ki_cookie;
>>> 		struct wait_page_queue *ki_waitq;
>>> 	};
>>>
>>> and still not grow the kiocb. How about we go with this approach,
>>> and
>>> also add:
>>>
>>> 	if (kiocb->ki_flags & IOCB_HIPRI)
>>> 		return -EOPNOTSUPP;
>>>
>>> to kiocb_wait_page_queue_init() to make sure that this combination
>>> isn't
>>> valid?
>>
>> Here's the incremental, which is spread over 3 patches. I think this
>> one
>> makes sense, as polled IO doesn't support buffered IO. And because
>> doing
>> an async callback for completion is not how polled IO operates
>> anyway,
>> so even if buffered IO supported it, we'd not use the callback for
>> polled IO anyway. kiocb_wait_page_queue_init() checks and backs this
>> up.
>>
>>
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 0ef5f5973b1c..f7b1eb765c6e 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -317,7 +317,7 @@ enum rw_hint {
>>  #define IOCB_SYNC		(1 << 5)
>>  #define IOCB_WRITE		(1 << 6)
>>  #define IOCB_NOWAIT		(1 << 7)
>> -/* iocb->private holds wait_page_async struct */
>> +/* iocb->ki_waitq is valid */
>>  #define IOCB_WAITQ		(1 << 8)
>>  
>>  struct kiocb {
>> @@ -332,7 +332,10 @@ struct kiocb {
>>  	int			ki_flags;
>>  	u16			ki_hint;
>>  	u16			ki_ioprio; /* See linux/ioprio.h */
>> -	unsigned int		ki_cookie; /* for ->iopoll */
>> +	union {
>> +		unsigned int		ki_cookie; /* for ->iopoll */
>> +		struct wait_page_queue	*ki_waitq; /* for async
>> buffered IO */
>> +	};
>>  
>>  	randomized_struct_fields_end
>>  };
>> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
>> index def58de92053..8b65420410ee 100644
>> --- a/include/linux/pagemap.h
>> +++ b/include/linux/pagemap.h
>> @@ -498,13 +498,16 @@ static inline int
>> kiocb_wait_page_queue_init(struct kiocb *kiocb,
>>  					     wait_queue_func_t func,
>>  					     void *data)
>>  {
>> +	/* Can't support async wakeup with polled IO */
>> +	if (kiocb->ki_flags & IOCB_HIPRI)
>> +		return -EINVAL;
>>  	if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
>>  		wait->wait.func = func;
>>  		wait->wait.private = data;
>>  		wait->wait.flags = 0;
>>  		INIT_LIST_HEAD(&wait->wait.entry);
>>  		kiocb->ki_flags |= IOCB_WAITQ;
>> -		kiocb->private = wait;
>> +		kiocb->ki_waitq = wait;
>>  		return 0;
>>  	}
>>  
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index a3b86c9acdc8..18022de7dc33 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -2077,7 +2077,7 @@ static ssize_t
>> generic_file_buffered_read(struct kiocb *iocb,
>>  					goto out;
>>  				}
>>  				error = wait_on_page_locked_async(page,
>> -								iocb-
>>> private);
>> +								iocb-
>>> ki_waitq);
>>  			} else {
>>  				if (iocb->ki_flags & IOCB_NOWAIT) {
>>  					put_page(page);
>> @@ -2173,7 +2173,7 @@ static ssize_t
>> generic_file_buffered_read(struct kiocb *iocb,
>>  page_not_up_to_date:
>>  		/* Get exclusive access to the page ... */
>>  		if (iocb->ki_flags & IOCB_WAITQ)
>> -			error = lock_page_async(page, iocb->private);
>> +			error = lock_page_async(page, iocb->ki_waitq);
>>  		else
>>  			error = lock_page_killable(page);
>>  		if (unlikely(error))
> 
> Ack. That seems cleaner to me.

I agree, this is better. Ran it through the testing and updated the
series accordingly:

https://git.kernel.dk/cgit/linux-block/log/?h=async-buffered.4
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index c746541b1d49..a3b86c9acdc8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1219,6 +1219,14 @@  static int __wait_on_page_locked_async(struct page *page,
 	return ret;
 }
 
+static int wait_on_page_locked_async(struct page *page,
+				     struct wait_page_queue *wait)
+{
+	if (!PageLocked(page))
+		return 0;
+	return __wait_on_page_locked_async(compound_head(page), wait, false);
+}
+
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -2058,17 +2066,25 @@  static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 					index, last_index - index);
 		}
 		if (!PageUptodate(page)) {
-			if (iocb->ki_flags & IOCB_NOWAIT) {
-				put_page(page);
-				goto would_block;
-			}
-
 			/*
 			 * See comment in do_read_cache_page on why
 			 * wait_on_page_locked is used to avoid unnecessarily
 			 * serialisations and why it's safe.
 			 */
-			error = wait_on_page_locked_killable(page);
+			if (iocb->ki_flags & IOCB_WAITQ) {
+				if (written) {
+					put_page(page);
+					goto out;
+				}
+				error = wait_on_page_locked_async(page,
+								iocb->private);
+			} else {
+				if (iocb->ki_flags & IOCB_NOWAIT) {
+					put_page(page);
+					goto would_block;
+				}
+				error = wait_on_page_locked_killable(page);
+			}
 			if (unlikely(error))
 				goto readpage_error;
 			if (PageUptodate(page))
@@ -2156,7 +2172,10 @@  static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 
 page_not_up_to_date:
 		/* Get exclusive access to the page ... */
-		error = lock_page_killable(page);
+		if (iocb->ki_flags & IOCB_WAITQ)
+			error = lock_page_async(page, iocb->private);
+		else
+			error = lock_page_killable(page);
 		if (unlikely(error))
 			goto readpage_error;