diff mbox

NFS: filelayout should use nfs_generic_pg_test

Message ID 1306898310-9229-1-git-send-email-dros@netapp.com (mailing list archive)
State New, archived
Headers show

Commit Message

Weston Andros Adamson June 1, 2011, 3:18 a.m. UTC
Use nfs_generic_pg_test instead of pnfs_generic_pg_test.

This fixes the BUG at fs/nfs/write.c:941 introduced by
89a58e32d9105c01022a757fb32ddc3b51bf0025.

I was able to trigger this BUG reliably using pynfs in pnfs mode,
by using dd(1) to write many small blocks.

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
---
Fix proposed by Trond.

Benny- Does this make sense?

 fs/nfs/nfs4filelayout.c  |    2 +-
 fs/nfs/pagelist.c        |    5 ++++-
 include/linux/nfs_page.h |    3 ++-
 3 files changed, 7 insertions(+), 3 deletions(-)

Comments

Boaz Harrosh June 1, 2011, 5:47 a.m. UTC | #1
On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
> Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
> 
> This fixes the BUG at fs/nfs/write.c:941 introduced by
> 89a58e32d9105c01022a757fb32ddc3b51bf0025.
> 
> I was able to trigger this BUG reliably using pynfs in pnfs mode,
> by using dd(1) to write many small blocks.
> 
> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
> ---
> Fix proposed by Trond.
> 
> Benny- Does this make sense?
> 
>  fs/nfs/nfs4filelayout.c  |    2 +-
>  fs/nfs/pagelist.c        |    5 ++++-
>  include/linux/nfs_page.h |    3 ++-
>  3 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 4269088..1c3bb72 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>  	u64 p_stripe, r_stripe;
>  	u32 stripe_unit;
>  
> -	if (!pnfs_generic_pg_test(pgio, prev, req))
> +	if (!nfs_generic_pg_test(pgio, prev, req))
>  		return 0;
>  

pnfs_generic_pg_test is the one that gets the layout.

What you've done is revert to MDS IO

Boaz

>  	if (!pgio->pg_lseg)
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index 7913961..1a4b0de 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -204,7 +204,9 @@ nfs_wait_on_request(struct nfs_page *req)
>  			TASK_UNINTERRUPTIBLE);
>  }
>  
> -static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
> +bool
> +nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev,
> +		    struct nfs_page *req)
>  {
>  	/*
>  	 * FIXME: ideally we should be able to coalesce all requests
> @@ -218,6 +220,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
>  
>  	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
>  }
> +EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
>  
>  /**
>   * nfs_pageio_init - initialise a page io descriptor
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 3a34e80..7d8a779 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -96,7 +96,8 @@ extern  int nfs_wait_on_request(struct nfs_page *);
>  extern	void nfs_unlock_request(struct nfs_page *req);
>  extern	int nfs_set_page_tag_locked(struct nfs_page *req);
>  extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
> -
> +extern  bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
> +				 struct nfs_page *prev, struct nfs_page *req);
>  
>  /*
>   * Lock the page of an asynchronous request without getting a new reference

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Trond Myklebust June 1, 2011, 12:14 p.m. UTC | #2
On Wed, 2011-06-01 at 08:47 +0300, Boaz Harrosh wrote: 
> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
> > Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
> > 
> > This fixes the BUG at fs/nfs/write.c:941 introduced by
> > 89a58e32d9105c01022a757fb32ddc3b51bf0025.
> > 
> > I was able to trigger this BUG reliably using pynfs in pnfs mode,
> > by using dd(1) to write many small blocks.
> > 
> > Signed-off-by: Weston Andros Adamson <dros@netapp.com>
> > ---
> > Fix proposed by Trond.
> > 
> > Benny- Does this make sense?
> > 
> >  fs/nfs/nfs4filelayout.c  |    2 +-
> >  fs/nfs/pagelist.c        |    5 ++++-
> >  include/linux/nfs_page.h |    3 ++-
> >  3 files changed, 7 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> > index 4269088..1c3bb72 100644
> > --- a/fs/nfs/nfs4filelayout.c
> > +++ b/fs/nfs/nfs4filelayout.c
> > @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> >  	u64 p_stripe, r_stripe;
> >  	u32 stripe_unit;
> >  
> > -	if (!pnfs_generic_pg_test(pgio, prev, req))
> > +	if (!nfs_generic_pg_test(pgio, prev, req))
> >  		return 0;
> >  
> 
> pnfs_generic_pg_test is the one that gets the layout.
> 
> What you've done is revert to MDS IO

The "files" layout type always gets the layout in the pg_doio() method
instead of the pg_test().

Cheers
  Trond
Boaz Harrosh June 1, 2011, 1:36 p.m. UTC | #3
On 06/01/2011 03:14 PM, Trond Myklebust wrote:
> On Wed, 2011-06-01 at 08:47 +0300, Boaz Harrosh wrote: 
>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>
>> pnfs_generic_pg_test is the one that gets the layout.
>>
>> What you've done is revert to MDS IO
> 
> The "files" layout type always gets the layout in the pg_doio() method
> instead of the pg_test().
> 

Well I don't see where? I fought this all day, when trying to make the
new code run with objlayout, which was missing the implementation of pg_test().
And never got a pnfs-IO.

I've searched the full tree for calls to pnfs_update_layout() the only
one I can see are in:
nfs_pagein_multi() - which means within a single page, right?
nfs_pagein_one()   - But is protected with list_is_singular() so only in the
                     single page case
nfs_flush_multi()  - Same as nfs_pagein_multi
nfs_flush_one()    - Also here protected with list_is_singular()

and the all mighty
pnfs_generic_pg_test()

I cannot see where the filelayout is different then other layouts
in that respect. Sorry to be slow, I would like to understand?

And also be careful with nfs_generic_pg_test() it inspects
desc->bsize which is negotiated with MDS, it's very small.

> Cheers
>   Trond
> 

Thanks
Boaz
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benny Halevy June 1, 2011, 1:43 p.m. UTC | #4
On 2011-06-01 16:36, Boaz Harrosh wrote:
> On 06/01/2011 03:14 PM, Trond Myklebust wrote:
>> On Wed, 2011-06-01 at 08:47 +0300, Boaz Harrosh wrote: 
>>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>>
>>> pnfs_generic_pg_test is the one that gets the layout.
>>>
>>> What you've done is revert to MDS IO
>>
>> The "files" layout type always gets the layout in the pg_doio() method
>> instead of the pg_test().
>>
> 
> Well I don't see where? I fought this all day, when trying to make the
> new code run with objlayout, which was missing the implementation of pg_test().
> And never got a pnfs-IO.
> 
> I've searched the full tree for calls to pnfs_update_layout() the only
> one I can see are in:
> nfs_pagein_multi() - which means within a single page, right?
> nfs_pagein_one()   - But is protected with list_is_singular() so only in the
>                      single page case
> nfs_flush_multi()  - Same as nfs_pagein_multi
> nfs_flush_one()    - Also here protected with list_is_singular()
> 
> and the all mighty
> pnfs_generic_pg_test()
> 
> I cannot see where the filelayout is different then other layouts
> in that respect. Sorry to be slow, I would like to understand?
> 
> And also be careful with nfs_generic_pg_test() it inspects
> desc->bsize which is negotiated with MDS, it's very small.
> 

I'm also looking into this.
The call to pnfs_generic_pg_test wasn't a typo.
As pre dfed206 "NFSv4.1: unify pnfs_pageio_init functions"
we were setting pg_pgio->test to pnfs_write_pg_test
which is equivalent to pnfs_generic_pg_test
and 89a58e3 "NFSv4.1: use pnfs_generic_pg_test directly by layout driver"
only reversed the call from pnfs_generic_pg_test to ld->pg_test
to a call from ld->pg_test to pnfs_generic_pg_test

Benny

>> Cheers
>>   Trond
>>
> 
> Thanks
> Boaz
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benny Halevy June 1, 2011, 2:32 p.m. UTC | #5
I guess the culprit is the moving of the
desc->pg_bsize < PAGE_SIZE condition to nfs_generic_pg_test
in 5b36c7dc.

Previously, it stopped coalescing early for both nfs and pnfs
but with this patch it is performed only when pnfs is disabled.

Benny

On 2011-06-01 16:43, Benny Halevy wrote:
> On 2011-06-01 16:36, Boaz Harrosh wrote:
>> On 06/01/2011 03:14 PM, Trond Myklebust wrote:
>>> On Wed, 2011-06-01 at 08:47 +0300, Boaz Harrosh wrote: 
>>>> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>>>>
>>>> pnfs_generic_pg_test is the one that gets the layout.
>>>>
>>>> What you've done is revert to MDS IO
>>>
>>> The "files" layout type always gets the layout in the pg_doio() method
>>> instead of the pg_test().
>>>
>>
>> Well I don't see where? I fought this all day, when trying to make the
>> new code run with objlayout, which was missing the implementation of pg_test().
>> And never got a pnfs-IO.
>>
>> I've searched the full tree for calls to pnfs_update_layout() the only
>> one I can see are in:
>> nfs_pagein_multi() - which means within a single page, right?
>> nfs_pagein_one()   - But is protected with list_is_singular() so only in the
>>                      single page case
>> nfs_flush_multi()  - Same as nfs_pagein_multi
>> nfs_flush_one()    - Also here protected with list_is_singular()
>>
>> and the all mighty
>> pnfs_generic_pg_test()
>>
>> I cannot see where the filelayout is different then other layouts
>> in that respect. Sorry to be slow, I would like to understand?
>>
>> And also be careful with nfs_generic_pg_test() it inspects
>> desc->bsize which is negotiated with MDS, it's very small.
>>
> 
> I'm also looking into this.
> The call to pnfs_generic_pg_test wasn't a typo.
> As pre dfed206 "NFSv4.1: unify pnfs_pageio_init functions"
> we were setting pg_pgio->test to pnfs_write_pg_test
> which is equivalent to pnfs_generic_pg_test
> and 89a58e3 "NFSv4.1: use pnfs_generic_pg_test directly by layout driver"
> only reversed the call from pnfs_generic_pg_test to ld->pg_test
> to a call from ld->pg_test to pnfs_generic_pg_test
> 
> Benny
> 
>>> Cheers
>>>   Trond
>>>
>>
>> Thanks
>> Boaz
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Weston Andros Adamson June 1, 2011, 2:44 p.m. UTC | #6
On Jun 1, 2011, at 1:47 AM, Boaz Harrosh wrote:

> On 06/01/2011 06:18 AM, Weston Andros Adamson wrote:
>> Use nfs_generic_pg_test instead of pnfs_generic_pg_test.
>> 
>> This fixes the BUG at fs/nfs/write.c:941 introduced by
>> 89a58e32d9105c01022a757fb32ddc3b51bf0025.
>> 
>> I was able to trigger this BUG reliably using pynfs in pnfs mode,
>> by using dd(1) to write many small blocks.
>> 
>> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
>> ---
>> Fix proposed by Trond.
>> 
>> Benny- Does this make sense?
>> 
>> fs/nfs/nfs4filelayout.c  |    2 +-
>> fs/nfs/pagelist.c        |    5 ++++-
>> include/linux/nfs_page.h |    3 ++-
>> 3 files changed, 7 insertions(+), 3 deletions(-)
>> 
>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>> index 4269088..1c3bb72 100644
>> --- a/fs/nfs/nfs4filelayout.c
>> +++ b/fs/nfs/nfs4filelayout.c
>> @@ -661,7 +661,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>> 	u64 p_stripe, r_stripe;
>> 	u32 stripe_unit;
>> 
>> -	if (!pnfs_generic_pg_test(pgio, prev, req))
>> +	if (!nfs_generic_pg_test(pgio, prev, req))
>> 		return 0;
>> 
> 
> pnfs_generic_pg_test is the one that gets the layout.
> 
> What you've done is revert to MDS IO
> 
> Boaz

Ah, you're right - I didn't even notice that!  I usually confirm client -> DS communication with tcpdump.  I was working for too long yesterday :)

Patch: recalled.  Discussion about a real fix: started.

-dros

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 4269088..1c3bb72 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -661,7 +661,7 @@  filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	u64 p_stripe, r_stripe;
 	u32 stripe_unit;
 
-	if (!pnfs_generic_pg_test(pgio, prev, req))
+	if (!nfs_generic_pg_test(pgio, prev, req))
 		return 0;
 
 	if (!pgio->pg_lseg)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7913961..1a4b0de 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -204,7 +204,9 @@  nfs_wait_on_request(struct nfs_page *req)
 			TASK_UNINTERRUPTIBLE);
 }
 
-static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+bool
+nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev,
+		    struct nfs_page *req)
 {
 	/*
 	 * FIXME: ideally we should be able to coalesce all requests
@@ -218,6 +220,7 @@  static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
 
 	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
 }
+EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
 /**
  * nfs_pageio_init - initialise a page io descriptor
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 3a34e80..7d8a779 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -96,7 +96,8 @@  extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	int nfs_set_page_tag_locked(struct nfs_page *req);
 extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
-
+extern  bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+				 struct nfs_page *prev, struct nfs_page *req);
 
 /*
  * Lock the page of an asynchronous request without getting a new reference