diff mbox series

[v3] ceph: force sending a cap update msg back to MDS for revoke op

Message ID 20240716120724.134512-1-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v3] ceph: force sending a cap update msg back to MDS for revoke op | expand

Commit Message

Xiubo Li July 16, 2024, 12:07 p.m. UTC
From: Xiubo Li <xiubli@redhat.com>

If a client sends out a cap update dropping caps with the prior 'seq'
just before an incoming cap revoke request, then the client may drop
the revoke because it believes it's already released the requested
capabilities.

This causes the MDS to wait indefinitely for the client to respond
to the revoke. It's therefore always a good idea to ack the cap
revoke request with the bumped up 'seq'.

Currently if the cap->issued equals to the newcaps the check_caps()
will do nothing, we should force flush the caps.

Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/61782
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

V3:
- Move the force check earlier

V2:
- Improved the patch to force send the cap update only when no caps
being used.


 fs/ceph/caps.c  | 35 ++++++++++++++++++++++++-----------
 fs/ceph/super.h |  7 ++++---
 2 files changed, 28 insertions(+), 14 deletions(-)

Comments

Venky Shankar July 24, 2024, 2:08 p.m. UTC | #1
Hi Xiubo,

On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> If a client sends out a cap update dropping caps with the prior 'seq'
> just before an incoming cap revoke request, then the client may drop
> the revoke because it believes it's already released the requested
> capabilities.
>
> This causes the MDS to wait indefinitely for the client to respond
> to the revoke. It's therefore always a good idea to ack the cap
> revoke request with the bumped up 'seq'.
>
> Currently if the cap->issued equals to the newcaps the check_caps()
> will do nothing, we should force flush the caps.
>
> Cc: stable@vger.kernel.org
> Link: https://tracker.ceph.com/issues/61782
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V3:
> - Move the force check earlier
>
> V2:
> - Improved the patch to force send the cap update only when no caps
> being used.
>
>
>  fs/ceph/caps.c  | 35 ++++++++++++++++++++++++-----------
>  fs/ceph/super.h |  7 ++++---
>  2 files changed, 28 insertions(+), 14 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 24c31f795938..672c6611d749 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
>   *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
>   *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
>   *    further delay.
> + *  CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
> + *    further delay.
>   */
>  void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>  {
> @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>         }
>
>         doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
> -             "flushing %s issued %s revoking %s retain %s %s%s%s\n",
> +             "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
>              inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
>              ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
>              ceph_cap_string(ci->i_flushing_caps),
> @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>              ceph_cap_string(retain),
>              (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
>              (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
> -            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
> +            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
> +            (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
>
>         /*
>          * If we no longer need to hold onto old our caps, and we may
> @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>                                 queue_writeback = true;
>                 }
>
> +               if (flags & CHECK_CAPS_FLUSH_FORCE) {
> +                       doutc(cl, "force to flush caps\n");
> +                       goto ack;
> +               }
> +
>                 if (cap == ci->i_auth_cap &&
>                     (cap->issued & CEPH_CAP_FILE_WR)) {
>                         /* request larger max_size from MDS? */
> @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode,
>         bool queue_invalidate = false;
>         bool deleted_inode = false;
>         bool fill_inline = false;
> +       bool revoke_wait = false;
> +       int flags = 0;
>
>         /*
>          * If there is at least one crypto block then we'll trust
> @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode,
>                       ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
>                       ceph_cap_string(revoking));
>                 if (S_ISREG(inode->i_mode) &&
> -                   (revoking & used & CEPH_CAP_FILE_BUFFER))
> +                   (revoking & used & CEPH_CAP_FILE_BUFFER)) {
>                         writeback = true;  /* initiate writeback; will delay ack */
> -               else if (queue_invalidate &&
> +                       revoke_wait = true;
> +               } else if (queue_invalidate &&
>                          revoking == CEPH_CAP_FILE_CACHE &&
> -                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
> -                       ; /* do nothing yet, invalidation will be queued */
> -               else if (cap == ci->i_auth_cap)
> +                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
> +                       revoke_wait = true; /* do nothing yet, invalidation will be queued */
> +               } else if (cap == ci->i_auth_cap) {
>                         check_caps = 1; /* check auth cap only */
> -               else
> +               } else {
>                         check_caps = 2; /* check all caps */
> +               }
>                 /* If there is new caps, try to wake up the waiters */
>                 if (~cap->issued & newcaps)
>                         wake = true;
> @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode,
>         BUG_ON(cap->issued & ~cap->implemented);
>
>         /* don't let check_caps skip sending a response to MDS for revoke msgs */
> -       if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
> +       if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
>                 cap->mds_wanted = 0;
> +               flags |= CHECK_CAPS_FLUSH_FORCE;
>                 if (cap == ci->i_auth_cap)
>                         check_caps = 1; /* check auth cap only */
>                 else
> @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode,
>
>         mutex_unlock(&session->s_mutex);
>         if (check_caps == 1)
> -               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
> +               ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
>         else if (check_caps == 2)
> -               ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
> +               ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
>  }
>
>  /*
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index b0b368ed3018..831e8ec4d5da 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -200,9 +200,10 @@ struct ceph_cap {
>         struct list_head caps_item;
>  };
>
> -#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
> -#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
> -#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
> +#define CHECK_CAPS_AUTHONLY     1  /* only check auth cap */
> +#define CHECK_CAPS_FLUSH        2  /* flush any dirty caps */
> +#define CHECK_CAPS_NOINVAL      4  /* don't invalidate pagecache */
> +#define CHECK_CAPS_FLUSH_FORCE  8  /* force flush any caps */
>
>  struct ceph_cap_flush {
>         u64 tid;
> --
> 2.45.1
>

Unfortunately, the test run using this change has unrelated issues,
therefore, the tests have to be rerun. I'll schedule a fs suite run on
priority so that we get the results by tomorrow.

Will update once done. Apologies!
Xiubo Li July 25, 2024, 1:01 a.m. UTC | #2
On 7/24/24 22:08, Venky Shankar wrote:
> Hi Xiubo,
>
> On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> If a client sends out a cap update dropping caps with the prior 'seq'
>> just before an incoming cap revoke request, then the client may drop
>> the revoke because it believes it's already released the requested
>> capabilities.
>>
>> This causes the MDS to wait indefinitely for the client to respond
>> to the revoke. It's therefore always a good idea to ack the cap
>> revoke request with the bumped up 'seq'.
>>
>> Currently if the cap->issued equals to the newcaps the check_caps()
>> will do nothing, we should force flush the caps.
>>
>> Cc: stable@vger.kernel.org
>> Link: https://tracker.ceph.com/issues/61782
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V3:
>> - Move the force check earlier
>>
>> V2:
>> - Improved the patch to force send the cap update only when no caps
>> being used.
>>
>>
>>   fs/ceph/caps.c  | 35 ++++++++++++++++++++++++-----------
>>   fs/ceph/super.h |  7 ++++---
>>   2 files changed, 28 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 24c31f795938..672c6611d749 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
>>    *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
>>    *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
>>    *    further delay.
>> + *  CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
>> + *    further delay.
>>    */
>>   void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>>   {
>> @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>>          }
>>
>>          doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
>> -             "flushing %s issued %s revoking %s retain %s %s%s%s\n",
>> +             "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
>>               inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
>>               ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
>>               ceph_cap_string(ci->i_flushing_caps),
>> @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>>               ceph_cap_string(retain),
>>               (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
>>               (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
>> -            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
>> +            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
>> +            (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
>>
>>          /*
>>           * If we no longer need to hold onto old our caps, and we may
>> @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>>                                  queue_writeback = true;
>>                  }
>>
>> +               if (flags & CHECK_CAPS_FLUSH_FORCE) {
>> +                       doutc(cl, "force to flush caps\n");
>> +                       goto ack;
>> +               }
>> +
>>                  if (cap == ci->i_auth_cap &&
>>                      (cap->issued & CEPH_CAP_FILE_WR)) {
>>                          /* request larger max_size from MDS? */
>> @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode,
>>          bool queue_invalidate = false;
>>          bool deleted_inode = false;
>>          bool fill_inline = false;
>> +       bool revoke_wait = false;
>> +       int flags = 0;
>>
>>          /*
>>           * If there is at least one crypto block then we'll trust
>> @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode,
>>                        ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
>>                        ceph_cap_string(revoking));
>>                  if (S_ISREG(inode->i_mode) &&
>> -                   (revoking & used & CEPH_CAP_FILE_BUFFER))
>> +                   (revoking & used & CEPH_CAP_FILE_BUFFER)) {
>>                          writeback = true;  /* initiate writeback; will delay ack */
>> -               else if (queue_invalidate &&
>> +                       revoke_wait = true;
>> +               } else if (queue_invalidate &&
>>                           revoking == CEPH_CAP_FILE_CACHE &&
>> -                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
>> -                       ; /* do nothing yet, invalidation will be queued */
>> -               else if (cap == ci->i_auth_cap)
>> +                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
>> +                       revoke_wait = true; /* do nothing yet, invalidation will be queued */
>> +               } else if (cap == ci->i_auth_cap) {
>>                          check_caps = 1; /* check auth cap only */
>> -               else
>> +               } else {
>>                          check_caps = 2; /* check all caps */
>> +               }
>>                  /* If there is new caps, try to wake up the waiters */
>>                  if (~cap->issued & newcaps)
>>                          wake = true;
>> @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode,
>>          BUG_ON(cap->issued & ~cap->implemented);
>>
>>          /* don't let check_caps skip sending a response to MDS for revoke msgs */
>> -       if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
>> +       if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
>>                  cap->mds_wanted = 0;
>> +               flags |= CHECK_CAPS_FLUSH_FORCE;
>>                  if (cap == ci->i_auth_cap)
>>                          check_caps = 1; /* check auth cap only */
>>                  else
>> @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode,
>>
>>          mutex_unlock(&session->s_mutex);
>>          if (check_caps == 1)
>> -               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
>> +               ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
>>          else if (check_caps == 2)
>> -               ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
>> +               ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
>>   }
>>
>>   /*
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index b0b368ed3018..831e8ec4d5da 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -200,9 +200,10 @@ struct ceph_cap {
>>          struct list_head caps_item;
>>   };
>>
>> -#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
>> -#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
>> -#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
>> +#define CHECK_CAPS_AUTHONLY     1  /* only check auth cap */
>> +#define CHECK_CAPS_FLUSH        2  /* flush any dirty caps */
>> +#define CHECK_CAPS_NOINVAL      4  /* don't invalidate pagecache */
>> +#define CHECK_CAPS_FLUSH_FORCE  8  /* force flush any caps */
>>
>>   struct ceph_cap_flush {
>>          u64 tid;
>> --
>> 2.45.1
>>
> Unfortunately, the test run using this change has unrelated issues,
> therefore, the tests have to be rerun. I'll schedule a fs suite run on
> priority so that we get the results by tomorrow.
>
> Will update once done. Apologies!

No worry. Just take your time.

Thanks Venky!

- Xiubo
Venky Shankar July 25, 2024, 4:51 a.m. UTC | #3
On Thu, Jul 25, 2024 at 6:31 AM Xiubo Li <xiubli@redhat.com> wrote:
>
>
> On 7/24/24 22:08, Venky Shankar wrote:
> > Hi Xiubo,
> >
> > On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote:
> >> From: Xiubo Li <xiubli@redhat.com>
> >>
> >> If a client sends out a cap update dropping caps with the prior 'seq'
> >> just before an incoming cap revoke request, then the client may drop
> >> the revoke because it believes it's already released the requested
> >> capabilities.
> >>
> >> This causes the MDS to wait indefinitely for the client to respond
> >> to the revoke. It's therefore always a good idea to ack the cap
> >> revoke request with the bumped up 'seq'.
> >>
> >> Currently if the cap->issued equals to the newcaps the check_caps()
> >> will do nothing, we should force flush the caps.
> >>
> >> Cc: stable@vger.kernel.org
> >> Link: https://tracker.ceph.com/issues/61782
> >> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> >> ---
> >>
> >> V3:
> >> - Move the force check earlier
> >>
> >> V2:
> >> - Improved the patch to force send the cap update only when no caps
> >> being used.
> >>
> >>
> >>   fs/ceph/caps.c  | 35 ++++++++++++++++++++++++-----------
> >>   fs/ceph/super.h |  7 ++++---
> >>   2 files changed, 28 insertions(+), 14 deletions(-)
> >>
> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> >> index 24c31f795938..672c6611d749 100644
> >> --- a/fs/ceph/caps.c
> >> +++ b/fs/ceph/caps.c
> >> @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
> >>    *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
> >>    *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
> >>    *    further delay.
> >> + *  CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
> >> + *    further delay.
> >>    */
> >>   void ceph_check_caps(struct ceph_inode_info *ci, int flags)
> >>   {
> >> @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
> >>          }
> >>
> >>          doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
> >> -             "flushing %s issued %s revoking %s retain %s %s%s%s\n",
> >> +             "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
> >>               inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
> >>               ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
> >>               ceph_cap_string(ci->i_flushing_caps),
> >> @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
> >>               ceph_cap_string(retain),
> >>               (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
> >>               (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
> >> -            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
> >> +            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
> >> +            (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
> >>
> >>          /*
> >>           * If we no longer need to hold onto old our caps, and we may
> >> @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
> >>                                  queue_writeback = true;
> >>                  }
> >>
> >> +               if (flags & CHECK_CAPS_FLUSH_FORCE) {
> >> +                       doutc(cl, "force to flush caps\n");
> >> +                       goto ack;
> >> +               }
> >> +
> >>                  if (cap == ci->i_auth_cap &&
> >>                      (cap->issued & CEPH_CAP_FILE_WR)) {
> >>                          /* request larger max_size from MDS? */
> >> @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode,
> >>          bool queue_invalidate = false;
> >>          bool deleted_inode = false;
> >>          bool fill_inline = false;
> >> +       bool revoke_wait = false;
> >> +       int flags = 0;
> >>
> >>          /*
> >>           * If there is at least one crypto block then we'll trust
> >> @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode,
> >>                        ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
> >>                        ceph_cap_string(revoking));
> >>                  if (S_ISREG(inode->i_mode) &&
> >> -                   (revoking & used & CEPH_CAP_FILE_BUFFER))
> >> +                   (revoking & used & CEPH_CAP_FILE_BUFFER)) {
> >>                          writeback = true;  /* initiate writeback; will delay ack */
> >> -               else if (queue_invalidate &&
> >> +                       revoke_wait = true;
> >> +               } else if (queue_invalidate &&
> >>                           revoking == CEPH_CAP_FILE_CACHE &&
> >> -                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
> >> -                       ; /* do nothing yet, invalidation will be queued */
> >> -               else if (cap == ci->i_auth_cap)
> >> +                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
> >> +                       revoke_wait = true; /* do nothing yet, invalidation will be queued */
> >> +               } else if (cap == ci->i_auth_cap) {
> >>                          check_caps = 1; /* check auth cap only */
> >> -               else
> >> +               } else {
> >>                          check_caps = 2; /* check all caps */
> >> +               }
> >>                  /* If there is new caps, try to wake up the waiters */
> >>                  if (~cap->issued & newcaps)
> >>                          wake = true;
> >> @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode,
> >>          BUG_ON(cap->issued & ~cap->implemented);
> >>
> >>          /* don't let check_caps skip sending a response to MDS for revoke msgs */
> >> -       if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
> >> +       if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
> >>                  cap->mds_wanted = 0;
> >> +               flags |= CHECK_CAPS_FLUSH_FORCE;
> >>                  if (cap == ci->i_auth_cap)
> >>                          check_caps = 1; /* check auth cap only */
> >>                  else
> >> @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode,
> >>
> >>          mutex_unlock(&session->s_mutex);
> >>          if (check_caps == 1)
> >> -               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
> >> +               ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
> >>          else if (check_caps == 2)
> >> -               ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
> >> +               ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
> >>   }
> >>
> >>   /*
> >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> >> index b0b368ed3018..831e8ec4d5da 100644
> >> --- a/fs/ceph/super.h
> >> +++ b/fs/ceph/super.h
> >> @@ -200,9 +200,10 @@ struct ceph_cap {
> >>          struct list_head caps_item;
> >>   };
> >>
> >> -#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
> >> -#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
> >> -#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
> >> +#define CHECK_CAPS_AUTHONLY     1  /* only check auth cap */
> >> +#define CHECK_CAPS_FLUSH        2  /* flush any dirty caps */
> >> +#define CHECK_CAPS_NOINVAL      4  /* don't invalidate pagecache */
> >> +#define CHECK_CAPS_FLUSH_FORCE  8  /* force flush any caps */
> >>
> >>   struct ceph_cap_flush {
> >>          u64 tid;
> >> --
> >> 2.45.1
> >>
> > Unfortunately, the test run using this change has unrelated issues,
> > therefore, the tests have to be rerun. I'll schedule a fs suite run on
> > priority so that we get the results by tomorrow.
> >
> > Will update once done. Apologies!
>
> No worry. Just take your time.

Here are the test results

        https://pulpito.ceph.com/vshankar-2024-07-24_14:14:53-fs-main-testing-default-smithi/

Mostly look good. I'll review those in depth to see if something stands out.

Thanks, Xiubo!
Venky Shankar July 25, 2024, 11:41 a.m. UTC | #4
On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> If a client sends out a cap update dropping caps with the prior 'seq'
> just before an incoming cap revoke request, then the client may drop
> the revoke because it believes it's already released the requested
> capabilities.
>
> This causes the MDS to wait indefinitely for the client to respond
> to the revoke. It's therefore always a good idea to ack the cap
> revoke request with the bumped up 'seq'.
>
> Currently if the cap->issued equals to the newcaps the check_caps()
> will do nothing, we should force flush the caps.
>
> Cc: stable@vger.kernel.org
> Link: https://tracker.ceph.com/issues/61782
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V3:
> - Move the force check earlier
>
> V2:
> - Improved the patch to force send the cap update only when no caps
> being used.
>
>
>  fs/ceph/caps.c  | 35 ++++++++++++++++++++++++-----------
>  fs/ceph/super.h |  7 ++++---
>  2 files changed, 28 insertions(+), 14 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 24c31f795938..672c6611d749 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
>   *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
>   *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
>   *    further delay.
> + *  CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
> + *    further delay.
>   */
>  void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>  {
> @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>         }
>
>         doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
> -             "flushing %s issued %s revoking %s retain %s %s%s%s\n",
> +             "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
>              inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
>              ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
>              ceph_cap_string(ci->i_flushing_caps),
> @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>              ceph_cap_string(retain),
>              (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
>              (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
> -            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
> +            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
> +            (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
>
>         /*
>          * If we no longer need to hold onto old our caps, and we may
> @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>                                 queue_writeback = true;
>                 }
>
> +               if (flags & CHECK_CAPS_FLUSH_FORCE) {
> +                       doutc(cl, "force to flush caps\n");
> +                       goto ack;
> +               }
> +
>                 if (cap == ci->i_auth_cap &&
>                     (cap->issued & CEPH_CAP_FILE_WR)) {
>                         /* request larger max_size from MDS? */
> @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode,
>         bool queue_invalidate = false;
>         bool deleted_inode = false;
>         bool fill_inline = false;
> +       bool revoke_wait = false;
> +       int flags = 0;
>
>         /*
>          * If there is at least one crypto block then we'll trust
> @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode,
>                       ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
>                       ceph_cap_string(revoking));
>                 if (S_ISREG(inode->i_mode) &&
> -                   (revoking & used & CEPH_CAP_FILE_BUFFER))
> +                   (revoking & used & CEPH_CAP_FILE_BUFFER)) {
>                         writeback = true;  /* initiate writeback; will delay ack */
> -               else if (queue_invalidate &&
> +                       revoke_wait = true;
> +               } else if (queue_invalidate &&
>                          revoking == CEPH_CAP_FILE_CACHE &&
> -                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
> -                       ; /* do nothing yet, invalidation will be queued */
> -               else if (cap == ci->i_auth_cap)
> +                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
> +                       revoke_wait = true; /* do nothing yet, invalidation will be queued */
> +               } else if (cap == ci->i_auth_cap) {
>                         check_caps = 1; /* check auth cap only */
> -               else
> +               } else {
>                         check_caps = 2; /* check all caps */
> +               }
>                 /* If there is new caps, try to wake up the waiters */
>                 if (~cap->issued & newcaps)
>                         wake = true;
> @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode,
>         BUG_ON(cap->issued & ~cap->implemented);
>
>         /* don't let check_caps skip sending a response to MDS for revoke msgs */
> -       if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
> +       if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
>                 cap->mds_wanted = 0;
> +               flags |= CHECK_CAPS_FLUSH_FORCE;
>                 if (cap == ci->i_auth_cap)
>                         check_caps = 1; /* check auth cap only */
>                 else
> @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode,
>
>         mutex_unlock(&session->s_mutex);
>         if (check_caps == 1)
> -               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
> +               ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
>         else if (check_caps == 2)
> -               ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
> +               ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
>  }
>
>  /*
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index b0b368ed3018..831e8ec4d5da 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -200,9 +200,10 @@ struct ceph_cap {
>         struct list_head caps_item;
>  };
>
> -#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
> -#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
> -#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
> +#define CHECK_CAPS_AUTHONLY     1  /* only check auth cap */
> +#define CHECK_CAPS_FLUSH        2  /* flush any dirty caps */
> +#define CHECK_CAPS_NOINVAL      4  /* don't invalidate pagecache */
> +#define CHECK_CAPS_FLUSH_FORCE  8  /* force flush any caps */
>
>  struct ceph_cap_flush {
>         u64 tid;
> --
> 2.45.1
>

v3 pathset looks good.

Reviewed-by: Venky Shankar <vshankar@redhat.com>
Tested-by: Venky Shankar <vshankar@redhat.com>
Xiubo Li July 26, 2024, 1:41 a.m. UTC | #5
On 7/25/24 19:41, Venky Shankar wrote:
> On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> If a client sends out a cap update dropping caps with the prior 'seq'
>> just before an incoming cap revoke request, then the client may drop
>> the revoke because it believes it's already released the requested
>> capabilities.
>>
>> This causes the MDS to wait indefinitely for the client to respond
>> to the revoke. It's therefore always a good idea to ack the cap
>> revoke request with the bumped up 'seq'.
>>
>> Currently if the cap->issued equals to the newcaps the check_caps()
>> will do nothing, we should force flush the caps.
>>
>> Cc: stable@vger.kernel.org
>> Link: https://tracker.ceph.com/issues/61782
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V3:
>> - Move the force check earlier
>>
>> V2:
>> - Improved the patch to force send the cap update only when no caps
>> being used.
>>
>>
>>   fs/ceph/caps.c  | 35 ++++++++++++++++++++++++-----------
>>   fs/ceph/super.h |  7 ++++---
>>   2 files changed, 28 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 24c31f795938..672c6611d749 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
>>    *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
>>    *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
>>    *    further delay.
>> + *  CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
>> + *    further delay.
>>    */
>>   void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>>   {
>> @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>>          }
>>
>>          doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
>> -             "flushing %s issued %s revoking %s retain %s %s%s%s\n",
>> +             "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
>>               inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
>>               ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
>>               ceph_cap_string(ci->i_flushing_caps),
>> @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>>               ceph_cap_string(retain),
>>               (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
>>               (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
>> -            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
>> +            (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
>> +            (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
>>
>>          /*
>>           * If we no longer need to hold onto old our caps, and we may
>> @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
>>                                  queue_writeback = true;
>>                  }
>>
>> +               if (flags & CHECK_CAPS_FLUSH_FORCE) {
>> +                       doutc(cl, "force to flush caps\n");
>> +                       goto ack;
>> +               }
>> +
>>                  if (cap == ci->i_auth_cap &&
>>                      (cap->issued & CEPH_CAP_FILE_WR)) {
>>                          /* request larger max_size from MDS? */
>> @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode,
>>          bool queue_invalidate = false;
>>          bool deleted_inode = false;
>>          bool fill_inline = false;
>> +       bool revoke_wait = false;
>> +       int flags = 0;
>>
>>          /*
>>           * If there is at least one crypto block then we'll trust
>> @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode,
>>                        ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
>>                        ceph_cap_string(revoking));
>>                  if (S_ISREG(inode->i_mode) &&
>> -                   (revoking & used & CEPH_CAP_FILE_BUFFER))
>> +                   (revoking & used & CEPH_CAP_FILE_BUFFER)) {
>>                          writeback = true;  /* initiate writeback; will delay ack */
>> -               else if (queue_invalidate &&
>> +                       revoke_wait = true;
>> +               } else if (queue_invalidate &&
>>                           revoking == CEPH_CAP_FILE_CACHE &&
>> -                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
>> -                       ; /* do nothing yet, invalidation will be queued */
>> -               else if (cap == ci->i_auth_cap)
>> +                        (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
>> +                       revoke_wait = true; /* do nothing yet, invalidation will be queued */
>> +               } else if (cap == ci->i_auth_cap) {
>>                          check_caps = 1; /* check auth cap only */
>> -               else
>> +               } else {
>>                          check_caps = 2; /* check all caps */
>> +               }
>>                  /* If there is new caps, try to wake up the waiters */
>>                  if (~cap->issued & newcaps)
>>                          wake = true;
>> @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode,
>>          BUG_ON(cap->issued & ~cap->implemented);
>>
>>          /* don't let check_caps skip sending a response to MDS for revoke msgs */
>> -       if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
>> +       if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
>>                  cap->mds_wanted = 0;
>> +               flags |= CHECK_CAPS_FLUSH_FORCE;
>>                  if (cap == ci->i_auth_cap)
>>                          check_caps = 1; /* check auth cap only */
>>                  else
>> @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode,
>>
>>          mutex_unlock(&session->s_mutex);
>>          if (check_caps == 1)
>> -               ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
>> +               ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
>>          else if (check_caps == 2)
>> -               ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
>> +               ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
>>   }
>>
>>   /*
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index b0b368ed3018..831e8ec4d5da 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -200,9 +200,10 @@ struct ceph_cap {
>>          struct list_head caps_item;
>>   };
>>
>> -#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
>> -#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
>> -#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
>> +#define CHECK_CAPS_AUTHONLY     1  /* only check auth cap */
>> +#define CHECK_CAPS_FLUSH        2  /* flush any dirty caps */
>> +#define CHECK_CAPS_NOINVAL      4  /* don't invalidate pagecache */
>> +#define CHECK_CAPS_FLUSH_FORCE  8  /* force flush any caps */
>>
>>   struct ceph_cap_flush {
>>          u64 tid;
>> --
>> 2.45.1
>>
> v3 pathset looks good.
>
> Reviewed-by: Venky Shankar <vshankar@redhat.com>
> Tested-by: Venky Shankar <vshankar@redhat.com>
Thanks Venky.
>
diff mbox series

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 24c31f795938..672c6611d749 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2024,6 +2024,8 @@  bool __ceph_should_report_size(struct ceph_inode_info *ci)
  *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
  *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
  *    further delay.
+ *  CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
+ *    further delay.
  */
 void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 {
@@ -2105,7 +2107,7 @@  void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 	}
 
 	doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
-	      "flushing %s issued %s revoking %s retain %s %s%s%s\n",
+	      "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
 	     ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
 	     ceph_cap_string(ci->i_flushing_caps),
@@ -2113,7 +2115,8 @@  void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 	     ceph_cap_string(retain),
 	     (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
 	     (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
-	     (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
+	     (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
+	     (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
 
 	/*
 	 * If we no longer need to hold onto old our caps, and we may
@@ -2188,6 +2191,11 @@  void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 				queue_writeback = true;
 		}
 
+		if (flags & CHECK_CAPS_FLUSH_FORCE) {
+			doutc(cl, "force to flush caps\n");
+			goto ack;
+		}
+
 		if (cap == ci->i_auth_cap &&
 		    (cap->issued & CEPH_CAP_FILE_WR)) {
 			/* request larger max_size from MDS? */
@@ -3518,6 +3526,8 @@  static void handle_cap_grant(struct inode *inode,
 	bool queue_invalidate = false;
 	bool deleted_inode = false;
 	bool fill_inline = false;
+	bool revoke_wait = false;
+	int flags = 0;
 
 	/*
 	 * If there is at least one crypto block then we'll trust
@@ -3713,16 +3723,18 @@  static void handle_cap_grant(struct inode *inode,
 		      ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
 		      ceph_cap_string(revoking));
 		if (S_ISREG(inode->i_mode) &&
-		    (revoking & used & CEPH_CAP_FILE_BUFFER))
+		    (revoking & used & CEPH_CAP_FILE_BUFFER)) {
 			writeback = true;  /* initiate writeback; will delay ack */
-		else if (queue_invalidate &&
+			revoke_wait = true;
+		} else if (queue_invalidate &&
 			 revoking == CEPH_CAP_FILE_CACHE &&
-			 (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
-			; /* do nothing yet, invalidation will be queued */
-		else if (cap == ci->i_auth_cap)
+			 (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
+			revoke_wait = true; /* do nothing yet, invalidation will be queued */
+		} else if (cap == ci->i_auth_cap) {
 			check_caps = 1; /* check auth cap only */
-		else
+		} else {
 			check_caps = 2; /* check all caps */
+		}
 		/* If there is new caps, try to wake up the waiters */
 		if (~cap->issued & newcaps)
 			wake = true;
@@ -3749,8 +3761,9 @@  static void handle_cap_grant(struct inode *inode,
 	BUG_ON(cap->issued & ~cap->implemented);
 
 	/* don't let check_caps skip sending a response to MDS for revoke msgs */
-	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
+	if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
 		cap->mds_wanted = 0;
+		flags |= CHECK_CAPS_FLUSH_FORCE;
 		if (cap == ci->i_auth_cap)
 			check_caps = 1; /* check auth cap only */
 		else
@@ -3806,9 +3819,9 @@  static void handle_cap_grant(struct inode *inode,
 
 	mutex_unlock(&session->s_mutex);
 	if (check_caps == 1)
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
+		ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
 	else if (check_caps == 2)
-		ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
+		ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
 }
 
 /*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b0b368ed3018..831e8ec4d5da 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -200,9 +200,10 @@  struct ceph_cap {
 	struct list_head caps_item;
 };
 
-#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
-#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
-#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
+#define CHECK_CAPS_AUTHONLY     1  /* only check auth cap */
+#define CHECK_CAPS_FLUSH        2  /* flush any dirty caps */
+#define CHECK_CAPS_NOINVAL      4  /* don't invalidate pagecache */
+#define CHECK_CAPS_FLUSH_FORCE  8  /* force flush any caps */
 
 struct ceph_cap_flush {
 	u64 tid;