Message ID | 20240716120724.134512-1-xiubli@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] ceph: force sending a cap update msg back to MDS for revoke op | expand |
Hi Xiubo, On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote: > > From: Xiubo Li <xiubli@redhat.com> > > If a client sends out a cap update dropping caps with the prior 'seq' > just before an incoming cap revoke request, then the client may drop > the revoke because it believes it's already released the requested > capabilities. > > This causes the MDS to wait indefinitely for the client to respond > to the revoke. It's therefore always a good idea to ack the cap > revoke request with the bumped up 'seq'. > > Currently if the cap->issued equals to the newcaps the check_caps() > will do nothing, we should force flush the caps. > > Cc: stable@vger.kernel.org > Link: https://tracker.ceph.com/issues/61782 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > > V3: > - Move the force check earlier > > V2: > - Improved the patch to force send the cap update only when no caps > being used. > > > fs/ceph/caps.c | 35 ++++++++++++++++++++++++----------- > fs/ceph/super.h | 7 ++++--- > 2 files changed, 28 insertions(+), 14 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index 24c31f795938..672c6611d749 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) > * CHECK_CAPS_AUTHONLY - we should only check the auth cap > * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without > * further delay. > + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without > + * further delay. > */ > void ceph_check_caps(struct ceph_inode_info *ci, int flags) > { > @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > } > > doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " > - "flushing %s issued %s revoking %s retain %s %s%s%s\n", > + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", > inode, ceph_vinop(inode), ceph_cap_string(file_wanted), > ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), > ceph_cap_string(ci->i_flushing_caps), > @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > ceph_cap_string(retain), > (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", > (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", > - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); > + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", > + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); > > /* > * If we no longer need to hold onto old our caps, and we may > @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > queue_writeback = true; > } > > + if (flags & CHECK_CAPS_FLUSH_FORCE) { > + doutc(cl, "force to flush caps\n"); > + goto ack; > + } > + > if (cap == ci->i_auth_cap && > (cap->issued & CEPH_CAP_FILE_WR)) { > /* request larger max_size from MDS? */ > @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode, > bool queue_invalidate = false; > bool deleted_inode = false; > bool fill_inline = false; > + bool revoke_wait = false; > + int flags = 0; > > /* > * If there is at least one crypto block then we'll trust > @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode, > ceph_cap_string(cap->issued), ceph_cap_string(newcaps), > ceph_cap_string(revoking)); > if (S_ISREG(inode->i_mode) && > - (revoking & used & CEPH_CAP_FILE_BUFFER)) > + (revoking & used & CEPH_CAP_FILE_BUFFER)) { > writeback = true; /* initiate writeback; will delay ack */ > - else if (queue_invalidate && > + revoke_wait = true; > + } else if (queue_invalidate && > revoking == CEPH_CAP_FILE_CACHE && > - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) > - ; /* do nothing yet, invalidation will be queued */ > - else if (cap == ci->i_auth_cap) > + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { > + revoke_wait = true; /* do nothing yet, invalidation will be queued */ > + } else if (cap == ci->i_auth_cap) { > check_caps = 1; /* check auth cap only */ > - else > + } else { > check_caps = 2; /* check all caps */ > + } > /* If there is new caps, try to wake up the waiters */ > if (~cap->issued & newcaps) > wake = true; > @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode, > BUG_ON(cap->issued & ~cap->implemented); > > /* don't let check_caps skip sending a response to MDS for revoke msgs */ > - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { > + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { > cap->mds_wanted = 0; > + flags |= CHECK_CAPS_FLUSH_FORCE; > if (cap == ci->i_auth_cap) > check_caps = 1; /* check auth cap only */ > else > @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode, > > mutex_unlock(&session->s_mutex); > if (check_caps == 1) > - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); > + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); > else if (check_caps == 2) > - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); > + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); > } > > /* > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index b0b368ed3018..831e8ec4d5da 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -200,9 +200,10 @@ struct ceph_cap { > struct list_head caps_item; > }; > > -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ > -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ > -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ > +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ > +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ > +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ > +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ > > struct ceph_cap_flush { > u64 tid; > -- > 2.45.1 > Unfortunately, the test run using this change has unrelated issues, therefore, the tests have to be rerun. I'll schedule a fs suite run on priority so that we get the results by tomorrow. Will update once done. Apologies!
On 7/24/24 22:08, Venky Shankar wrote: > Hi Xiubo, > > On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> If a client sends out a cap update dropping caps with the prior 'seq' >> just before an incoming cap revoke request, then the client may drop >> the revoke because it believes it's already released the requested >> capabilities. >> >> This causes the MDS to wait indefinitely for the client to respond >> to the revoke. It's therefore always a good idea to ack the cap >> revoke request with the bumped up 'seq'. >> >> Currently if the cap->issued equals to the newcaps the check_caps() >> will do nothing, we should force flush the caps. >> >> Cc: stable@vger.kernel.org >> Link: https://tracker.ceph.com/issues/61782 >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> >> V3: >> - Move the force check earlier >> >> V2: >> - Improved the patch to force send the cap update only when no caps >> being used. >> >> >> fs/ceph/caps.c | 35 ++++++++++++++++++++++++----------- >> fs/ceph/super.h | 7 ++++--- >> 2 files changed, 28 insertions(+), 14 deletions(-) >> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >> index 24c31f795938..672c6611d749 100644 >> --- a/fs/ceph/caps.c >> +++ b/fs/ceph/caps.c >> @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) >> * CHECK_CAPS_AUTHONLY - we should only check the auth cap >> * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without >> * further delay. >> + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without >> + * further delay. >> */ >> void ceph_check_caps(struct ceph_inode_info *ci, int flags) >> { >> @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) >> } >> >> doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " >> - "flushing %s issued %s revoking %s retain %s %s%s%s\n", >> + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", >> inode, ceph_vinop(inode), ceph_cap_string(file_wanted), >> ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), >> ceph_cap_string(ci->i_flushing_caps), >> @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) >> ceph_cap_string(retain), >> (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", >> (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", >> - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); >> + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", >> + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); >> >> /* >> * If we no longer need to hold onto old our caps, and we may >> @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) >> queue_writeback = true; >> } >> >> + if (flags & CHECK_CAPS_FLUSH_FORCE) { >> + doutc(cl, "force to flush caps\n"); >> + goto ack; >> + } >> + >> if (cap == ci->i_auth_cap && >> (cap->issued & CEPH_CAP_FILE_WR)) { >> /* request larger max_size from MDS? */ >> @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode, >> bool queue_invalidate = false; >> bool deleted_inode = false; >> bool fill_inline = false; >> + bool revoke_wait = false; >> + int flags = 0; >> >> /* >> * If there is at least one crypto block then we'll trust >> @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode, >> ceph_cap_string(cap->issued), ceph_cap_string(newcaps), >> ceph_cap_string(revoking)); >> if (S_ISREG(inode->i_mode) && >> - (revoking & used & CEPH_CAP_FILE_BUFFER)) >> + (revoking & used & CEPH_CAP_FILE_BUFFER)) { >> writeback = true; /* initiate writeback; will delay ack */ >> - else if (queue_invalidate && >> + revoke_wait = true; >> + } else if (queue_invalidate && >> revoking == CEPH_CAP_FILE_CACHE && >> - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) >> - ; /* do nothing yet, invalidation will be queued */ >> - else if (cap == ci->i_auth_cap) >> + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { >> + revoke_wait = true; /* do nothing yet, invalidation will be queued */ >> + } else if (cap == ci->i_auth_cap) { >> check_caps = 1; /* check auth cap only */ >> - else >> + } else { >> check_caps = 2; /* check all caps */ >> + } >> /* If there is new caps, try to wake up the waiters */ >> if (~cap->issued & newcaps) >> wake = true; >> @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode, >> BUG_ON(cap->issued & ~cap->implemented); >> >> /* don't let check_caps skip sending a response to MDS for revoke msgs */ >> - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { >> + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { >> cap->mds_wanted = 0; >> + flags |= CHECK_CAPS_FLUSH_FORCE; >> if (cap == ci->i_auth_cap) >> check_caps = 1; /* check auth cap only */ >> else >> @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode, >> >> mutex_unlock(&session->s_mutex); >> if (check_caps == 1) >> - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); >> + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); >> else if (check_caps == 2) >> - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); >> + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); >> } >> >> /* >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h >> index b0b368ed3018..831e8ec4d5da 100644 >> --- a/fs/ceph/super.h >> +++ b/fs/ceph/super.h >> @@ -200,9 +200,10 @@ struct ceph_cap { >> struct list_head caps_item; >> }; >> >> -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ >> -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ >> -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ >> +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ >> +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ >> +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ >> +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ >> >> struct ceph_cap_flush { >> u64 tid; >> -- >> 2.45.1 >> > Unfortunately, the test run using this change has unrelated issues, > therefore, the tests have to be rerun. I'll schedule a fs suite run on > priority so that we get the results by tomorrow. > > Will update once done. Apologies! No worry. Just take your time. Thanks Venky! - Xiubo
On Thu, Jul 25, 2024 at 6:31 AM Xiubo Li <xiubli@redhat.com> wrote: > > > On 7/24/24 22:08, Venky Shankar wrote: > > Hi Xiubo, > > > > On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote: > >> From: Xiubo Li <xiubli@redhat.com> > >> > >> If a client sends out a cap update dropping caps with the prior 'seq' > >> just before an incoming cap revoke request, then the client may drop > >> the revoke because it believes it's already released the requested > >> capabilities. > >> > >> This causes the MDS to wait indefinitely for the client to respond > >> to the revoke. It's therefore always a good idea to ack the cap > >> revoke request with the bumped up 'seq'. > >> > >> Currently if the cap->issued equals to the newcaps the check_caps() > >> will do nothing, we should force flush the caps. > >> > >> Cc: stable@vger.kernel.org > >> Link: https://tracker.ceph.com/issues/61782 > >> Signed-off-by: Xiubo Li <xiubli@redhat.com> > >> --- > >> > >> V3: > >> - Move the force check earlier > >> > >> V2: > >> - Improved the patch to force send the cap update only when no caps > >> being used. > >> > >> > >> fs/ceph/caps.c | 35 ++++++++++++++++++++++++----------- > >> fs/ceph/super.h | 7 ++++--- > >> 2 files changed, 28 insertions(+), 14 deletions(-) > >> > >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > >> index 24c31f795938..672c6611d749 100644 > >> --- a/fs/ceph/caps.c > >> +++ b/fs/ceph/caps.c > >> @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) > >> * CHECK_CAPS_AUTHONLY - we should only check the auth cap > >> * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without > >> * further delay. > >> + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without > >> + * further delay. > >> */ > >> void ceph_check_caps(struct ceph_inode_info *ci, int flags) > >> { > >> @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > >> } > >> > >> doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " > >> - "flushing %s issued %s revoking %s retain %s %s%s%s\n", > >> + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", > >> inode, ceph_vinop(inode), ceph_cap_string(file_wanted), > >> ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), > >> ceph_cap_string(ci->i_flushing_caps), > >> @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > >> ceph_cap_string(retain), > >> (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", > >> (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", > >> - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); > >> + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", > >> + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); > >> > >> /* > >> * If we no longer need to hold onto old our caps, and we may > >> @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > >> queue_writeback = true; > >> } > >> > >> + if (flags & CHECK_CAPS_FLUSH_FORCE) { > >> + doutc(cl, "force to flush caps\n"); > >> + goto ack; > >> + } > >> + > >> if (cap == ci->i_auth_cap && > >> (cap->issued & CEPH_CAP_FILE_WR)) { > >> /* request larger max_size from MDS? */ > >> @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode, > >> bool queue_invalidate = false; > >> bool deleted_inode = false; > >> bool fill_inline = false; > >> + bool revoke_wait = false; > >> + int flags = 0; > >> > >> /* > >> * If there is at least one crypto block then we'll trust > >> @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode, > >> ceph_cap_string(cap->issued), ceph_cap_string(newcaps), > >> ceph_cap_string(revoking)); > >> if (S_ISREG(inode->i_mode) && > >> - (revoking & used & CEPH_CAP_FILE_BUFFER)) > >> + (revoking & used & CEPH_CAP_FILE_BUFFER)) { > >> writeback = true; /* initiate writeback; will delay ack */ > >> - else if (queue_invalidate && > >> + revoke_wait = true; > >> + } else if (queue_invalidate && > >> revoking == CEPH_CAP_FILE_CACHE && > >> - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) > >> - ; /* do nothing yet, invalidation will be queued */ > >> - else if (cap == ci->i_auth_cap) > >> + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { > >> + revoke_wait = true; /* do nothing yet, invalidation will be queued */ > >> + } else if (cap == ci->i_auth_cap) { > >> check_caps = 1; /* check auth cap only */ > >> - else > >> + } else { > >> check_caps = 2; /* check all caps */ > >> + } > >> /* If there is new caps, try to wake up the waiters */ > >> if (~cap->issued & newcaps) > >> wake = true; > >> @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode, > >> BUG_ON(cap->issued & ~cap->implemented); > >> > >> /* don't let check_caps skip sending a response to MDS for revoke msgs */ > >> - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { > >> + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { > >> cap->mds_wanted = 0; > >> + flags |= CHECK_CAPS_FLUSH_FORCE; > >> if (cap == ci->i_auth_cap) > >> check_caps = 1; /* check auth cap only */ > >> else > >> @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode, > >> > >> mutex_unlock(&session->s_mutex); > >> if (check_caps == 1) > >> - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); > >> + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); > >> else if (check_caps == 2) > >> - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); > >> + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); > >> } > >> > >> /* > >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h > >> index b0b368ed3018..831e8ec4d5da 100644 > >> --- a/fs/ceph/super.h > >> +++ b/fs/ceph/super.h > >> @@ -200,9 +200,10 @@ struct ceph_cap { > >> struct list_head caps_item; > >> }; > >> > >> -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ > >> -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ > >> -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ > >> +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ > >> +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ > >> +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ > >> +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ > >> > >> struct ceph_cap_flush { > >> u64 tid; > >> -- > >> 2.45.1 > >> > > Unfortunately, the test run using this change has unrelated issues, > > therefore, the tests have to be rerun. I'll schedule a fs suite run on > > priority so that we get the results by tomorrow. > > > > Will update once done. Apologies! > > No worry. Just take your time. Here are the test results https://pulpito.ceph.com/vshankar-2024-07-24_14:14:53-fs-main-testing-default-smithi/ Mostly look good. I'll review those in depth to see if something stands out. Thanks, Xiubo!
On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote: > > From: Xiubo Li <xiubli@redhat.com> > > If a client sends out a cap update dropping caps with the prior 'seq' > just before an incoming cap revoke request, then the client may drop > the revoke because it believes it's already released the requested > capabilities. > > This causes the MDS to wait indefinitely for the client to respond > to the revoke. It's therefore always a good idea to ack the cap > revoke request with the bumped up 'seq'. > > Currently if the cap->issued equals to the newcaps the check_caps() > will do nothing, we should force flush the caps. > > Cc: stable@vger.kernel.org > Link: https://tracker.ceph.com/issues/61782 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > > V3: > - Move the force check earlier > > V2: > - Improved the patch to force send the cap update only when no caps > being used. > > > fs/ceph/caps.c | 35 ++++++++++++++++++++++++----------- > fs/ceph/super.h | 7 ++++--- > 2 files changed, 28 insertions(+), 14 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index 24c31f795938..672c6611d749 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) > * CHECK_CAPS_AUTHONLY - we should only check the auth cap > * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without > * further delay. > + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without > + * further delay. > */ > void ceph_check_caps(struct ceph_inode_info *ci, int flags) > { > @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > } > > doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " > - "flushing %s issued %s revoking %s retain %s %s%s%s\n", > + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", > inode, ceph_vinop(inode), ceph_cap_string(file_wanted), > ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), > ceph_cap_string(ci->i_flushing_caps), > @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > ceph_cap_string(retain), > (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", > (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", > - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); > + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", > + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); > > /* > * If we no longer need to hold onto old our caps, and we may > @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) > queue_writeback = true; > } > > + if (flags & CHECK_CAPS_FLUSH_FORCE) { > + doutc(cl, "force to flush caps\n"); > + goto ack; > + } > + > if (cap == ci->i_auth_cap && > (cap->issued & CEPH_CAP_FILE_WR)) { > /* request larger max_size from MDS? */ > @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode, > bool queue_invalidate = false; > bool deleted_inode = false; > bool fill_inline = false; > + bool revoke_wait = false; > + int flags = 0; > > /* > * If there is at least one crypto block then we'll trust > @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode, > ceph_cap_string(cap->issued), ceph_cap_string(newcaps), > ceph_cap_string(revoking)); > if (S_ISREG(inode->i_mode) && > - (revoking & used & CEPH_CAP_FILE_BUFFER)) > + (revoking & used & CEPH_CAP_FILE_BUFFER)) { > writeback = true; /* initiate writeback; will delay ack */ > - else if (queue_invalidate && > + revoke_wait = true; > + } else if (queue_invalidate && > revoking == CEPH_CAP_FILE_CACHE && > - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) > - ; /* do nothing yet, invalidation will be queued */ > - else if (cap == ci->i_auth_cap) > + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { > + revoke_wait = true; /* do nothing yet, invalidation will be queued */ > + } else if (cap == ci->i_auth_cap) { > check_caps = 1; /* check auth cap only */ > - else > + } else { > check_caps = 2; /* check all caps */ > + } > /* If there is new caps, try to wake up the waiters */ > if (~cap->issued & newcaps) > wake = true; > @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode, > BUG_ON(cap->issued & ~cap->implemented); > > /* don't let check_caps skip sending a response to MDS for revoke msgs */ > - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { > + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { > cap->mds_wanted = 0; > + flags |= CHECK_CAPS_FLUSH_FORCE; > if (cap == ci->i_auth_cap) > check_caps = 1; /* check auth cap only */ > else > @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode, > > mutex_unlock(&session->s_mutex); > if (check_caps == 1) > - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); > + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); > else if (check_caps == 2) > - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); > + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); > } > > /* > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index b0b368ed3018..831e8ec4d5da 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -200,9 +200,10 @@ struct ceph_cap { > struct list_head caps_item; > }; > > -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ > -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ > -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ > +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ > +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ > +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ > +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ > > struct ceph_cap_flush { > u64 tid; > -- > 2.45.1 > v3 pathset looks good. Reviewed-by: Venky Shankar <vshankar@redhat.com> Tested-by: Venky Shankar <vshankar@redhat.com>
On 7/25/24 19:41, Venky Shankar wrote: > On Tue, Jul 16, 2024 at 5:37 PM <xiubli@redhat.com> wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> If a client sends out a cap update dropping caps with the prior 'seq' >> just before an incoming cap revoke request, then the client may drop >> the revoke because it believes it's already released the requested >> capabilities. >> >> This causes the MDS to wait indefinitely for the client to respond >> to the revoke. It's therefore always a good idea to ack the cap >> revoke request with the bumped up 'seq'. >> >> Currently if the cap->issued equals to the newcaps the check_caps() >> will do nothing, we should force flush the caps. >> >> Cc: stable@vger.kernel.org >> Link: https://tracker.ceph.com/issues/61782 >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> >> V3: >> - Move the force check earlier >> >> V2: >> - Improved the patch to force send the cap update only when no caps >> being used. >> >> >> fs/ceph/caps.c | 35 ++++++++++++++++++++++++----------- >> fs/ceph/super.h | 7 ++++--- >> 2 files changed, 28 insertions(+), 14 deletions(-) >> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >> index 24c31f795938..672c6611d749 100644 >> --- a/fs/ceph/caps.c >> +++ b/fs/ceph/caps.c >> @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) >> * CHECK_CAPS_AUTHONLY - we should only check the auth cap >> * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without >> * further delay. >> + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without >> + * further delay. >> */ >> void ceph_check_caps(struct ceph_inode_info *ci, int flags) >> { >> @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) >> } >> >> doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " >> - "flushing %s issued %s revoking %s retain %s %s%s%s\n", >> + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", >> inode, ceph_vinop(inode), ceph_cap_string(file_wanted), >> ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), >> ceph_cap_string(ci->i_flushing_caps), >> @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) >> ceph_cap_string(retain), >> (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", >> (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", >> - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); >> + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", >> + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); >> >> /* >> * If we no longer need to hold onto old our caps, and we may >> @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) >> queue_writeback = true; >> } >> >> + if (flags & CHECK_CAPS_FLUSH_FORCE) { >> + doutc(cl, "force to flush caps\n"); >> + goto ack; >> + } >> + >> if (cap == ci->i_auth_cap && >> (cap->issued & CEPH_CAP_FILE_WR)) { >> /* request larger max_size from MDS? */ >> @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode, >> bool queue_invalidate = false; >> bool deleted_inode = false; >> bool fill_inline = false; >> + bool revoke_wait = false; >> + int flags = 0; >> >> /* >> * If there is at least one crypto block then we'll trust >> @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode, >> ceph_cap_string(cap->issued), ceph_cap_string(newcaps), >> ceph_cap_string(revoking)); >> if (S_ISREG(inode->i_mode) && >> - (revoking & used & CEPH_CAP_FILE_BUFFER)) >> + (revoking & used & CEPH_CAP_FILE_BUFFER)) { >> writeback = true; /* initiate writeback; will delay ack */ >> - else if (queue_invalidate && >> + revoke_wait = true; >> + } else if (queue_invalidate && >> revoking == CEPH_CAP_FILE_CACHE && >> - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) >> - ; /* do nothing yet, invalidation will be queued */ >> - else if (cap == ci->i_auth_cap) >> + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { >> + revoke_wait = true; /* do nothing yet, invalidation will be queued */ >> + } else if (cap == ci->i_auth_cap) { >> check_caps = 1; /* check auth cap only */ >> - else >> + } else { >> check_caps = 2; /* check all caps */ >> + } >> /* If there is new caps, try to wake up the waiters */ >> if (~cap->issued & newcaps) >> wake = true; >> @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode, >> BUG_ON(cap->issued & ~cap->implemented); >> >> /* don't let check_caps skip sending a response to MDS for revoke msgs */ >> - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { >> + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { >> cap->mds_wanted = 0; >> + flags |= CHECK_CAPS_FLUSH_FORCE; >> if (cap == ci->i_auth_cap) >> check_caps = 1; /* check auth cap only */ >> else >> @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode, >> >> mutex_unlock(&session->s_mutex); >> if (check_caps == 1) >> - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); >> + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); >> else if (check_caps == 2) >> - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); >> + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); >> } >> >> /* >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h >> index b0b368ed3018..831e8ec4d5da 100644 >> --- a/fs/ceph/super.h >> +++ b/fs/ceph/super.h >> @@ -200,9 +200,10 @@ struct ceph_cap { >> struct list_head caps_item; >> }; >> >> -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ >> -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ >> -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ >> +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ >> +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ >> +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ >> +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ >> >> struct ceph_cap_flush { >> u64 tid; >> -- >> 2.45.1 >> > v3 pathset looks good. > > Reviewed-by: Venky Shankar <vshankar@redhat.com> > Tested-by: Venky Shankar <vshankar@redhat.com> Thanks Venky. >
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 24c31f795938..672c6611d749 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) * CHECK_CAPS_AUTHONLY - we should only check the auth cap * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without + * further delay. */ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) } doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " - "flushing %s issued %s revoking %s retain %s %s%s%s\n", + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may @@ -2188,6 +2191,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) queue_writeback = true; } + if (flags & CHECK_CAPS_FLUSH_FORCE) { + doutc(cl, "force to flush caps\n"); + goto ack; + } + if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ @@ -3518,6 +3526,8 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false; + bool revoke_wait = false; + int flags = 0; /* * If there is at least one crypto block then we'll trust @@ -3713,16 +3723,18 @@ static void handle_cap_grant(struct inode *inode, ceph_cap_string(cap->issued), ceph_cap_string(newcaps), ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && - (revoking & used & CEPH_CAP_FILE_BUFFER)) + (revoking & used & CEPH_CAP_FILE_BUFFER)) { writeback = true; /* initiate writeback; will delay ack */ - else if (queue_invalidate && + revoke_wait = true; + } else if (queue_invalidate && revoking == CEPH_CAP_FILE_CACHE && - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) - ; /* do nothing yet, invalidation will be queued */ - else if (cap == ci->i_auth_cap) + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { + revoke_wait = true; /* do nothing yet, invalidation will be queued */ + } else if (cap == ci->i_auth_cap) { check_caps = 1; /* check auth cap only */ - else + } else { check_caps = 2; /* check all caps */ + } /* If there is new caps, try to wake up the waiters */ if (~cap->issued & newcaps) wake = true; @@ -3749,8 +3761,9 @@ static void handle_cap_grant(struct inode *inode, BUG_ON(cap->issued & ~cap->implemented); /* don't let check_caps skip sending a response to MDS for revoke msgs */ - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0; + flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else @@ -3806,9 +3819,9 @@ static void handle_cap_grant(struct inode *inode, mutex_unlock(&session->s_mutex); if (check_caps == 1) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); } /* diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b0b368ed3018..831e8ec4d5da 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; }; -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ struct ceph_cap_flush { u64 tid;