diff mbox series

[v2,2/2] ceph: flush all the caps release when syncing the whole filesystem

Message ID 20240730054135.640396-3-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series ceph: flush all the caps release when syncing the whole filesystem | expand

Commit Message

Xiubo Li July 30, 2024, 5:41 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

We have hit a race between cap releases and a cap revoke request
that will cause check_caps() to miss sending a cap revoke ack
to the MDS. The client will then depend on the cap release to release
the revoking caps, which could be delayed for some unknown reasons.

In kclient we have figured out the root cause of the race, and
having a way to explicitly trigger this manually could help to get
rid of the cap revoke stuck issue.

URL: https://tracker.ceph.com/issues/67221
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       | 22 ++++++++++++++++++++++
 fs/ceph/mds_client.c |  1 +
 fs/ceph/super.c      |  1 +
 fs/ceph/super.h      |  1 +
 4 files changed, 25 insertions(+)

Comments

Ilya Dryomov Sept. 24, 2024, 6:26 a.m. UTC | #1
On Tue, Jul 30, 2024 at 7:41 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> We have hit a race between cap releases and cap revoke request
> that will cause the check_caps() to miss sending a cap revoke ack
> to MDS. And the client will depend on the cap release to release
> that revoking caps, which could be delayed for some unknown reasons.
>
> In Kclient we have figured out the RCA about race and we need
> a way to explictly trigger this manually could help to get rid
> of the caps revoke stuck issue.
>
> URL: https://tracker.ceph.com/issues/67221
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/caps.c       | 22 ++++++++++++++++++++++
>  fs/ceph/mds_client.c |  1 +
>  fs/ceph/super.c      |  1 +
>  fs/ceph/super.h      |  1 +
>  4 files changed, 25 insertions(+)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index c09add6d6516..a0a39243aeb3 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -4729,6 +4729,28 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
>         ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
>  }
>
> +/*
> + * Flush all cap releases to the mds
> + */
> +static void flush_cap_releases(struct ceph_mds_session *s)
> +{
> +       struct ceph_mds_client *mdsc = s->s_mdsc;
> +       struct ceph_client *cl = mdsc->fsc->client;
> +
> +       doutc(cl, "begin\n");
> +       spin_lock(&s->s_cap_lock);
> +       if (s->s_num_cap_releases)
> +               ceph_flush_session_cap_releases(mdsc, s);
> +       spin_unlock(&s->s_cap_lock);
> +       doutc(cl, "done\n");
> +
> +}
> +
> +void ceph_flush_cap_releases(struct ceph_mds_client *mdsc)
> +{
> +       ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true);
> +}
> +
>  void __ceph_touch_fmode(struct ceph_inode_info *ci,
>                         struct ceph_mds_client *mdsc, int fmode)
>  {
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 86d0148819b0..fc563b59959a 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -5904,6 +5904,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
>         want_tid = mdsc->last_tid;
>         mutex_unlock(&mdsc->mutex);
>
> +       ceph_flush_cap_releases(mdsc);
>         ceph_flush_dirty_caps(mdsc);
>         spin_lock(&mdsc->cap_dirty_lock);
>         want_flush = mdsc->last_cap_flush_tid;
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index f489b3e12429..0a1215b4f0ba 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -126,6 +126,7 @@ static int ceph_sync_fs(struct super_block *sb, int wait)
>         if (!wait) {
>                 doutc(cl, "(non-blocking)\n");
>                 ceph_flush_dirty_caps(fsc->mdsc);
> +               ceph_flush_cap_releases(fsc->mdsc);

Hi Xiubo,

Is there a significance to flushing cap releases before dirty caps on
the blocking path and doing it vice versa (i.e. flushing cap releases
after dirty caps) on the non-blocking path?

Thanks,

                Ilya
Xiubo Li Sept. 24, 2024, 11:46 a.m. UTC | #2
On 9/24/24 14:26, Ilya Dryomov wrote:
> On Tue, Jul 30, 2024 at 7:41 AM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> We have hit a race between cap releases and cap revoke request
>> that will cause the check_caps() to miss sending a cap revoke ack
>> to MDS. And the client will depend on the cap release to release
>> that revoking caps, which could be delayed for some unknown reasons.
>>
>> In Kclient we have figured out the RCA about race and we need
>> a way to explictly trigger this manually could help to get rid
>> of the caps revoke stuck issue.
>>
>> URL: https://tracker.ceph.com/issues/67221
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/caps.c       | 22 ++++++++++++++++++++++
>>   fs/ceph/mds_client.c |  1 +
>>   fs/ceph/super.c      |  1 +
>>   fs/ceph/super.h      |  1 +
>>   4 files changed, 25 insertions(+)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index c09add6d6516..a0a39243aeb3 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -4729,6 +4729,28 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
>>          ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
>>   }
>>
>> +/*
>> + * Flush all cap releases to the mds
>> + */
>> +static void flush_cap_releases(struct ceph_mds_session *s)
>> +{
>> +       struct ceph_mds_client *mdsc = s->s_mdsc;
>> +       struct ceph_client *cl = mdsc->fsc->client;
>> +
>> +       doutc(cl, "begin\n");
>> +       spin_lock(&s->s_cap_lock);
>> +       if (s->s_num_cap_releases)
>> +               ceph_flush_session_cap_releases(mdsc, s);
>> +       spin_unlock(&s->s_cap_lock);
>> +       doutc(cl, "done\n");
>> +
>> +}
>> +
>> +void ceph_flush_cap_releases(struct ceph_mds_client *mdsc)
>> +{
>> +       ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true);
>> +}
>> +
>>   void __ceph_touch_fmode(struct ceph_inode_info *ci,
>>                          struct ceph_mds_client *mdsc, int fmode)
>>   {
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 86d0148819b0..fc563b59959a 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -5904,6 +5904,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
>>          want_tid = mdsc->last_tid;
>>          mutex_unlock(&mdsc->mutex);
>>
>> +       ceph_flush_cap_releases(mdsc);
>>          ceph_flush_dirty_caps(mdsc);
>>          spin_lock(&mdsc->cap_dirty_lock);
>>          want_flush = mdsc->last_cap_flush_tid;
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index f489b3e12429..0a1215b4f0ba 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -126,6 +126,7 @@ static int ceph_sync_fs(struct super_block *sb, int wait)
>>          if (!wait) {
>>                  doutc(cl, "(non-blocking)\n");
>>                  ceph_flush_dirty_caps(fsc->mdsc);
>> +               ceph_flush_cap_releases(fsc->mdsc);
> Hi Xiubo,
>
> Is there a significance to flushing cap releases before dirty caps on
> the blocking path and doing it vice versa (i.e. flushing cap releases
> after dirty caps) on the non-blocking path?

Hi Ilya,

The dirty caps and the cap releases are not related.

If caps are dirty they should be in the dirty list anyway in theory.
Otherwise, when the file is closed or the inode is released, they will
be in the release lists.

Thanks

- Xiubo


> Thanks,
>
>                  Ilya
>
Ilya Dryomov Sept. 24, 2024, 1:24 p.m. UTC | #3
On Tue, Sep 24, 2024 at 1:47 PM Xiubo Li <xiubli@redhat.com> wrote:
>
>
> On 9/24/24 14:26, Ilya Dryomov wrote:
> > On Tue, Jul 30, 2024 at 7:41 AM <xiubli@redhat.com> wrote:
> >> From: Xiubo Li <xiubli@redhat.com>
> >>
> >> We have hit a race between cap releases and cap revoke request
> >> that will cause the check_caps() to miss sending a cap revoke ack
> >> to MDS. And the client will depend on the cap release to release
> >> that revoking caps, which could be delayed for some unknown reasons.
> >>
> >> In Kclient we have figured out the RCA about race and we need
> >> a way to explictly trigger this manually could help to get rid
> >> of the caps revoke stuck issue.
> >>
> >> URL: https://tracker.ceph.com/issues/67221
> >> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> >> ---
> >>   fs/ceph/caps.c       | 22 ++++++++++++++++++++++
> >>   fs/ceph/mds_client.c |  1 +
> >>   fs/ceph/super.c      |  1 +
> >>   fs/ceph/super.h      |  1 +
> >>   4 files changed, 25 insertions(+)
> >>
> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> >> index c09add6d6516..a0a39243aeb3 100644
> >> --- a/fs/ceph/caps.c
> >> +++ b/fs/ceph/caps.c
> >> @@ -4729,6 +4729,28 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
> >>          ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
> >>   }
> >>
> >> +/*
> >> + * Flush all cap releases to the mds
> >> + */
> >> +static void flush_cap_releases(struct ceph_mds_session *s)
> >> +{
> >> +       struct ceph_mds_client *mdsc = s->s_mdsc;
> >> +       struct ceph_client *cl = mdsc->fsc->client;
> >> +
> >> +       doutc(cl, "begin\n");
> >> +       spin_lock(&s->s_cap_lock);
> >> +       if (s->s_num_cap_releases)
> >> +               ceph_flush_session_cap_releases(mdsc, s);
> >> +       spin_unlock(&s->s_cap_lock);
> >> +       doutc(cl, "done\n");
> >> +
> >> +}
> >> +
> >> +void ceph_flush_cap_releases(struct ceph_mds_client *mdsc)
> >> +{
> >> +       ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true);
> >> +}
> >> +
> >>   void __ceph_touch_fmode(struct ceph_inode_info *ci,
> >>                          struct ceph_mds_client *mdsc, int fmode)
> >>   {
> >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> >> index 86d0148819b0..fc563b59959a 100644
> >> --- a/fs/ceph/mds_client.c
> >> +++ b/fs/ceph/mds_client.c
> >> @@ -5904,6 +5904,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
> >>          want_tid = mdsc->last_tid;
> >>          mutex_unlock(&mdsc->mutex);
> >>
> >> +       ceph_flush_cap_releases(mdsc);
> >>          ceph_flush_dirty_caps(mdsc);
> >>          spin_lock(&mdsc->cap_dirty_lock);
> >>          want_flush = mdsc->last_cap_flush_tid;
> >> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> >> index f489b3e12429..0a1215b4f0ba 100644
> >> --- a/fs/ceph/super.c
> >> +++ b/fs/ceph/super.c
> >> @@ -126,6 +126,7 @@ static int ceph_sync_fs(struct super_block *sb, int wait)
> >>          if (!wait) {
> >>                  doutc(cl, "(non-blocking)\n");
> >>                  ceph_flush_dirty_caps(fsc->mdsc);
> >> +               ceph_flush_cap_releases(fsc->mdsc);
> > Hi Xiubo,
> >
> > Is there a significance to flushing cap releases before dirty caps on
> > the blocking path and doing it vice versa (i.e. flushing cap releases
> > after dirty caps) on the non-blocking path?
>
> Hi Ilya,
>
> The dirty caps and the cap releases are not related.
>
> If caps are dirty it should be in the dirty list anyway in theory. Else
> when the file is closed or inode is released will it be in the release
> lists.

So doing

    ceph_flush_dirty_caps(mdsc);
    ceph_flush_cap_releases(mdsc);

in both cases just so that it's consistent is fine, right?

Thanks,

                Ilya
Xiubo Li Sept. 27, 2024, 2:36 a.m. UTC | #4
On 9/24/24 21:24, Ilya Dryomov wrote:
> On Tue, Sep 24, 2024 at 1:47 PM Xiubo Li <xiubli@redhat.com> wrote:
>>
>> On 9/24/24 14:26, Ilya Dryomov wrote:
>>> On Tue, Jul 30, 2024 at 7:41 AM <xiubli@redhat.com> wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> We have hit a race between cap releases and cap revoke request
>>>> that will cause the check_caps() to miss sending a cap revoke ack
>>>> to MDS. And the client will depend on the cap release to release
>>>> that revoking caps, which could be delayed for some unknown reasons.
>>>>
>>>> In Kclient we have figured out the RCA about race and we need
>>>> a way to explictly trigger this manually could help to get rid
>>>> of the caps revoke stuck issue.
>>>>
>>>> URL: https://tracker.ceph.com/issues/67221
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>    fs/ceph/caps.c       | 22 ++++++++++++++++++++++
>>>>    fs/ceph/mds_client.c |  1 +
>>>>    fs/ceph/super.c      |  1 +
>>>>    fs/ceph/super.h      |  1 +
>>>>    4 files changed, 25 insertions(+)
>>>>
>>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>>> index c09add6d6516..a0a39243aeb3 100644
>>>> --- a/fs/ceph/caps.c
>>>> +++ b/fs/ceph/caps.c
>>>> @@ -4729,6 +4729,28 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
>>>>           ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
>>>>    }
>>>>
>>>> +/*
>>>> + * Flush all cap releases to the mds
>>>> + */
>>>> +static void flush_cap_releases(struct ceph_mds_session *s)
>>>> +{
>>>> +       struct ceph_mds_client *mdsc = s->s_mdsc;
>>>> +       struct ceph_client *cl = mdsc->fsc->client;
>>>> +
>>>> +       doutc(cl, "begin\n");
>>>> +       spin_lock(&s->s_cap_lock);
>>>> +       if (s->s_num_cap_releases)
>>>> +               ceph_flush_session_cap_releases(mdsc, s);
>>>> +       spin_unlock(&s->s_cap_lock);
>>>> +       doutc(cl, "done\n");
>>>> +
>>>> +}
>>>> +
>>>> +void ceph_flush_cap_releases(struct ceph_mds_client *mdsc)
>>>> +{
>>>> +       ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true);
>>>> +}
>>>> +
>>>>    void __ceph_touch_fmode(struct ceph_inode_info *ci,
>>>>                           struct ceph_mds_client *mdsc, int fmode)
>>>>    {
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index 86d0148819b0..fc563b59959a 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -5904,6 +5904,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
>>>>           want_tid = mdsc->last_tid;
>>>>           mutex_unlock(&mdsc->mutex);
>>>>
>>>> +       ceph_flush_cap_releases(mdsc);
>>>>           ceph_flush_dirty_caps(mdsc);
>>>>           spin_lock(&mdsc->cap_dirty_lock);
>>>>           want_flush = mdsc->last_cap_flush_tid;
>>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>>> index f489b3e12429..0a1215b4f0ba 100644
>>>> --- a/fs/ceph/super.c
>>>> +++ b/fs/ceph/super.c
>>>> @@ -126,6 +126,7 @@ static int ceph_sync_fs(struct super_block *sb, int wait)
>>>>           if (!wait) {
>>>>                   doutc(cl, "(non-blocking)\n");
>>>>                   ceph_flush_dirty_caps(fsc->mdsc);
>>>> +               ceph_flush_cap_releases(fsc->mdsc);
>>> Hi Xiubo,
>>>
>>> Is there a significance to flushing cap releases before dirty caps on
>>> the blocking path and doing it vice versa (i.e. flushing cap releases
>>> after dirty caps) on the non-blocking path?
>> Hi Ilya,
>>
>> The dirty caps and the cap releases are not related.
>>
>> If caps are dirty it should be in the dirty list anyway in theory. Else
>> when the file is closed or inode is released will it be in the release
>> lists.
> So doing
>
>      ceph_flush_dirty_caps(mdsc);
>      ceph_flush_cap_releases(mdsc);
>
> in both cases just so that it's consistent is fine, right?

Yeah.


> Thanks,
>
>                  Ilya
>
diff mbox series

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c09add6d6516..a0a39243aeb3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4729,6 +4729,28 @@  void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 	ceph_mdsc_iterate_sessions(mdsc, flush_dirty_session_caps, true);
 }
 
+/*
+ * Flush all cap releases to the mds
+ */
+static void flush_cap_releases(struct ceph_mds_session *s)
+{
+	struct ceph_mds_client *mdsc = s->s_mdsc;
+	struct ceph_client *cl = mdsc->fsc->client;
+
+	doutc(cl, "begin\n");
+	spin_lock(&s->s_cap_lock);
+	if (s->s_num_cap_releases)
+		ceph_flush_session_cap_releases(mdsc, s);
+	spin_unlock(&s->s_cap_lock);
+	doutc(cl, "done\n");
+
+}
+
+void ceph_flush_cap_releases(struct ceph_mds_client *mdsc)
+{
+	ceph_mdsc_iterate_sessions(mdsc, flush_cap_releases, true);
+}
+
 void __ceph_touch_fmode(struct ceph_inode_info *ci,
 			struct ceph_mds_client *mdsc, int fmode)
 {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 86d0148819b0..fc563b59959a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -5904,6 +5904,7 @@  void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 	want_tid = mdsc->last_tid;
 	mutex_unlock(&mdsc->mutex);
 
+	ceph_flush_cap_releases(mdsc);
 	ceph_flush_dirty_caps(mdsc);
 	spin_lock(&mdsc->cap_dirty_lock);
 	want_flush = mdsc->last_cap_flush_tid;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f489b3e12429..0a1215b4f0ba 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -126,6 +126,7 @@  static int ceph_sync_fs(struct super_block *sb, int wait)
 	if (!wait) {
 		doutc(cl, "(non-blocking)\n");
 		ceph_flush_dirty_caps(fsc->mdsc);
+		ceph_flush_cap_releases(fsc->mdsc);
 		doutc(cl, "(non-blocking) done\n");
 		return 0;
 	}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 831e8ec4d5da..5645121337cf 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1273,6 +1273,7 @@  extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
 extern void ceph_check_caps(struct ceph_inode_info *ci, int flags);
 extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
 extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
+extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc);
 extern int  ceph_drop_caps_for_unlink(struct inode *inode);
 extern int ceph_encode_inode_release(void **p, struct inode *inode,
 				     int mds, int drop, int unless, int force);