Message ID | 20240409064943.1377026-1-xiubli@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ceph: switch to use cap_delay_lock for the unlink delay list | expand |
On Tue, Apr 9, 2024 at 8:52 AM <xiubli@redhat.com> wrote: > > From: Xiubo Li <xiubli@redhat.com> > > The same list item will be used in both cap_delay_list and > cap_unlink_delay_list, so it's buggy to use two different locks > to protect them. > > Reported-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de> > Fixes: dbc347ef7f0c ("ceph: add ceph_cap_unlink_work to fire check_caps() immediately") > URL: https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/AODC76VXRAMXKLFDCTK4TKFDDPWUSCN5 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > fs/ceph/caps.c | 4 ++-- > fs/ceph/mds_client.c | 9 ++++----- > fs/ceph/mds_client.h | 3 +-- > 3 files changed, 7 insertions(+), 9 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index bfa8f72cd3cf..197cb383f829 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -4799,13 +4799,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode) > > doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, > ceph_vinop(inode)); > - spin_lock(&mdsc->cap_unlink_delay_lock); > + spin_lock(&mdsc->cap_delay_lock); > ci->i_ceph_flags |= CEPH_I_FLUSH; > if (!list_empty(&ci->i_cap_delay_list)) > list_del_init(&ci->i_cap_delay_list); > list_add_tail(&ci->i_cap_delay_list, > &mdsc->cap_unlink_delay_list); > - spin_unlock(&mdsc->cap_unlink_delay_lock); > + spin_unlock(&mdsc->cap_delay_lock); > > /* > * Fire the work immediately, because the MDS maybe > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 639bde44ab23..494d80b3e41d 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -2532,7 +2532,7 @@ static void ceph_cap_unlink_work(struct work_struct *work) > struct ceph_client *cl = mdsc->fsc->client; > > doutc(cl, "begin\n"); > - spin_lock(&mdsc->cap_unlink_delay_lock); > + spin_lock(&mdsc->cap_delay_lock); > while (!list_empty(&mdsc->cap_unlink_delay_list)) { > struct ceph_inode_info *ci; > struct inode *inode; > @@ -2544,15 +2544,15 @@ static void ceph_cap_unlink_work(struct work_struct *work) > > inode = igrab(&ci->netfs.inode); > if (inode) { > - spin_unlock(&mdsc->cap_unlink_delay_lock); > + spin_unlock(&mdsc->cap_delay_lock); > doutc(cl, "on %p %llx.%llx\n", inode, > ceph_vinop(inode)); > ceph_check_caps(ci, CHECK_CAPS_FLUSH); > iput(inode); > - spin_lock(&mdsc->cap_unlink_delay_lock); > + spin_lock(&mdsc->cap_delay_lock); > } > } > - spin_unlock(&mdsc->cap_unlink_delay_lock); > + spin_unlock(&mdsc->cap_delay_lock); > doutc(cl, "done\n"); > } > > @@ -5538,7 +5538,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) > INIT_LIST_HEAD(&mdsc->cap_wait_list); > spin_lock_init(&mdsc->cap_delay_lock); > INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); > - spin_lock_init(&mdsc->cap_unlink_delay_lock); > INIT_LIST_HEAD(&mdsc->snap_flush_list); > spin_lock_init(&mdsc->snap_flush_lock); > mdsc->last_cap_flush_tid = 1; > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h > index 317a0fd6a8ba..80b310e33f2b 100644 > --- a/fs/ceph/mds_client.h > +++ b/fs/ceph/mds_client.h > @@ -481,9 +481,8 @@ struct ceph_mds_client { > struct delayed_work delayed_work; /* delayed work */ > unsigned long last_renew_caps; /* last time we renewed our caps */ > struct list_head cap_delay_list; /* caps with delayed release */ > - spinlock_t cap_delay_lock; /* protects cap_delay_list */ > struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */ > - spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */ > + spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */ > struct list_head snap_flush_list; /* cap_snaps ready to flush */ > spinlock_t snap_flush_lock; > > -- > 2.43.0 > LGTM, queued up for 6.9-rc. Thanks, Ilya
Am 09.04.24 um 08:49 schrieb xiubli@redhat.com: > From: Xiubo Li <xiubli@redhat.com> > > The same list item will be used in both cap_delay_list and > cap_unlink_delay_list, so it's buggy to use two different locks > to protect them. > > Reported-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de> > Fixes: dbc347ef7f0c ("ceph: add ceph_cap_unlink_work to fire check_caps() immediately") > URL: https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/AODC76VXRAMXKLFDCTK4TKFDDPWUSCN5 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > fs/ceph/caps.c | 4 ++-- > fs/ceph/mds_client.c | 9 ++++----- > fs/ceph/mds_client.h | 3 +-- > 3 files changed, 7 insertions(+), 9 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index bfa8f72cd3cf..197cb383f829 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -4799,13 +4799,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode) > > doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, > ceph_vinop(inode)); > - spin_lock(&mdsc->cap_unlink_delay_lock); > + spin_lock(&mdsc->cap_delay_lock); > ci->i_ceph_flags |= CEPH_I_FLUSH; > if (!list_empty(&ci->i_cap_delay_list)) > list_del_init(&ci->i_cap_delay_list); > list_add_tail(&ci->i_cap_delay_list, > &mdsc->cap_unlink_delay_list); > - spin_unlock(&mdsc->cap_unlink_delay_lock); > + spin_unlock(&mdsc->cap_delay_lock); > > /* > * Fire the work immediately, because the MDS maybe > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 639bde44ab23..494d80b3e41d 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -2532,7 +2532,7 @@ static void ceph_cap_unlink_work(struct work_struct *work) > struct ceph_client *cl = mdsc->fsc->client; > > doutc(cl, "begin\n"); > - spin_lock(&mdsc->cap_unlink_delay_lock); > + spin_lock(&mdsc->cap_delay_lock); > while (!list_empty(&mdsc->cap_unlink_delay_list)) { > struct ceph_inode_info *ci; > struct inode *inode; > @@ -2544,15 +2544,15 @@ static void ceph_cap_unlink_work(struct work_struct *work) > > inode = igrab(&ci->netfs.inode); > if (inode) { > - spin_unlock(&mdsc->cap_unlink_delay_lock); > + spin_unlock(&mdsc->cap_delay_lock); > doutc(cl, "on %p %llx.%llx\n", inode, > ceph_vinop(inode)); > ceph_check_caps(ci, CHECK_CAPS_FLUSH); > iput(inode); > - spin_lock(&mdsc->cap_unlink_delay_lock); > + spin_lock(&mdsc->cap_delay_lock); > } > } > - spin_unlock(&mdsc->cap_unlink_delay_lock); > + spin_unlock(&mdsc->cap_delay_lock); > doutc(cl, "done\n"); > } > > @@ -5538,7 +5538,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) > INIT_LIST_HEAD(&mdsc->cap_wait_list); > spin_lock_init(&mdsc->cap_delay_lock); > INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); > - spin_lock_init(&mdsc->cap_unlink_delay_lock); > INIT_LIST_HEAD(&mdsc->snap_flush_list); > spin_lock_init(&mdsc->snap_flush_lock); > mdsc->last_cap_flush_tid = 1; > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h > index 317a0fd6a8ba..80b310e33f2b 100644 > --- a/fs/ceph/mds_client.h > +++ b/fs/ceph/mds_client.h > @@ -481,9 +481,8 @@ struct ceph_mds_client { > struct delayed_work delayed_work; /* delayed work */ > unsigned long last_renew_caps; /* last time we renewed our caps */ > struct list_head cap_delay_list; /* caps with delayed release */ > - spinlock_t cap_delay_lock; /* protects cap_delay_list */ > struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */ > - spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */ > + spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */ > struct list_head snap_flush_list; /* cap_snaps ready to flush */ > spinlock_t snap_flush_lock; > Tested-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de> Reviewed-by: Marc Ruhmann <ruhmann@luis.uni-hannover.de> Patch looks sound. Using different locks when operating on two lists containing the same item can cause conflicts. Changes tested on 5.14.0-435.el9. Without this patch we observed about 10 crashes a day, the majority of which related to corrupted lists when dropping caps for unlink. With the patch applied not a single crash occurred in 1.5 days. Thanks and best regards, Marc
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index bfa8f72cd3cf..197cb383f829 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4799,13 +4799,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode) doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_unlink_delay_list); - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); /* * Fire the work immediately, because the MDS maybe diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 639bde44ab23..494d80b3e41d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2532,7 +2532,7 @@ static void ceph_cap_unlink_work(struct work_struct *work) struct ceph_client *cl = mdsc->fsc->client; doutc(cl, "begin\n"); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); while (!list_empty(&mdsc->cap_unlink_delay_list)) { struct ceph_inode_info *ci; struct inode *inode; @@ -2544,15 +2544,15 @@ static void ceph_cap_unlink_work(struct work_struct *work) inode = igrab(&ci->netfs.inode); if (inode) { - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); doutc(cl, "on %p %llx.%llx\n", inode, ceph_vinop(inode)); ceph_check_caps(ci, CHECK_CAPS_FLUSH); iput(inode); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); } } - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); doutc(cl, "done\n"); } @@ -5538,7 +5538,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->cap_wait_list); spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); - spin_lock_init(&mdsc->cap_unlink_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); mdsc->last_cap_flush_tid = 1; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 317a0fd6a8ba..80b310e33f2b 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -481,9 +481,8 @@ struct ceph_mds_client { struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; /* last time we renewed our caps */ struct list_head cap_delay_list; /* caps with delayed release */ - spinlock_t cap_delay_lock; /* protects cap_delay_list */ struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */ - spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */ + spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */ struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock;