Message ID | 20190523081345.20410-4-zyan@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/8] ceph: fix error handling in ceph_get_caps() | expand |
On Thu, 2019-05-23 at 16:13 +0800, Yan, Zheng wrote: > d_name_cmp() and update_dentry_lease() lock and unlock dentry->d_lock > respectively. Dentry may get renamed between them. The fix is moving > the dentry name compare into update_dentry_lease(). > > This patch introduce two version of update_dentry_lease(). One version > is for the case that parent inode is locked. It does not need to check > parent/target inode and dentry name. Another version is for the case > that parent inode is not locked. It checks arent/target inode and dentry > name after locking dentry->d_lock. > > Signed-off-by: "Yan, Zheng" <zyan@redhat.com> > --- > fs/ceph/inode.c | 164 ++++++++++++++++++++++++++---------------------- > 1 file changed, 88 insertions(+), 76 deletions(-) > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index 8cfece240ffe..e47a25495be5 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -1031,59 +1031,38 @@ static int fill_inode(struct inode *inode, struct page *locked_page, > } > > /* > - * caller should hold session s_mutex. > + * caller should hold session s_mutex and dentry->d_lock. > */ > -static void update_dentry_lease(struct dentry *dentry, > - struct ceph_mds_reply_lease *lease, > - struct ceph_mds_session *session, > - unsigned long from_time, > - struct ceph_vino *tgt_vino, > - struct ceph_vino *dir_vino) > +static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, > + struct ceph_mds_reply_lease *lease, > + struct ceph_mds_session *session, > + unsigned long from_time, > + struct ceph_mds_session **old_lease_session) > { > struct ceph_dentry_info *di = ceph_dentry(dentry); > long unsigned duration = le32_to_cpu(lease->duration_ms); > long unsigned ttl = from_time + (duration * HZ) / 1000; > long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; > - struct inode *dir; > - struct ceph_mds_session *old_lease_session = NULL; > - > - /* > - * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that > - * we expect a negative dentry. > - */ > - if (!tgt_vino && d_really_is_positive(dentry)) > - return; > - > - if (tgt_vino && (d_really_is_negative(dentry) || > - !ceph_ino_compare(d_inode(dentry), tgt_vino))) > - return; > > - spin_lock(&dentry->d_lock); > dout("update_dentry_lease %p duration %lu ms ttl %lu\n", > dentry, duration, ttl); > > - dir = d_inode(dentry->d_parent); > - > - /* make sure parent matches dir_vino */ > - if (!ceph_ino_compare(dir, dir_vino)) > - goto out_unlock; > - > /* only track leases on regular dentries */ > if (ceph_snap(dir) != CEPH_NOSNAP) > - goto out_unlock; > + return; > > di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); > if (duration == 0) { > __ceph_dentry_dir_lease_touch(di); > - goto out_unlock; > + return; > } > > if (di->lease_gen == session->s_cap_gen && > time_before(ttl, di->time)) > - goto out_unlock; /* we already have a newer lease. */ > + return; /* we already have a newer lease. */ > > if (di->lease_session && di->lease_session != session) { > - old_lease_session = di->lease_session; > + *old_lease_session = di->lease_session; > di->lease_session = NULL; > } > > @@ -1096,6 +1075,62 @@ static void update_dentry_lease(struct dentry *dentry, > di->time = ttl; > > __ceph_dentry_lease_touch(di); > +} > + > +static inline void update_dentry_lease(struct inode *dir, struct dentry *dentry, > + struct ceph_mds_reply_lease *lease, > + struct ceph_mds_session *session, > + unsigned long from_time) > +{ > + struct ceph_mds_session *old_lease_session = NULL; > + spin_lock(&dentry->d_lock); > + __update_dentry_lease(dir, dentry, lease, session, from_time, > + &old_lease_session); > + spin_unlock(&dentry->d_lock); > + if (old_lease_session) > + ceph_put_mds_session(old_lease_session); > +} > + > +/* > + * update dentry lease without having parent inode locked > + */ > +static void update_dentry_lease_careful(struct dentry *dentry, > + struct ceph_mds_reply_lease *lease, > + struct ceph_mds_session *session, > + unsigned long from_time, > + char *dname, u32 dname_len, > + struct ceph_vino *pdvino, > + struct ceph_vino *ptvino) > + This argument list is huge. I wonder if we'd be better off passing in a pointer to "req" instead and getting some of the fields from that. For instance, session, from_time, etc... > +{ > + struct inode *dir; > + struct ceph_mds_session *old_lease_session = NULL; > + > + spin_lock(&dentry->d_lock); > + /* make sure dentry's name matches target */ > + if (dentry->d_name.len != dname_len || > + memcmp(dentry->d_name.name, dname, dname_len)) > + goto out_unlock; > + > + dir = d_inode(dentry->d_parent); > + /* make sure parent matches dvino */ > + if (!ceph_ino_compare(dir, pdvino)) > + goto out_unlock; > + > + /* make sure dentry's inode matches target. NULL ptvino means that > + * we expect a negative dentry */ > + if (ptvino) { > + if (d_really_is_negative(dentry)) > + goto out_unlock; > + if (!ceph_ino_compare(d_inode(dentry), ptvino)) > + goto out_unlock; > + } else { > + if (d_really_is_positive(dentry)) > + goto out_unlock; > + } > + > + __update_dentry_lease(dir, dentry, lease, session, > + from_time, &old_lease_session); > out_unlock: > spin_unlock(&dentry->d_lock); > if (old_lease_session) > @@ -1160,19 +1195,6 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) > return 0; > } > > -static int d_name_cmp(struct dentry *dentry, const char *name, size_t len) > -{ > - int ret; > - > - /* take d_lock to ensure dentry->d_name stability */ > - spin_lock(&dentry->d_lock); > - ret = dentry->d_name.len - len; > - if (!ret) > - ret = memcmp(dentry->d_name.name, name, len); > - spin_unlock(&dentry->d_lock); > - return ret; > -} > - > /* > * Incorporate results into the local cache. This is either just > * one inode, or a directory, dentry, and possibly linked-to inode (e.g., > @@ -1375,10 +1397,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > } else if (have_lease) { > if (d_unhashed(dn)) > d_add(dn, NULL); > - update_dentry_lease(dn, rinfo->dlease, > - session, > - req->r_request_started, > - NULL, &dvino); > + update_dentry_lease(dir, dn, > + rinfo->dlease, session, > + req->r_request_started); > } > goto done; > } > @@ -1400,11 +1421,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > } > > if (have_lease) { > - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > - update_dentry_lease(dn, rinfo->dlease, session, > - req->r_request_started, > - &tvino, &dvino); > + update_dentry_lease(dir, dn, > + rinfo->dlease, session, > + req->r_request_started); > } > dout(" final dn %p\n", dn); > } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || > @@ -1422,27 +1441,20 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > err = splice_dentry(&req->r_dentry, in); > if (err < 0) > goto done; > - } else if (rinfo->head->is_dentry && > - !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) { > + } else if (rinfo->head->is_dentry && req->r_dentry) { > + /* parent inode is not locked, be carefull */ > struct ceph_vino *ptvino = NULL; > - > - if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || > - le32_to_cpu(rinfo->dlease->duration_ms)) { > - dvino.ino = le64_to_cpu(rinfo->diri.in->ino); > - dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); > - > - if (rinfo->head->is_target) { > - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > - ptvino = &tvino; > - } > - > - update_dentry_lease(req->r_dentry, rinfo->dlease, > - session, req->r_request_started, ptvino, > - &dvino); > - } else { > - dout("%s: no dentry lease or dir cap\n", __func__); > + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); > + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); > + if (rinfo->head->is_target) { > + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > + ptvino = &tvino; > } > + update_dentry_lease_careful(req->r_dentry, rinfo->dlease, > + session, req->r_request_started, > + rinfo->dname, rinfo->dname_len, > + &dvino, ptvino); > } > done: > dout("fill_trace done err=%d\n", err); > @@ -1604,7 +1616,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > /* FIXME: release caps/leases if error occurs */ > for (i = 0; i < rinfo->dir_nr; i++) { > struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; > - struct ceph_vino tvino, dvino; > + struct ceph_vino tvino; > > dname.name = rde->name; > dname.len = rde->name_len; > @@ -1705,9 +1717,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > > ceph_dentry(dn)->offset = rde->offset; > > - dvino = ceph_vino(d_inode(parent)); > - update_dentry_lease(dn, rde->lease, req->r_session, > - req->r_request_started, &tvino, &dvino); > + update_dentry_lease(d_inode(parent), dn, > + rde->lease, req->r_session, > + req->r_request_started); > > if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { > ret = fill_readdir_cache(d_inode(parent), dn, That said... Reviewed-by: Jeff Layton <jlayton@redhat.com>
On Tue, May 28, 2019 at 9:53 PM Jeff Layton <jlayton@redhat.com> wrote: > > On Thu, 2019-05-23 at 16:13 +0800, Yan, Zheng wrote: > > d_name_cmp() and update_dentry_lease() lock and unlock dentry->d_lock > > respectively. Dentry may get renamed between them. The fix is moving > > the dentry name compare into update_dentry_lease(). > > > > This patch introduce two version of update_dentry_lease(). One version > > is for the case that parent inode is locked. It does not need to check > > parent/target inode and dentry name. Another version is for the case > > that parent inode is not locked. It checks arent/target inode and dentry > > name after locking dentry->d_lock. > > > > Signed-off-by: "Yan, Zheng" <zyan@redhat.com> > > --- > > fs/ceph/inode.c | 164 ++++++++++++++++++++++++++---------------------- > > 1 file changed, 88 insertions(+), 76 deletions(-) > > > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > > index 8cfece240ffe..e47a25495be5 100644 > > --- a/fs/ceph/inode.c > > +++ b/fs/ceph/inode.c > > @@ -1031,59 +1031,38 @@ static int fill_inode(struct inode *inode, struct page *locked_page, > > } > > > > /* > > - * caller should hold session s_mutex. > > + * caller should hold session s_mutex and dentry->d_lock. > > */ > > -static void update_dentry_lease(struct dentry *dentry, > > - struct ceph_mds_reply_lease *lease, > > - struct ceph_mds_session *session, > > - unsigned long from_time, > > - struct ceph_vino *tgt_vino, > > - struct ceph_vino *dir_vino) > > +static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, > > + struct ceph_mds_reply_lease *lease, > > + struct ceph_mds_session *session, > > + unsigned long from_time, > > + struct ceph_mds_session **old_lease_session) > > { > > struct ceph_dentry_info *di = ceph_dentry(dentry); > > long unsigned duration = le32_to_cpu(lease->duration_ms); > > long unsigned ttl = from_time + (duration * HZ) / 1000; > > long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; > > - struct inode *dir; > > - struct ceph_mds_session *old_lease_session = NULL; > > - > > - /* > > - * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that > > - * we expect a negative dentry. > > - */ > > - if (!tgt_vino && d_really_is_positive(dentry)) > > - return; > > - > > - if (tgt_vino && (d_really_is_negative(dentry) || > > - !ceph_ino_compare(d_inode(dentry), tgt_vino))) > > - return; > > > > - spin_lock(&dentry->d_lock); > > dout("update_dentry_lease %p duration %lu ms ttl %lu\n", > > dentry, duration, ttl); > > > > - dir = d_inode(dentry->d_parent); > > - > > - /* make sure parent matches dir_vino */ > > - if (!ceph_ino_compare(dir, dir_vino)) > > - goto out_unlock; > > - > > /* only track leases on regular dentries */ > > if (ceph_snap(dir) != CEPH_NOSNAP) > > - goto out_unlock; > > + return; > > > > di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); > > if (duration == 0) { > > __ceph_dentry_dir_lease_touch(di); > > - goto out_unlock; > > + return; > > } > > > > if (di->lease_gen == session->s_cap_gen && > > time_before(ttl, di->time)) > > - goto out_unlock; /* we already have a newer lease. */ > > + return; /* we already have a newer lease. */ > > > > if (di->lease_session && di->lease_session != session) { > > - old_lease_session = di->lease_session; > > + *old_lease_session = di->lease_session; > > di->lease_session = NULL; > > } > > > > @@ -1096,6 +1075,62 @@ static void update_dentry_lease(struct dentry *dentry, > > di->time = ttl; > > > > __ceph_dentry_lease_touch(di); > > +} > > + > > +static inline void update_dentry_lease(struct inode *dir, struct dentry *dentry, > > + struct ceph_mds_reply_lease *lease, > > + struct ceph_mds_session *session, > > + unsigned long from_time) > > +{ > > + struct ceph_mds_session *old_lease_session = NULL; > > + spin_lock(&dentry->d_lock); > > + __update_dentry_lease(dir, dentry, lease, session, from_time, > > + &old_lease_session); > > + spin_unlock(&dentry->d_lock); > > + if (old_lease_session) > > + ceph_put_mds_session(old_lease_session); > > +} > > + > > +/* > > + * update dentry lease without having parent inode locked > > + */ > > +static void update_dentry_lease_careful(struct dentry *dentry, > > + struct ceph_mds_reply_lease *lease, > > + struct ceph_mds_session *session, > > + unsigned long from_time, > > + char *dname, u32 dname_len, > > + struct ceph_vino *pdvino, > > + struct ceph_vino *ptvino) > > + > > This argument list is huge. I wonder if we'd be better off passing in a > pointer to "req" instead and getting some of the fields from that. For > instance, session, from_time, etc... > If we pass 'req' to update_dentry_lease_careful(), we need to re-define some local variables already exist in ceph_fill_trace(). It requires more code. Regards Yan, Zheng > > +{ > > + struct inode *dir; > > + struct ceph_mds_session *old_lease_session = NULL; > > + > > + spin_lock(&dentry->d_lock); > > + /* make sure dentry's name matches target */ > > + if (dentry->d_name.len != dname_len || > > + memcmp(dentry->d_name.name, dname, dname_len)) > > + goto out_unlock; > > + > > + dir = d_inode(dentry->d_parent); > > + /* make sure parent matches dvino */ > > + if (!ceph_ino_compare(dir, pdvino)) > > + goto out_unlock; > > + > > + /* make sure dentry's inode matches target. NULL ptvino means that > > + * we expect a negative dentry */ > > + if (ptvino) { > > + if (d_really_is_negative(dentry)) > > + goto out_unlock; > > + if (!ceph_ino_compare(d_inode(dentry), ptvino)) > > + goto out_unlock; > > + } else { > > + if (d_really_is_positive(dentry)) > > + goto out_unlock; > > + } > > + > > + __update_dentry_lease(dir, dentry, lease, session, > > + from_time, &old_lease_session); > > out_unlock: > > spin_unlock(&dentry->d_lock); > > if (old_lease_session) > > @@ -1160,19 +1195,6 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) > > return 0; > > } > > > > -static int d_name_cmp(struct dentry *dentry, const char *name, size_t len) > > -{ > > - int ret; > > - > > - /* take d_lock to ensure dentry->d_name stability */ > > - spin_lock(&dentry->d_lock); > > - ret = dentry->d_name.len - len; > > - if (!ret) > > - ret = memcmp(dentry->d_name.name, name, len); > > - spin_unlock(&dentry->d_lock); > > - return ret; > > -} > > - > > /* > > * Incorporate results into the local cache. This is either just > > * one inode, or a directory, dentry, and possibly linked-to inode (e.g., > > @@ -1375,10 +1397,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > > } else if (have_lease) { > > if (d_unhashed(dn)) > > d_add(dn, NULL); > > - update_dentry_lease(dn, rinfo->dlease, > > - session, > > - req->r_request_started, > > - NULL, &dvino); > > + update_dentry_lease(dir, dn, > > + rinfo->dlease, session, > > + req->r_request_started); > > } > > goto done; > > } > > @@ -1400,11 +1421,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > > } > > > > if (have_lease) { > > - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > > - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > > - update_dentry_lease(dn, rinfo->dlease, session, > > - req->r_request_started, > > - &tvino, &dvino); > > + update_dentry_lease(dir, dn, > > + rinfo->dlease, session, > > + req->r_request_started); > > } > > dout(" final dn %p\n", dn); > > } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || > > @@ -1422,27 +1441,20 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > > err = splice_dentry(&req->r_dentry, in); > > if (err < 0) > > goto done; > > - } else if (rinfo->head->is_dentry && > > - !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) { > > + } else if (rinfo->head->is_dentry && req->r_dentry) { > > + /* parent inode is not locked, be carefull */ > > struct ceph_vino *ptvino = NULL; > > - > > - if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || > > - le32_to_cpu(rinfo->dlease->duration_ms)) { > > - dvino.ino = le64_to_cpu(rinfo->diri.in->ino); > > - dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); > > - > > - if (rinfo->head->is_target) { > > - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > > - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > > - ptvino = &tvino; > > - } > > - > > - update_dentry_lease(req->r_dentry, rinfo->dlease, > > - session, req->r_request_started, ptvino, > > - &dvino); > > - } else { > > - dout("%s: no dentry lease or dir cap\n", __func__); > > + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); > > + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); > > + if (rinfo->head->is_target) { > > + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); > > + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); > > + ptvino = &tvino; > > } > > + update_dentry_lease_careful(req->r_dentry, rinfo->dlease, > > + session, req->r_request_started, > > + rinfo->dname, rinfo->dname_len, > > + &dvino, ptvino); > > } > > done: > > dout("fill_trace done err=%d\n", err); > > @@ -1604,7 +1616,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > > /* FIXME: release caps/leases if error occurs */ > > for (i = 0; i < rinfo->dir_nr; i++) { > > struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; > > - struct ceph_vino tvino, dvino; > > + struct ceph_vino tvino; > > > > dname.name = rde->name; > > dname.len = rde->name_len; > > @@ -1705,9 +1717,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > > > > ceph_dentry(dn)->offset = rde->offset; > > > > - dvino = ceph_vino(d_inode(parent)); > > - update_dentry_lease(dn, rde->lease, req->r_session, > > - req->r_request_started, &tvino, &dvino); > > + update_dentry_lease(d_inode(parent), dn, > > + rde->lease, req->r_session, > > + req->r_request_started); > > > > if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { > > ret = fill_readdir_cache(d_inode(parent), dn, > > That said... > > Reviewed-by: Jeff Layton <jlayton@redhat.com> >
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8cfece240ffe..e47a25495be5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1031,59 +1031,38 @@ static int fill_inode(struct inode *inode, struct page *locked_page, } /* - * caller should hold session s_mutex. + * caller should hold session s_mutex and dentry->d_lock. */ -static void update_dentry_lease(struct dentry *dentry, - struct ceph_mds_reply_lease *lease, - struct ceph_mds_session *session, - unsigned long from_time, - struct ceph_vino *tgt_vino, - struct ceph_vino *dir_vino) +static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, + struct ceph_mds_reply_lease *lease, + struct ceph_mds_session *session, + unsigned long from_time, + struct ceph_mds_session **old_lease_session) { struct ceph_dentry_info *di = ceph_dentry(dentry); long unsigned duration = le32_to_cpu(lease->duration_ms); long unsigned ttl = from_time + (duration * HZ) / 1000; long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; - struct inode *dir; - struct ceph_mds_session *old_lease_session = NULL; - - /* - * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that - * we expect a negative dentry. - */ - if (!tgt_vino && d_really_is_positive(dentry)) - return; - - if (tgt_vino && (d_really_is_negative(dentry) || - !ceph_ino_compare(d_inode(dentry), tgt_vino))) - return; - spin_lock(&dentry->d_lock); dout("update_dentry_lease %p duration %lu ms ttl %lu\n", dentry, duration, ttl); - dir = d_inode(dentry->d_parent); - - /* make sure parent matches dir_vino */ - if (!ceph_ino_compare(dir, dir_vino)) - goto out_unlock; - /* only track leases on regular dentries */ if (ceph_snap(dir) != CEPH_NOSNAP) - goto out_unlock; + return; di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); if (duration == 0) { __ceph_dentry_dir_lease_touch(di); - goto out_unlock; + return; } if (di->lease_gen == session->s_cap_gen && time_before(ttl, di->time)) - goto out_unlock; /* we already have a newer lease. */ + return; /* we already have a newer lease. */ if (di->lease_session && di->lease_session != session) { - old_lease_session = di->lease_session; + *old_lease_session = di->lease_session; di->lease_session = NULL; } @@ -1096,6 +1075,62 @@ static void update_dentry_lease(struct dentry *dentry, di->time = ttl; __ceph_dentry_lease_touch(di); +} + +static inline void update_dentry_lease(struct inode *dir, struct dentry *dentry, + struct ceph_mds_reply_lease *lease, + struct ceph_mds_session *session, + unsigned long from_time) +{ + struct ceph_mds_session *old_lease_session = NULL; + spin_lock(&dentry->d_lock); + __update_dentry_lease(dir, dentry, lease, session, from_time, + &old_lease_session); + spin_unlock(&dentry->d_lock); + if (old_lease_session) + ceph_put_mds_session(old_lease_session); +} + +/* + * update dentry lease without having parent inode locked + */ +static void update_dentry_lease_careful(struct dentry *dentry, + struct ceph_mds_reply_lease *lease, + struct ceph_mds_session *session, + unsigned long from_time, + char *dname, u32 dname_len, + struct ceph_vino *pdvino, + struct ceph_vino *ptvino) + +{ + struct inode *dir; + struct ceph_mds_session *old_lease_session = NULL; + + spin_lock(&dentry->d_lock); + /* make sure dentry's name matches target */ + if (dentry->d_name.len != dname_len || + memcmp(dentry->d_name.name, dname, dname_len)) + goto out_unlock; + + dir = d_inode(dentry->d_parent); + /* make sure parent matches dvino */ + if (!ceph_ino_compare(dir, pdvino)) + goto out_unlock; + + /* make sure dentry's inode matches target. NULL ptvino means that + * we expect a negative dentry */ + if (ptvino) { + if (d_really_is_negative(dentry)) + goto out_unlock; + if (!ceph_ino_compare(d_inode(dentry), ptvino)) + goto out_unlock; + } else { + if (d_really_is_positive(dentry)) + goto out_unlock; + } + + __update_dentry_lease(dir, dentry, lease, session, + from_time, &old_lease_session); out_unlock: spin_unlock(&dentry->d_lock); if (old_lease_session) @@ -1160,19 +1195,6 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) return 0; } -static int d_name_cmp(struct dentry *dentry, const char *name, size_t len) -{ - int ret; - - /* take d_lock to ensure dentry->d_name stability */ - spin_lock(&dentry->d_lock); - ret = dentry->d_name.len - len; - if (!ret) - ret = memcmp(dentry->d_name.name, name, len); - spin_unlock(&dentry->d_lock); - return ret; -} - /* * Incorporate results into the local cache. This is either just * one inode, or a directory, dentry, and possibly linked-to inode (e.g., @@ -1375,10 +1397,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) } else if (have_lease) { if (d_unhashed(dn)) d_add(dn, NULL); - update_dentry_lease(dn, rinfo->dlease, - session, - req->r_request_started, - NULL, &dvino); + update_dentry_lease(dir, dn, + rinfo->dlease, session, + req->r_request_started); } goto done; } @@ -1400,11 +1421,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) } if (have_lease) { - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); - update_dentry_lease(dn, rinfo->dlease, session, - req->r_request_started, - &tvino, &dvino); + update_dentry_lease(dir, dn, + rinfo->dlease, session, + req->r_request_started); } dout(" final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || @@ -1422,27 +1441,20 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) err = splice_dentry(&req->r_dentry, in); if (err < 0) goto done; - } else if (rinfo->head->is_dentry && - !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) { + } else if (rinfo->head->is_dentry && req->r_dentry) { + /* parent inode is not locked, be carefull */ struct ceph_vino *ptvino = NULL; - - if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || - le32_to_cpu(rinfo->dlease->duration_ms)) { - dvino.ino = le64_to_cpu(rinfo->diri.in->ino); - dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); - - if (rinfo->head->is_target) { - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); - ptvino = &tvino; - } - - update_dentry_lease(req->r_dentry, rinfo->dlease, - session, req->r_request_started, ptvino, - &dvino); - } else { - dout("%s: no dentry lease or dir cap\n", __func__); + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); + if (rinfo->head->is_target) { + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); + ptvino = &tvino; } + update_dentry_lease_careful(req->r_dentry, rinfo->dlease, + session, req->r_request_started, + rinfo->dname, rinfo->dname_len, + &dvino, ptvino); } done: dout("fill_trace done err=%d\n", err); @@ -1604,7 +1616,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, /* FIXME: release caps/leases if error occurs */ for (i = 0; i < rinfo->dir_nr; i++) { struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; - struct ceph_vino tvino, dvino; + struct ceph_vino tvino; dname.name = rde->name; dname.len = rde->name_len; @@ -1705,9 +1717,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ceph_dentry(dn)->offset = rde->offset; - dvino = ceph_vino(d_inode(parent)); - update_dentry_lease(dn, rde->lease, req->r_session, - req->r_request_started, &tvino, &dvino); + update_dentry_lease(d_inode(parent), dn, + rde->lease, req->r_session, + req->r_request_started); if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { ret = fill_readdir_cache(d_inode(parent), dn,
d_name_cmp() and update_dentry_lease() lock and unlock dentry->d_lock respectively. Dentry may get renamed between them. The fix is moving the dentry name compare into update_dentry_lease(). This patch introduce two version of update_dentry_lease(). One version is for the case that parent inode is locked. It does not need to check parent/target inode and dentry name. Another version is for the case that parent inode is not locked. It checks arent/target inode and dentry name after locking dentry->d_lock. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> --- fs/ceph/inode.c | 164 ++++++++++++++++++++++++++---------------------- 1 file changed, 88 insertions(+), 76 deletions(-)