Message ID | 1365754273-14088-3-git-send-email-zheng.z.yan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 04/12/2013 03:11 AM, Yan, Zheng wrote: > From: "Yan, Zheng" <zheng.z.yan@intel.com> > > There is deadlock as illustrated bellow. The fix is taking i_mutex > before getting Fw cap reference. > > write truncate MDS > --------------------- -------------------- -------------- > get Fw cap > lock i_mutex > lock i_mutex (blocked) > request setattr.size -> > <- revoke Fw cap This looks good to me, but I would prefer if Sage or someone else with deeper ceph FS knowledge said so as well. Reviewed-by: Alex Elder <elder@inktank.com> > Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> > --- > fs/ceph/caps.c | 13 +++++++------ > fs/ceph/file.c | 12 ++++++------ > 2 files changed, 13 insertions(+), 12 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index 0da2e94..8737572 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -2058,6 +2058,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, > goto out; > } > > + /* finish pending truncate */ > + while (ci->i_truncate_pending) { > + spin_unlock(&ci->i_ceph_lock); > + __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); > + spin_lock(&ci->i_ceph_lock); > + } > + > if (need & CEPH_CAP_FILE_WR) { > if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { > dout("get_cap_refs %p endoff %llu > maxsize %llu\n", > @@ -2079,12 +2086,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, > } > have = __ceph_caps_issued(ci, &implemented); > > - /* > - * disallow writes while a truncate is pending > - */ > - if (ci->i_truncate_pending) > - have &= ~CEPH_CAP_FILE_WR; > - > if ((have & need) == need) { > /* > * Look at (implemented & ~have & not) so that we keep waiting > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 546a705..5490598 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -647,7 +647,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, > dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", > inode, ceph_vinop(inode), pos, (unsigned)len, inode); > again: > - __ceph_do_pending_vmtruncate(inode, true); > if (fi->fmode & CEPH_FILE_MODE_LAZY) > want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; > else > @@ -724,7 +723,7 @@ retry_snap: > ret = -ENOSPC; > goto out; > } > - __ceph_do_pending_vmtruncate(inode, true); > + mutex_lock(&inode->i_mutex); > dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", > inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > inode->i_size); > @@ -733,8 +732,10 @@ retry_snap: > else > want = CEPH_CAP_FILE_BUFFER; > ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); > - if (ret < 0) > - goto out_put; > + if (ret < 0) { > + mutex_unlock(&inode->i_mutex); > + goto out; > + } > > dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", > inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > @@ -744,10 +745,10 @@ retry_snap: > (iocb->ki_filp->f_flags & O_DIRECT) || > (inode->i_sb->s_flags & MS_SYNCHRONOUS) || > (fi->flags & CEPH_F_SYNC)) { > + mutex_unlock(&inode->i_mutex); > ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, > &iocb->ki_pos); > } else { > - mutex_lock(&inode->i_mutex); > ret = __generic_file_aio_write(iocb, iov, nr_segs, > &iocb->ki_pos); > mutex_unlock(&inode->i_mutex); > @@ -762,7 +763,6 @@ retry_snap: > __mark_inode_dirty(inode, dirty); > } > > -out_put: > dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", > inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > ceph_cap_string(got)); > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 12 Apr 2013, Yan, Zheng wrote: > From: "Yan, Zheng" <zheng.z.yan@intel.com> > > There is deadlock as illustrated bellow. The fix is taking i_mutex > before getting Fw cap reference. > > write truncate MDS > --------------------- -------------------- -------------- > get Fw cap > lock i_mutex > lock i_mutex (blocked) > request setattr.size -> > <- revoke Fw cap > > Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> > --- > fs/ceph/caps.c | 13 +++++++------ > fs/ceph/file.c | 12 ++++++------ > 2 files changed, 13 insertions(+), 12 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index 0da2e94..8737572 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -2058,6 +2058,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, > goto out; > } > > + /* finish pending truncate */ > + while (ci->i_truncate_pending) { > + spin_unlock(&ci->i_ceph_lock); > + __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); > + spin_lock(&ci->i_ceph_lock); I think if we retake i_ceph_lock we need to goto the top to make sure our local variables aren't stale.. in this case, just file_wanted. > + } > + > if (need & CEPH_CAP_FILE_WR) { > if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { > dout("get_cap_refs %p endoff %llu > maxsize %llu\n", > @@ -2079,12 +2086,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, > } > have = __ceph_caps_issued(ci, &implemented); > > - /* > - * disallow writes while a truncate is pending > - */ > - if (ci->i_truncate_pending) > - have &= ~CEPH_CAP_FILE_WR; > - > if ((have & need) == need) { > /* > * Look at (implemented & ~have & not) so that we keep waiting > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 546a705..5490598 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -647,7 +647,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, > dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", > inode, ceph_vinop(inode), pos, (unsigned)len, inode); > again: > - __ceph_do_pending_vmtruncate(inode, true); > if (fi->fmode & CEPH_FILE_MODE_LAZY) > want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; > else > @@ -724,7 +723,7 @@ retry_snap: > ret = -ENOSPC; > goto out; > } > - __ceph_do_pending_vmtruncate(inode, true); > + mutex_lock(&inode->i_mutex); > dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", > inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > inode->i_size); > @@ -733,8 +732,10 @@ retry_snap: > else > want = CEPH_CAP_FILE_BUFFER; > ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); > - if (ret < 0) > - goto out_put; > + if (ret < 0) { > + mutex_unlock(&inode->i_mutex); > + goto out; > + } > > dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", > inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > @@ -744,10 +745,10 @@ retry_snap: > (iocb->ki_filp->f_flags & O_DIRECT) || > (inode->i_sb->s_flags & MS_SYNCHRONOUS) || > (fi->flags & CEPH_F_SYNC)) { > + mutex_unlock(&inode->i_mutex); > ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, > &iocb->ki_pos); > } else { > - mutex_lock(&inode->i_mutex); > ret = __generic_file_aio_write(iocb, iov, nr_segs, > &iocb->ki_pos); > mutex_unlock(&inode->i_mutex); > @@ -762,7 +763,6 @@ retry_snap: > __mark_inode_dirty(inode, dirty); > } > > -out_put: > dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", > inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > ceph_cap_string(got)); Mechanically, the rest of this looks correct. I seem to remember us changing this (or something similar) so that we *didn't* hold i_mutex while waiting on the caps in order to avoid some deadlock. But... looking through the git history I can't find anything. I think the race to consider is if we are blocked waiting for the WR cap and the MDS sends us a truncate. It will queue the async truncate work but that will block waiting for i_mutex. Can that deadlock? (I think no, but perhaps the pending truncate check needs to happen after we acquire the cap, too!) Similarly, if we block holding i_mutex and wait for WR, but the MDS revokes some other cap (say, WRBUFFER), could we deadlock from teh async writeback worker? Both sound dangerous to me. I wonder if something in the spirit of while (true) { get_cap(Fw) if (try_lock_mutex(...)) break; put_cap(Fw); lock_mutex(...) unlock_mutex(...) } would be simpler. Or, make a get_cap variant that drops i_mutex while waiting, but takes it before grabbing the actual Fw cap ref. sage -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 04/17/2013 11:42 AM, Sage Weil wrote: > On Fri, 12 Apr 2013, Yan, Zheng wrote: >> From: "Yan, Zheng" <zheng.z.yan@intel.com> >> >> There is deadlock as illustrated bellow. The fix is taking i_mutex >> before getting Fw cap reference. >> >> write truncate MDS >> --------------------- -------------------- -------------- >> get Fw cap >> lock i_mutex >> lock i_mutex (blocked) >> request setattr.size -> >> <- revoke Fw cap >> >> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> >> --- >> fs/ceph/caps.c | 13 +++++++------ >> fs/ceph/file.c | 12 ++++++------ >> 2 files changed, 13 insertions(+), 12 deletions(-) >> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >> index 0da2e94..8737572 100644 >> --- a/fs/ceph/caps.c >> +++ b/fs/ceph/caps.c >> @@ -2058,6 +2058,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, >> goto out; >> } >> >> + /* finish pending truncate */ >> + while (ci->i_truncate_pending) { >> + spin_unlock(&ci->i_ceph_lock); >> + __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); >> + spin_lock(&ci->i_ceph_lock); > > I think if we retake i_ceph_lock we need to goto the top to make sure our > local variables aren't stale.. in this case, just file_wanted. > >> + } >> + >> if (need & CEPH_CAP_FILE_WR) { >> if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { >> dout("get_cap_refs %p endoff %llu > maxsize %llu\n", >> @@ -2079,12 +2086,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, >> } >> have = __ceph_caps_issued(ci, &implemented); >> >> - /* >> - * disallow writes while a truncate is pending >> - */ >> - if (ci->i_truncate_pending) >> - have &= ~CEPH_CAP_FILE_WR; >> - >> if ((have & need) == need) { >> /* >> * Look at (implemented & ~have & not) so that we keep waiting >> diff --git a/fs/ceph/file.c b/fs/ceph/file.c >> index 546a705..5490598 100644 >> --- a/fs/ceph/file.c >> +++ b/fs/ceph/file.c >> @@ -647,7 +647,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, >> dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", >> inode, ceph_vinop(inode), pos, (unsigned)len, inode); >> again: >> - __ceph_do_pending_vmtruncate(inode, true); >> if (fi->fmode & CEPH_FILE_MODE_LAZY) >> want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; >> else >> @@ -724,7 +723,7 @@ retry_snap: >> ret = -ENOSPC; >> goto out; >> } >> - __ceph_do_pending_vmtruncate(inode, true); >> + mutex_lock(&inode->i_mutex); >> dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", >> inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, >> inode->i_size); >> @@ -733,8 +732,10 @@ retry_snap: >> else >> want = CEPH_CAP_FILE_BUFFER; >> ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); >> - if (ret < 0) >> - goto out_put; >> + if (ret < 0) { >> + mutex_unlock(&inode->i_mutex); >> + goto out; >> + } >> >> dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", >> inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, >> @@ -744,10 +745,10 @@ retry_snap: >> (iocb->ki_filp->f_flags & O_DIRECT) || >> (inode->i_sb->s_flags & MS_SYNCHRONOUS) || >> (fi->flags & CEPH_F_SYNC)) { >> + mutex_unlock(&inode->i_mutex); >> ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, >> &iocb->ki_pos); >> } else { >> - mutex_lock(&inode->i_mutex); >> ret = __generic_file_aio_write(iocb, iov, nr_segs, >> &iocb->ki_pos); >> mutex_unlock(&inode->i_mutex); >> @@ -762,7 +763,6 @@ retry_snap: >> __mark_inode_dirty(inode, dirty); >> } >> >> -out_put: >> dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", >> inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, >> ceph_cap_string(got)); > > Mechanically, the rest of this looks correct. I seem to remember us > changing this (or something similar) so that we *didn't* hold i_mutex > while waiting on the caps in order to avoid some deadlock. But... looking > through the git history I can't find anything. > > I think the race to consider is if we are blocked waiting for the WR cap > and the MDS sends us a truncate. It will queue the async truncate work > but that will block waiting for i_mutex. Can that deadlock? (I think no, > but perhaps the pending truncate check needs to happen after we acquire > the cap, too!) Maybe we should also call wake_up_all(&ci->i_cap_wq) when receiving truncate from MDS. To truncate a file, the MDS xlock the filelock firstly, which revokes all Fw caps from clients. Then the MDS sends truncate to clients and finally drops the xlock. It's impossible that client receives a truncate request while having Fw cap. So I don't think we need check pending truncate after acquiring the Fw cap. > > Similarly, if we block holding i_mutex and wait for WR, but the MDS > revokes some other cap (say, WRBUFFER), could we deadlock from teh > async writeback worker? I think no, i_mutex is not involved in writeback. > > Both sound dangerous to me. I wonder if something in the spirit of > > while (true) { > get_cap(Fw) > if (try_lock_mutex(...)) > break; > put_cap(Fw); > lock_mutex(...) > unlock_mutex(...) > } > > would be simpler. Or, make a get_cap variant that drops i_mutex while > waiting, but takes it before grabbing the actual Fw cap ref. > See my patch "ceph: apply write checks in ceph_aio_write". I think we really should acquire i_mutex before getting caps. ceph_get_caps() needs a parameter 'endoff', if the file is opened in append mode, the endoff is calculated from i_size. If we drop i_mutex in ceph_get_caps(), someone else may change the i_size. Regards Yan, Zheng > sage > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 17 Apr 2013, Yan, Zheng wrote: > On 04/17/2013 11:42 AM, Sage Weil wrote: > > On Fri, 12 Apr 2013, Yan, Zheng wrote: > >> From: "Yan, Zheng" <zheng.z.yan@intel.com> > >> > >> There is deadlock as illustrated bellow. The fix is taking i_mutex > >> before getting Fw cap reference. > >> > >> write truncate MDS > >> --------------------- -------------------- -------------- > >> get Fw cap > >> lock i_mutex > >> lock i_mutex (blocked) > >> request setattr.size -> > >> <- revoke Fw cap > >> > >> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> > >> --- > >> fs/ceph/caps.c | 13 +++++++------ > >> fs/ceph/file.c | 12 ++++++------ > >> 2 files changed, 13 insertions(+), 12 deletions(-) > >> > >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > >> index 0da2e94..8737572 100644 > >> --- a/fs/ceph/caps.c > >> +++ b/fs/ceph/caps.c > >> @@ -2058,6 +2058,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, > >> goto out; > >> } > >> > >> + /* finish pending truncate */ > >> + while (ci->i_truncate_pending) { > >> + spin_unlock(&ci->i_ceph_lock); > >> + __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); > >> + spin_lock(&ci->i_ceph_lock); > > > > I think if we retake i_ceph_lock we need to goto the top to make sure our > > local variables aren't stale.. in this case, just file_wanted. > > > >> + } > >> + > >> if (need & CEPH_CAP_FILE_WR) { > >> if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { > >> dout("get_cap_refs %p endoff %llu > maxsize %llu\n", > >> @@ -2079,12 +2086,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, > >> } > >> have = __ceph_caps_issued(ci, &implemented); > >> > >> - /* > >> - * disallow writes while a truncate is pending > >> - */ > >> - if (ci->i_truncate_pending) > >> - have &= ~CEPH_CAP_FILE_WR; > >> - > >> if ((have & need) == need) { > >> /* > >> * Look at (implemented & ~have & not) so that we keep waiting > >> diff --git a/fs/ceph/file.c b/fs/ceph/file.c > >> index 546a705..5490598 100644 > >> --- a/fs/ceph/file.c > >> +++ b/fs/ceph/file.c > >> @@ -647,7 +647,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, > >> dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", > >> inode, ceph_vinop(inode), pos, (unsigned)len, inode); > >> again: > >> - __ceph_do_pending_vmtruncate(inode, true); > >> if (fi->fmode & CEPH_FILE_MODE_LAZY) > >> want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; > >> else > >> @@ -724,7 +723,7 @@ retry_snap: > >> ret = -ENOSPC; > >> goto out; > >> } > >> - __ceph_do_pending_vmtruncate(inode, true); > >> + mutex_lock(&inode->i_mutex); > >> dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", > >> inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > >> inode->i_size); > >> @@ -733,8 +732,10 @@ retry_snap: > >> else > >> want = CEPH_CAP_FILE_BUFFER; > >> ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); > >> - if (ret < 0) > >> - goto out_put; > >> + if (ret < 0) { > >> + mutex_unlock(&inode->i_mutex); > >> + goto out; > >> + } > >> > >> dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", > >> inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > >> @@ -744,10 +745,10 @@ retry_snap: > >> (iocb->ki_filp->f_flags & O_DIRECT) || > >> (inode->i_sb->s_flags & MS_SYNCHRONOUS) || > >> (fi->flags & CEPH_F_SYNC)) { > >> + mutex_unlock(&inode->i_mutex); > >> ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, > >> &iocb->ki_pos); > >> } else { > >> - mutex_lock(&inode->i_mutex); > >> ret = __generic_file_aio_write(iocb, iov, nr_segs, > >> &iocb->ki_pos); > >> mutex_unlock(&inode->i_mutex); > >> @@ -762,7 +763,6 @@ retry_snap: > >> __mark_inode_dirty(inode, dirty); > >> } > >> > >> -out_put: > >> dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", > >> inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, > >> ceph_cap_string(got)); > > > > Mechanically, the rest of this looks correct. I seem to remember us > > changing this (or something similar) so that we *didn't* hold i_mutex > > while waiting on the caps in order to avoid some deadlock. But... looking > > through the git history I can't find anything. > > > > I think the race to consider is if we are blocked waiting for the WR cap > > and the MDS sends us a truncate. It will queue the async truncate work > > but that will block waiting for i_mutex. Can that deadlock? (I think no, > > but perhaps the pending truncate check needs to happen after we acquire > > the cap, too!) > > Maybe we should also call wake_up_all(&ci->i_cap_wq) when receiving truncate from > MDS. > > To truncate a file, the MDS xlock the filelock firstly, which revokes all Fw caps > from clients. Then the MDS sends truncate to clients and finally drops the xlock. > It's impossible that client receives a truncate request while having Fw cap. So I > don't think we need check pending truncate after acquiring the Fw cap. Okay, sounds good to me then! Perhaps we should add a comment and WARN_ON that we don't have the Fw cap at that time. Reviewed-by: Sage Weil <sage@inktank.com> > > > > Similarly, if we block holding i_mutex and wait for WR, but the MDS > > revokes some other cap (say, WRBUFFER), could we deadlock from teh > > async writeback worker? > > I think no, i_mutex is not involved in writeback. > > > > > Both sound dangerous to me. I wonder if something in the spirit of > > > > while (true) { > > get_cap(Fw) > > if (try_lock_mutex(...)) > > break; > > put_cap(Fw); > > lock_mutex(...) > > unlock_mutex(...) > > } > > > > would be simpler. Or, make a get_cap variant that drops i_mutex while > > waiting, but takes it before grabbing the actual Fw cap ref. > > > > See my patch "ceph: apply write checks in ceph_aio_write". I think we really > should acquire i_mutex before getting caps. ceph_get_caps() needs a parameter > 'endoff', if the file is opened in append mode, the endoff is calculated from > i_size. If we drop i_mutex in ceph_get_caps(), someone else may change the > i_size. > > > Regards > Yan, Zheng > > > > sage > > > > -- > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0da2e94..8737572 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2058,6 +2058,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, goto out; } + /* finish pending truncate */ + while (ci->i_truncate_pending) { + spin_unlock(&ci->i_ceph_lock); + __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); + spin_lock(&ci->i_ceph_lock); + } + if (need & CEPH_CAP_FILE_WR) { if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { dout("get_cap_refs %p endoff %llu > maxsize %llu\n", @@ -2079,12 +2086,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, } have = __ceph_caps_issued(ci, &implemented); - /* - * disallow writes while a truncate is pending - */ - if (ci->i_truncate_pending) - have &= ~CEPH_CAP_FILE_WR; - if ((have & need) == need) { /* * Look at (implemented & ~have & not) so that we keep waiting diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 546a705..5490598 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -647,7 +647,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", inode, ceph_vinop(inode), pos, (unsigned)len, inode); again: - __ceph_do_pending_vmtruncate(inode, true); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else @@ -724,7 +723,7 @@ retry_snap: ret = -ENOSPC; goto out; } - __ceph_do_pending_vmtruncate(inode, true); + mutex_lock(&inode->i_mutex); dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, inode->i_size); @@ -733,8 +732,10 @@ retry_snap: else want = CEPH_CAP_FILE_BUFFER; ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); - if (ret < 0) - goto out_put; + if (ret < 0) { + mutex_unlock(&inode->i_mutex); + goto out; + } dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, @@ -744,10 +745,10 @@ retry_snap: (iocb->ki_filp->f_flags & O_DIRECT) || (inode->i_sb->s_flags & MS_SYNCHRONOUS) || (fi->flags & CEPH_F_SYNC)) { + mutex_unlock(&inode->i_mutex); ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, &iocb->ki_pos); } else { - mutex_lock(&inode->i_mutex); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); @@ -762,7 +763,6 @@ retry_snap: __mark_inode_dirty(inode, dirty); } -out_put: dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, ceph_cap_string(got));