diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1295,7 +1295,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci)
* caller should hold snap_rwsem (read), s_mutex.
*/
static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
- int op, bool sync, int used, int want, int retain,
+ int op, int flags, int used, int want, int retain,
int flushing, u64 flush_tid, u64 oldest_flush_tid)
__releases(cap->ci->i_ceph_lock)
{
@@ -1393,12 +1393,19 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
arg.mode = inode->i_mode;
arg.inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
- if (list_empty(&ci->i_cap_snaps))
- arg.flags = CEPH_CLIENT_CAPS_NO_CAPSNAP;
- else
- arg.flags = CEPH_CLIENT_CAPS_PENDING_CAPSNAP;
- if (sync)
- arg.flags |= CEPH_CLIENT_CAPS_SYNC;
+ if (!(flags & CEPH_CLIENT_CAPS_PENDING_CAPSNAP) &&
+ !list_empty(&ci->i_cap_snaps)) {
+ struct ceph_cap_snap *capsnap;
+ list_for_each_entry_reverse(capsnap, &ci->i_cap_snaps, ci_item) {
+ if (capsnap->cap_flush.tid)
+ break;
+ if (capsnap->need_flush) {
+ flags |= CEPH_CLIENT_CAPS_PENDING_CAPSNAP;
+ break;
+ }
+ }
+ }
+ arg.flags = flags;
spin_unlock(&ci->i_ceph_lock);
@@ -2085,7 +2092,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
sent++;
/* __send_cap drops i_ceph_lock */
- delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, false,
+ delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0,
cap_used, want, retain, flushing,
flush_tid, oldest_flush_tid);
goto retry; /* retake i_ceph_lock and restart our cap scan. */
@@ -2155,7 +2162,8 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
&flush_tid, &oldest_flush_tid);
/* __send_cap drops i_ceph_lock */
- delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, true,
+ delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+ CEPH_CLIENT_CAPS_SYNC,
__ceph_caps_used(ci),
__ceph_caps_wanted(ci),
(cap->issued | cap->implemented),
@@ -2338,9 +2346,17 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_cap_flush *cf;
int ret;
u64 first_tid = 0;
+ u64 last_snap_flush = 0;

ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;

+ list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
+ if (!cf->caps) {
+ last_snap_flush = cf->tid;
+ break;
+ }
+ }
+
list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
if (cf->tid < first_tid)
continue;
@@ -2358,10 +2374,13 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
dout("kick_flushing_caps %p cap %p tid %llu %s\n",
inode, cap, cf->tid, ceph_cap_string(cf->caps));
ci->i_ceph_flags |= CEPH_I_NODELAY;
+
ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
- false, __ceph_caps_used(ci),
+ (cf->tid < last_snap_flush ?
+ CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
+ __ceph_caps_used(ci),
__ceph_caps_wanted(ci),
- cap->issued | cap->implemented,
+ (cap->issued | cap->implemented),
cf->caps, cf->tid, oldest_flush_tid);
if (ret) {
pr_err("kick_flushing_caps: error sending "
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -682,7 +682,7 @@ extern const char *ceph_cap_op_name(int op);
/* flags field in client cap messages (version >= 10) */
#define CEPH_CLIENT_CAPS_SYNC (1<<0)
#define CEPH_CLIENT_CAPS_NO_CAPSNAP (1<<1)
-#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1<<2);
+#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1<<2)

/*
* caps message, used for capability callbacks, acks, requests, etc.
Client uses this flag to tell mds whether there are more cap snaps that
need to be flushed. It's mainly for the case where the client needs to
re-send cap/snap flushes after mds failover, but CEPH_CAP_ANY_FILE_WR on
the corresponding inodes was already released before the failover.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/caps.c               | 41 ++++++++++++++++++++++++++----------
 include/linux/ceph/ceph_fs.h |  2 +-
 2 files changed, 31 insertions(+), 12 deletions(-)
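
For reviewers, a rough userspace sketch of the decision the new code in
__send_cap() makes. This is a simplified model under assumed names, not
part of the patch: struct capsnap and capsnap_flags() are hypothetical
stand-ins for struct ceph_cap_snap and the in-kernel reverse scan of
ci->i_cap_snaps.

/*
 * Minimal model of the PENDING_CAPSNAP check: walk the inode's cap
 * snaps newest-first; a capsnap with a nonzero flush tid is already
 * queued, so stop there; an unqueued capsnap still marked need_flush
 * means more snap flushes are coming, so set the flag.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CEPH_CLIENT_CAPS_SYNC            (1 << 0)
#define CEPH_CLIENT_CAPS_NO_CAPSNAP      (1 << 1)
#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1 << 2)

struct capsnap {              /* stand-in for struct ceph_cap_snap */
	uint64_t flush_tid;   /* nonzero once queued for flushing */
	bool need_flush;      /* capsnap has dirty state to flush */
};

/* snaps[0] is oldest, snaps[n - 1] is newest; mirrors the
 * list_for_each_entry_reverse() walk in the hunk above */
static int capsnap_flags(const struct capsnap *snaps, int n, int flags)
{
	for (int i = n - 1; i >= 0; i--) {
		if (snaps[i].flush_tid)
			break;    /* queued already; mds will hear of it */
		if (snaps[i].need_flush) {
			flags |= CEPH_CLIENT_CAPS_PENDING_CAPSNAP;
			break;
		}
	}
	return flags;
}

int main(void)
{
	struct capsnap snaps[] = {
		{ .flush_tid = 5, .need_flush = true },  /* oldest: queued */
		{ .flush_tid = 0, .need_flush = true },  /* newest: pending */
	};
	int flags = capsnap_flags(snaps, 2, CEPH_CLIENT_CAPS_SYNC);

	printf("flags=0x%x, pending capsnap: %s\n", (unsigned)flags,
	       (flags & CEPH_CLIENT_CAPS_PENDING_CAPSNAP) ? "yes" : "no");
	return 0;
}

Scanning newest-first appears to rely on flush tids being assigned to
capsnaps oldest-first: the first entry seen with a nonzero tid implies
everything older is already queued, so only a newer, still-unqueued
capsnap with need_flush set forces PENDING_CAPSNAP.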