@@ -2651,6 +2651,15 @@ void MDCache::handle_mds_failure(int who)
if (p->second->slave_to_mds == who) {
if (p->second->slave_did_prepare()) {
dout(10) << " slave request " << *p->second << " uncommitted, will resolve shortly" << dendl;
+ if (!p->second->more()->waiting_on_slave.empty()) {
+ assert(p->second->more()->srcdn_auth_mds == mds->get_nodeid());
+ // will rollback, no need to wait
+ if (p->second->slave_request) {
+ p->second->slave_request->put();
+ p->second->slave_request = 0;
+ }
+ p->second->more()->waiting_on_slave.clear();
+ }
} else {
dout(10) << " slave request " << *p->second << " has no prepare, finishing up" << dendl;
if (p->second->slave_request)
@@ -2660,12 +2669,22 @@ void MDCache::handle_mds_failure(int who)
}
}
- if (p->second->is_slave() &&
- p->second->slave_did_prepare() && p->second->more()->srcdn_auth_mds == who &&
- mds->mdsmap->is_clientreplay_or_active_or_stopping(p->second->slave_to_mds)) {
- // rename srcdn's auth mds failed, resolve even I'm a survivor.
- dout(10) << " slave request " << *p->second << " uncommitted, will resolve shortly" << dendl;
- add_ambiguous_slave_update(p->first, p->second->slave_to_mds);
+ if (p->second->is_slave() && p->second->slave_did_prepare()) {
+ if (p->second->more()->waiting_on_slave.count(who)) {
+ assert(p->second->more()->srcdn_auth_mds == mds->get_nodeid());
+ dout(10) << " slave request " << *p->second << " no longer need rename notify ack from mds."
+ << who << dendl;
+ p->second->more()->waiting_on_slave.erase(who);
+ if (p->second->more()->waiting_on_slave.empty())
+ mds->queue_waiter(new C_MDS_RetryRequest(this, p->second));
+ }
+
+ if (p->second->more()->srcdn_auth_mds == who &&
+ mds->mdsmap->is_clientreplay_or_active_or_stopping(p->second->slave_to_mds)) {
+ // rename srcdn's auth mds failed, resolve even if I'm a survivor.
+ dout(10) << " slave request " << *p->second << " uncommitted, will resolve shortly" << dendl;
+ add_ambiguous_slave_update(p->first, p->second->slave_to_mds);
+ }
}
// failed node is slave?
@@ -1280,6 +1280,16 @@ void Server::handle_slave_request(MMDSSlaveRequest *m)
if (m->is_reply())
return handle_slave_request_reply(m);
+ // The purpose of rename notify is to enforce causal message ordering: it makes sure
+ // bystanders have received all messages from the rename srcdn's auth MDS.
+ if (m->get_op() == MMDSSlaveRequest::OP_RENAMENOTIFY) {
+ MMDSSlaveRequest *reply = new MMDSSlaveRequest(m->get_reqid(), m->get_attempt(),
+ MMDSSlaveRequest::OP_RENAMENOTIFYACK);
+ mds->send_message(reply, m->get_connection());
+ m->put();
+ return;
+ }
+
CDentry *straydn = NULL;
if (m->stray.length() > 0) {
straydn = mdcache->add_replica_stray(m->stray, from);
@@ -1432,6 +1442,10 @@ void Server::handle_slave_request_reply(MMDSSlaveRequest *m)
handle_slave_rename_prep_ack(mdr, m);
break;
+ case MMDSSlaveRequest::OP_RENAMENOTIFYACK:
+ handle_slave_rename_notify_ack(mdr, m);
+ break;
+
default:
assert(0);
}
@@ -6560,6 +6574,9 @@ void Server::handle_slave_rename_prep(MDRequest *mdr)
// am i srcdn auth?
if (srcdn->is_auth()) {
+ set<int> srcdnrep;
+ srcdn->list_replicas(srcdnrep);
+
bool reply_witness = false;
if (srcdnl->is_primary() && !srcdnl->get_inode()->state_test(CInode::STATE_AMBIGUOUSAUTH)) {
// freeze?
@@ -6594,12 +6611,19 @@ void Server::handle_slave_rename_prep(MDRequest *mdr)
if (mdr->slave_request->witnesses.size() > 1) {
dout(10) << " set srci ambiguous auth; providing srcdn replica list" << dendl;
reply_witness = true;
+ for (set<int>::iterator p = srcdnrep.begin(); p != srcdnrep.end(); ++p) {
+ if (*p == mdr->slave_to_mds ||
+ !mds->mdsmap->is_clientreplay_or_active_or_stopping(*p))
+ continue;
+ MMDSSlaveRequest *notify = new MMDSSlaveRequest(mdr->reqid, mdr->attempt,
+ MMDSSlaveRequest::OP_RENAMENOTIFY);
+ mds->send_message_mds(notify, *p);
+ mdr->more()->waiting_on_slave.insert(*p);
+ }
}
}
// is witness list sufficient?
- set<int> srcdnrep;
- srcdn->list_replicas(srcdnrep);
for (set<int>::iterator p = srcdnrep.begin(); p != srcdnrep.end(); ++p) {
if (*p == mdr->slave_to_mds ||
mdr->slave_request->witnesses.count(*p)) continue;
@@ -6619,6 +6643,11 @@ void Server::handle_slave_rename_prep(MDRequest *mdr)
return;
}
dout(10) << " witness list sufficient: includes all srcdn replicas" << dendl;
+ if (!mdr->more()->waiting_on_slave.empty()) {
+ dout(10) << " still waiting for rename notify acks from "
+ << mdr->more()->waiting_on_slave << dendl;
+ return;
+ }
} else if (srcdnl->is_primary() && srcdn->authority() != destdn->authority()) {
// set ambiguous auth for srci on witnesses
mdr->set_ambiguous_auth(srcdnl->get_inode());
@@ -7187,6 +7216,24 @@ void Server::handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *ack)
dout(10) << "still waiting on slaves " << mdr->more()->waiting_on_slave << dendl;
}
+void Server::handle_slave_rename_notify_ack(MDRequest *mdr, MMDSSlaveRequest *ack)
+{
+  // Handles an OP_RENAMENOTIFYACK reply: removes the sender from the set of
+  // slaves we are still waiting on, and re-dispatches the slave request once
+  // that set becomes empty.
+  dout(10) << "handle_slave_rename_notify_ack " << *mdr << " from mds."
+           << ack->get_source() << dendl;
+  assert(mdr->is_slave());
+  int sender = ack->get_source().num();
+
+  // Ack from an mds we are not waiting on: nothing to do.
+  if (!mdr->more()->waiting_on_slave.count(sender))
+    return;
+
+  mdr->more()->waiting_on_slave.erase(sender);
+
+  if (!mdr->more()->waiting_on_slave.empty()) {
+    dout(10) << " still waiting for rename notify acks from "
+             << mdr->more()->waiting_on_slave << dendl;
+    return;
+  }
+
+  // Last outstanding ack arrived; resume the slave request if one is attached.
+  if (mdr->slave_request)
+    dispatch_slave_request(mdr);
+}
@@ -242,6 +242,7 @@ public:
// slaving
void handle_slave_rename_prep(MDRequest *mdr);
void handle_slave_rename_prep_ack(MDRequest *mdr, MMDSSlaveRequest *m);
+ void handle_slave_rename_notify_ack(MDRequest *mdr, MMDSSlaveRequest *m);
void _logged_slave_rename(MDRequest *mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
void _commit_slave_rename(MDRequest *mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
void do_rename_rollback(bufferlist &rbl, int master, MDRequest *mdr, bool finish_mdr=false);
@@ -43,6 +43,9 @@ class MMDSSlaveRequest : public Message {
static const int OP_DROPLOCKS = 11;
+ static const int OP_RENAMENOTIFY = 12;
+ static const int OP_RENAMENOTIFYACK = -12;
+
static const int OP_FINISH = 17;
static const int OP_COMMITTED = -18;
@@ -77,6 +80,9 @@ class MMDSSlaveRequest : public Message {
case OP_DROPLOCKS: return "drop_locks";
+ case OP_RENAMENOTIFY: return "rename_notify";
+ case OP_RENAMENOTIFYACK: return "rename_notify_ack";
+
case OP_ABORT: return "abort";
//case OP_COMMIT: return "commit";