Message ID | 1363531902-24909-22-git-send-email-zheng.z.yan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
This needs to handle versioning the encoding based on peer feature bits too. On Sun, Mar 17, 2013 at 7:51 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote: > From: "Yan, Zheng" <zheng.z.yan@intel.com> > > Cache rejoin ack message already encodes inode base, make it also encode > dirfrag base. This allows the message to replicate stray dentries like > MDentryUnlink message. The function will be used by later patch. > > Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> > --- > src/mds/CDir.h | 20 +++++++++++++------- > src/mds/MDCache.cc | 20 ++++++++++++++++++-- > src/messages/MMDSCacheRejoin.h | 12 +++++++++++- > 3 files changed, 42 insertions(+), 10 deletions(-) > > diff --git a/src/mds/CDir.h b/src/mds/CDir.h > index 79946f1..f4a3a3d 100644 > --- a/src/mds/CDir.h > +++ b/src/mds/CDir.h > @@ -437,23 +437,29 @@ private: > ::encode(dist, bl); > } > > - void encode_replica(int who, bufferlist& bl) { > - __u32 nonce = add_replica(who); > - ::encode(nonce, bl); > + void _encode_base(bufferlist& bl) { > ::encode(first, bl); > ::encode(fnode, bl); > ::encode(dir_rep, bl); > ::encode(dir_rep_by, bl); > } > - void decode_replica(bufferlist::iterator& p) { > - __u32 nonce; > - ::decode(nonce, p); > - replica_nonce = nonce; > + void _decode_base(bufferlist::iterator& p) { > ::decode(first, p); > ::decode(fnode, p); > ::decode(dir_rep, p); > ::decode(dir_rep_by, p); > } > + void encode_replica(int who, bufferlist& bl) { > + __u32 nonce = add_replica(who); > + ::encode(nonce, bl); > + _encode_base(bl); > + } > + void decode_replica(bufferlist::iterator& p) { > + __u32 nonce; > + ::decode(nonce, p); > + replica_nonce = nonce; > + _decode_base(p); > + } > > > > diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc > index 8ba676e..344777e 100644 > --- a/src/mds/MDCache.cc > +++ b/src/mds/MDCache.cc > @@ -4510,8 +4510,22 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack) > } > } > > + // full dirfrags > + bufferlist::iterator p = ack->dirfrag_base.begin(); > + while 
(!p.end()) { > + dirfrag_t df; > + bufferlist basebl; > + ::decode(df, p); > + ::decode(basebl, p); > + CDir *dir = get_dirfrag(df); > + assert(dir); > + bufferlist::iterator q = basebl.begin(); > + dir->_decode_base(q); > + dout(10) << " got dir replica " << *dir << dendl; > + } > + > // full inodes > - bufferlist::iterator p = ack->inode_base.begin(); > + p = ack->inode_base.begin(); > while (!p.end()) { > inodeno_t ino; > snapid_t last; > @@ -5178,8 +5192,10 @@ void MDCache::rejoin_send_acks() > // dir > for (map<int,int>::iterator r = dir->replicas_begin(); > r != dir->replicas_end(); > - ++r) > + ++r) { > ack[r->first]->add_strong_dirfrag(dir->dirfrag(), ++r->second, dir->dir_rep); > + ack[r->first]->add_dirfrag_base(dir); > + } > > for (CDir::map_t::iterator q = dir->items.begin(); > q != dir->items.end(); > diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h > index b88f551..7c37ab4 100644 > --- a/src/messages/MMDSCacheRejoin.h > +++ b/src/messages/MMDSCacheRejoin.h > @@ -20,6 +20,7 @@ > #include "include/types.h" > > #include "mds/CInode.h" > +#include "mds/CDir.h" > > // sent from replica to auth > > @@ -169,6 +170,7 @@ class MMDSCacheRejoin : public Message { > // full > bufferlist inode_base; > bufferlist inode_locks; > + bufferlist dirfrag_base; > > // authpins, xlocks > struct slave_reqid { > @@ -258,7 +260,13 @@ public: > void add_strong_dirfrag(dirfrag_t df, int n, int dr) { > strong_dirfrags[df] = dirfrag_strong(n, dr); > } > - > + void add_dirfrag_base(CDir *dir) { > + ::encode(dir->dirfrag(), dirfrag_base); > + bufferlist bl; > + dir->_encode_base(bl); > + ::encode(bl, dirfrag_base); > + } We are guilty of doing this in other places, but we should avoid implicit encodings like this one, especially when the decode happens somewhere else like it does here. We can make a vector dirfrag_bases and add to that, and then encode and decode it along with the rest of the message — would that work for your purposes? 
-Greg > + > // dentries > void add_weak_dirfrag(dirfrag_t df) { > weak_dirfrags.insert(df); > @@ -294,6 +302,7 @@ public: > ::encode(wrlocked_inodes, payload); > ::encode(cap_export_bl, payload); > ::encode(strong_dirfrags, payload); > + ::encode(dirfrag_base, payload); > ::encode(weak, payload); > ::encode(weak_dirfrags, payload); > ::encode(weak_inodes, payload); > @@ -319,6 +328,7 @@ public: > ::decode(cap_export_paths, q); > } > ::decode(strong_dirfrags, p); > + ::decode(dirfrag_base, p); > ::decode(weak, p); > ::decode(weak_dirfrags, p); > ::decode(weak_inodes, p); > -- > 1.7.11.7 > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Mar 20, 2013 at 4:33 PM, Gregory Farnum <greg@inktank.com> wrote: > This needs to handle versioning the encoding based on peer feature bits too. > > On Sun, Mar 17, 2013 at 7:51 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote: >> + void add_dirfrag_base(CDir *dir) { >> + ::encode(dir->dirfrag(), dirfrag_base); >> + bufferlist bl; >> + dir->_encode_base(bl); >> + ::encode(bl, dirfrag_base); >> + } > > We are guilty of doing this in other places, but we should avoid > implicit encodings like this one, especially when the decode happens > somewhere else like it does here. We can make a vector dirfrag_bases > and add to that, and then encode and decode it along with the rest of > the message — would that work for your purposes? > -Greg Sorry, a vector (called dirfrag_bases) of pair<dirfrag_t, bl> where bl is the encoded base. Or something like that. :) -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/21/2013 07:33 AM, Gregory Farnum wrote: > This needs to handle versioning the encoding based on peer feature bits too. > > On Sun, Mar 17, 2013 at 7:51 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote: >> From: "Yan, Zheng" <zheng.z.yan@intel.com> >> >> Cache rejoin ack message already encodes inode base, make it also encode >> dirfrag base. This allows the message to replicate stray dentries like >> MDentryUnlink message. The function will be used by later patch. >> >> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> >> --- >> src/mds/CDir.h | 20 +++++++++++++------- >> src/mds/MDCache.cc | 20 ++++++++++++++++++-- >> src/messages/MMDSCacheRejoin.h | 12 +++++++++++- >> 3 files changed, 42 insertions(+), 10 deletions(-) >> >> diff --git a/src/mds/CDir.h b/src/mds/CDir.h >> index 79946f1..f4a3a3d 100644 >> --- a/src/mds/CDir.h >> +++ b/src/mds/CDir.h >> @@ -437,23 +437,29 @@ private: >> ::encode(dist, bl); >> } >> >> - void encode_replica(int who, bufferlist& bl) { >> - __u32 nonce = add_replica(who); >> - ::encode(nonce, bl); >> + void _encode_base(bufferlist& bl) { >> ::encode(first, bl); >> ::encode(fnode, bl); >> ::encode(dir_rep, bl); >> ::encode(dir_rep_by, bl); >> } >> - void decode_replica(bufferlist::iterator& p) { >> - __u32 nonce; >> - ::decode(nonce, p); >> - replica_nonce = nonce; >> + void _decode_base(bufferlist::iterator& p) { >> ::decode(first, p); >> ::decode(fnode, p); >> ::decode(dir_rep, p); >> ::decode(dir_rep_by, p); >> } >> + void encode_replica(int who, bufferlist& bl) { >> + __u32 nonce = add_replica(who); >> + ::encode(nonce, bl); >> + _encode_base(bl); >> + } >> + void decode_replica(bufferlist::iterator& p) { >> + __u32 nonce; >> + ::decode(nonce, p); >> + replica_nonce = nonce; >> + _decode_base(p); >> + } >> >> >> >> diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc >> index 8ba676e..344777e 100644 >> --- a/src/mds/MDCache.cc >> +++ b/src/mds/MDCache.cc >> @@ -4510,8 +4510,22 @@ void 
MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack) >> } >> } >> >> + // full dirfrags >> + bufferlist::iterator p = ack->dirfrag_base.begin(); >> + while (!p.end()) { >> + dirfrag_t df; >> + bufferlist basebl; >> + ::decode(df, p); >> + ::decode(basebl, p); >> + CDir *dir = get_dirfrag(df); >> + assert(dir); >> + bufferlist::iterator q = basebl.begin(); >> + dir->_decode_base(q); >> + dout(10) << " got dir replica " << *dir << dendl; >> + } >> + >> // full inodes >> - bufferlist::iterator p = ack->inode_base.begin(); >> + p = ack->inode_base.begin(); >> while (!p.end()) { >> inodeno_t ino; >> snapid_t last; >> @@ -5178,8 +5192,10 @@ void MDCache::rejoin_send_acks() >> // dir >> for (map<int,int>::iterator r = dir->replicas_begin(); >> r != dir->replicas_end(); >> - ++r) >> + ++r) { >> ack[r->first]->add_strong_dirfrag(dir->dirfrag(), ++r->second, dir->dir_rep); >> + ack[r->first]->add_dirfrag_base(dir); >> + } >> >> for (CDir::map_t::iterator q = dir->items.begin(); >> q != dir->items.end(); >> diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h >> index b88f551..7c37ab4 100644 >> --- a/src/messages/MMDSCacheRejoin.h >> +++ b/src/messages/MMDSCacheRejoin.h >> @@ -20,6 +20,7 @@ >> #include "include/types.h" >> >> #include "mds/CInode.h" >> +#include "mds/CDir.h" >> >> // sent from replica to auth >> >> @@ -169,6 +170,7 @@ class MMDSCacheRejoin : public Message { >> // full >> bufferlist inode_base; >> bufferlist inode_locks; >> + bufferlist dirfrag_base; >> >> // authpins, xlocks >> struct slave_reqid { >> @@ -258,7 +260,13 @@ public: >> void add_strong_dirfrag(dirfrag_t df, int n, int dr) { >> strong_dirfrags[df] = dirfrag_strong(n, dr); >> } >> - >> + void add_dirfrag_base(CDir *dir) { >> + ::encode(dir->dirfrag(), dirfrag_base); >> + bufferlist bl; >> + dir->_encode_base(bl); >> + ::encode(bl, dirfrag_base); >> + } > > We are guilty of doing this in other places, but we should avoid > implicit encodings like this one, especially when 
the decode happens > somewhere else like it does here. We can make a vector dirfrag_bases > and add to that, and then encode and decode it along with the rest of > the message — would that work for your purposes? > -Greg > update this patch or send a new patch that updates both {inode,dirfrag}_base? Thanks Yan, Zheng >> + >> // dentries >> void add_weak_dirfrag(dirfrag_t df) { >> weak_dirfrags.insert(df); >> @@ -294,6 +302,7 @@ public: >> ::encode(wrlocked_inodes, payload); >> ::encode(cap_export_bl, payload); >> ::encode(strong_dirfrags, payload); >> + ::encode(dirfrag_base, payload); >> ::encode(weak, payload); >> ::encode(weak_dirfrags, payload); >> ::encode(weak_inodes, payload); >> @@ -319,6 +328,7 @@ public: >> ::decode(cap_export_paths, q); >> } >> ::decode(strong_dirfrags, p); >> + ::decode(dirfrag_base, p); >> ::decode(weak, p); >> ::decode(weak_dirfrags, p); >> ::decode(weak_inodes, p); >> -- >> 1.7.11.7 >> -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Mar 20, 2013 at 11:41 PM, Yan, Zheng <zheng.z.yan@intel.com> wrote: > On 03/21/2013 07:33 AM, Gregory Farnum wrote: >> This needs to handle versioning the encoding based on peer feature bits too. >> >> On Sun, Mar 17, 2013 at 7:51 AM, Yan, Zheng <zheng.z.yan@intel.com> wrote: >>> From: "Yan, Zheng" <zheng.z.yan@intel.com> >>> >>> Cache rejoin ack message already encodes inode base, make it also encode >>> dirfrag base. This allows the message to replicate stray dentries like >>> MDentryUnlink message. The function will be used by later patch. >>> >>> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> >>> --- >>> src/mds/CDir.h | 20 +++++++++++++------- >>> src/mds/MDCache.cc | 20 ++++++++++++++++++-- >>> src/messages/MMDSCacheRejoin.h | 12 +++++++++++- >>> 3 files changed, 42 insertions(+), 10 deletions(-) >>> >>> diff --git a/src/mds/CDir.h b/src/mds/CDir.h >>> index 79946f1..f4a3a3d 100644 >>> --- a/src/mds/CDir.h >>> +++ b/src/mds/CDir.h >>> @@ -437,23 +437,29 @@ private: >>> ::encode(dist, bl); >>> } >>> >>> - void encode_replica(int who, bufferlist& bl) { >>> - __u32 nonce = add_replica(who); >>> - ::encode(nonce, bl); >>> + void _encode_base(bufferlist& bl) { >>> ::encode(first, bl); >>> ::encode(fnode, bl); >>> ::encode(dir_rep, bl); >>> ::encode(dir_rep_by, bl); >>> } >>> - void decode_replica(bufferlist::iterator& p) { >>> - __u32 nonce; >>> - ::decode(nonce, p); >>> - replica_nonce = nonce; >>> + void _decode_base(bufferlist::iterator& p) { >>> ::decode(first, p); >>> ::decode(fnode, p); >>> ::decode(dir_rep, p); >>> ::decode(dir_rep_by, p); >>> } >>> + void encode_replica(int who, bufferlist& bl) { >>> + __u32 nonce = add_replica(who); >>> + ::encode(nonce, bl); >>> + _encode_base(bl); >>> + } >>> + void decode_replica(bufferlist::iterator& p) { >>> + __u32 nonce; >>> + ::decode(nonce, p); >>> + replica_nonce = nonce; >>> + _decode_base(p); >>> + } >>> >>> >>> >>> diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc >>> index 8ba676e..344777e 
100644 >>> --- a/src/mds/MDCache.cc >>> +++ b/src/mds/MDCache.cc >>> @@ -4510,8 +4510,22 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack) >>> } >>> } >>> >>> + // full dirfrags >>> + bufferlist::iterator p = ack->dirfrag_base.begin(); >>> + while (!p.end()) { >>> + dirfrag_t df; >>> + bufferlist basebl; >>> + ::decode(df, p); >>> + ::decode(basebl, p); >>> + CDir *dir = get_dirfrag(df); >>> + assert(dir); >>> + bufferlist::iterator q = basebl.begin(); >>> + dir->_decode_base(q); >>> + dout(10) << " got dir replica " << *dir << dendl; >>> + } >>> + >>> // full inodes >>> - bufferlist::iterator p = ack->inode_base.begin(); >>> + p = ack->inode_base.begin(); >>> while (!p.end()) { >>> inodeno_t ino; >>> snapid_t last; >>> @@ -5178,8 +5192,10 @@ void MDCache::rejoin_send_acks() >>> // dir >>> for (map<int,int>::iterator r = dir->replicas_begin(); >>> r != dir->replicas_end(); >>> - ++r) >>> + ++r) { >>> ack[r->first]->add_strong_dirfrag(dir->dirfrag(), ++r->second, dir->dir_rep); >>> + ack[r->first]->add_dirfrag_base(dir); >>> + } >>> >>> for (CDir::map_t::iterator q = dir->items.begin(); >>> q != dir->items.end(); >>> diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h >>> index b88f551..7c37ab4 100644 >>> --- a/src/messages/MMDSCacheRejoin.h >>> +++ b/src/messages/MMDSCacheRejoin.h >>> @@ -20,6 +20,7 @@ >>> #include "include/types.h" >>> >>> #include "mds/CInode.h" >>> +#include "mds/CDir.h" >>> >>> // sent from replica to auth >>> >>> @@ -169,6 +170,7 @@ class MMDSCacheRejoin : public Message { >>> // full >>> bufferlist inode_base; >>> bufferlist inode_locks; >>> + bufferlist dirfrag_base; >>> >>> // authpins, xlocks >>> struct slave_reqid { >>> @@ -258,7 +260,13 @@ public: >>> void add_strong_dirfrag(dirfrag_t df, int n, int dr) { >>> strong_dirfrags[df] = dirfrag_strong(n, dr); >>> } >>> - >>> + void add_dirfrag_base(CDir *dir) { >>> + ::encode(dir->dirfrag(), dirfrag_base); >>> + bufferlist bl; >>> + dir->_encode_base(bl); 
>>> + ::encode(bl, dirfrag_base); >>> + } >> >> We are guilty of doing this in other places, but we should avoid >> implicit encodings like this one, especially when the decode happens >> somewhere else like it does here. We can make a vector dirfrag_bases >> and add to that, and then encode and decode it along with the rest of >> the message — would that work for your purposes? >> -Greg >> > > update this patch or send a new patch that updates both {inode,dirfrag}_base? > > Thanks > Yan, Zheng Updating this one is fine for me. :) -Greg > >>> + >>> // dentries >>> void add_weak_dirfrag(dirfrag_t df) { >>> weak_dirfrags.insert(df); >>> @@ -294,6 +302,7 @@ public: >>> ::encode(wrlocked_inodes, payload); >>> ::encode(cap_export_bl, payload); >>> ::encode(strong_dirfrags, payload); >>> + ::encode(dirfrag_base, payload); >>> ::encode(weak, payload); >>> ::encode(weak_dirfrags, payload); >>> ::encode(weak_inodes, payload); >>> @@ -319,6 +328,7 @@ public: >>> ::decode(cap_export_paths, q); >>> } >>> ::decode(strong_dirfrags, p); >>> + ::decode(dirfrag_base, p); >>> ::decode(weak, p); >>> ::decode(weak_dirfrags, p); >>> ::decode(weak_inodes, p); >>> -- >>> 1.7.11.7 >>> > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 79946f1..f4a3a3d 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -437,23 +437,29 @@ private: ::encode(dist, bl); } - void encode_replica(int who, bufferlist& bl) { - __u32 nonce = add_replica(who); - ::encode(nonce, bl); + void _encode_base(bufferlist& bl) { ::encode(first, bl); ::encode(fnode, bl); ::encode(dir_rep, bl); ::encode(dir_rep_by, bl); } - void decode_replica(bufferlist::iterator& p) { - __u32 nonce; - ::decode(nonce, p); - replica_nonce = nonce; + void _decode_base(bufferlist::iterator& p) { ::decode(first, p); ::decode(fnode, p); ::decode(dir_rep, p); ::decode(dir_rep_by, p); } + void encode_replica(int who, bufferlist& bl) { + __u32 nonce = add_replica(who); + ::encode(nonce, bl); + _encode_base(bl); + } + void decode_replica(bufferlist::iterator& p) { + __u32 nonce; + ::decode(nonce, p); + replica_nonce = nonce; + _decode_base(p); + } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 8ba676e..344777e 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -4510,8 +4510,22 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack) } } + // full dirfrags + bufferlist::iterator p = ack->dirfrag_base.begin(); + while (!p.end()) { + dirfrag_t df; + bufferlist basebl; + ::decode(df, p); + ::decode(basebl, p); + CDir *dir = get_dirfrag(df); + assert(dir); + bufferlist::iterator q = basebl.begin(); + dir->_decode_base(q); + dout(10) << " got dir replica " << *dir << dendl; + } + // full inodes - bufferlist::iterator p = ack->inode_base.begin(); + p = ack->inode_base.begin(); while (!p.end()) { inodeno_t ino; snapid_t last; @@ -5178,8 +5192,10 @@ void MDCache::rejoin_send_acks() // dir for (map<int,int>::iterator r = dir->replicas_begin(); r != dir->replicas_end(); - ++r) + ++r) { ack[r->first]->add_strong_dirfrag(dir->dirfrag(), ++r->second, dir->dir_rep); + ack[r->first]->add_dirfrag_base(dir); + } for (CDir::map_t::iterator q = dir->items.begin(); q != dir->items.end(); diff --git 
a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h index b88f551..7c37ab4 100644 --- a/src/messages/MMDSCacheRejoin.h +++ b/src/messages/MMDSCacheRejoin.h @@ -20,6 +20,7 @@ #include "include/types.h" #include "mds/CInode.h" +#include "mds/CDir.h" // sent from replica to auth @@ -169,6 +170,7 @@ class MMDSCacheRejoin : public Message { // full bufferlist inode_base; bufferlist inode_locks; + bufferlist dirfrag_base; // authpins, xlocks struct slave_reqid { @@ -258,7 +260,13 @@ public: void add_strong_dirfrag(dirfrag_t df, int n, int dr) { strong_dirfrags[df] = dirfrag_strong(n, dr); } - + void add_dirfrag_base(CDir *dir) { + ::encode(dir->dirfrag(), dirfrag_base); + bufferlist bl; + dir->_encode_base(bl); + ::encode(bl, dirfrag_base); + } + // dentries void add_weak_dirfrag(dirfrag_t df) { weak_dirfrags.insert(df); @@ -294,6 +302,7 @@ public: ::encode(wrlocked_inodes, payload); ::encode(cap_export_bl, payload); ::encode(strong_dirfrags, payload); + ::encode(dirfrag_base, payload); ::encode(weak, payload); ::encode(weak_dirfrags, payload); ::encode(weak_inodes, payload); @@ -319,6 +328,7 @@ public: ::decode(cap_export_paths, q); } ::decode(strong_dirfrags, p); + ::decode(dirfrag_base, p); ::decode(weak, p); ::decode(weak_dirfrags, p); ::decode(weak_inodes, p);