Message ID | 20210615054921.101421-14-colyli@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | bcache patches for Linux v5.14 | expand |
On 6/15/21 7:49 AM, Coly Li wrote: > This patch implements two methods to read jset from media for journal > replay, > - __jnl_rd_bkt() for block device > This is the legacy method to read jset via block device interface. > - __jnl_rd_nvm_bkt() for NVDIMM > This is the method to read jset from NVDIMM memory interface, a.k.a > memcopy() from NVDIMM pages to DRAM pages. > > If BCH_FEATURE_INCOMPAT_NVDIMM_META is set in incompat feature set, > during running cache set, journal_read_bucket() will read the journal > content from NVDIMM by __jnl_rd_nvm_bkt(). The linear addresses of > NVDIMM pages to read jset are stored in sb.d[SB_JOURNAL_BUCKETS], which > were initialized and maintained in previous runs of the cache set. > > A thing should be noticed is, when bch_journal_read() is called, the > linear address of NVDIMM pages is not loaded and initialized yet, it > is necessary to call __bch_journal_nvdimm_init() before reading the jset > from NVDIMM pages. > > Signed-off-by: Coly Li <colyli@suse.de> > Cc: Jianpeng Ma <jianpeng.ma@intel.com> > Cc: Qiaowei Ren <qiaowei.ren@intel.com> > --- > drivers/md/bcache/journal.c | 93 +++++++++++++++++++++++++++---------- > 1 file changed, 69 insertions(+), 24 deletions(-) > > diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c > index 03ecedf813b0..23e5ccf125df 100644 > --- a/drivers/md/bcache/journal.c > +++ b/drivers/md/bcache/journal.c > @@ -34,60 +34,96 @@ static void journal_read_endio(struct bio *bio) > closure_put(cl); > } > > +static struct jset *__jnl_rd_bkt(struct cache *ca, unsigned int bkt_idx, > + unsigned int len, unsigned int offset, > + struct closure *cl) > +{ > + sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bkt_idx]); > + struct bio *bio = &ca->journal.bio; > + struct jset *data = ca->set->journal.w[0].data; > + > + bio_reset(bio); > + bio->bi_iter.bi_sector = bucket + offset; > + bio_set_dev(bio, ca->bdev); > + bio->bi_iter.bi_size = len << 9; > + bio->bi_end_io = journal_read_endio; > 
+ bio->bi_private = cl; > + bio_set_op_attrs(bio, REQ_OP_READ, 0); > + bch_bio_map(bio, data); > + > + closure_bio_submit(ca->set, bio, cl); > + closure_sync(cl); > + > + /* Indeed journal.w[0].data */ > + return data; > +} > + > +#if defined(CONFIG_BCACHE_NVM_PAGES) > + > +static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx, > + unsigned int len, unsigned int offset) > +{ > + void *jset_addr = (void *)ca->sb.d[bkt_idx] + (offset << 9); > + struct jset *data = ca->set->journal.w[0].data; > + > + memcpy(data, jset_addr, len << 9); > + > + /* Indeed journal.w[0].data */ > + return data; > +} > + > +#else /* CONFIG_BCACHE_NVM_PAGES */ > + > +static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx, > + unsigned int len, unsigned int offset) > +{ > + return NULL; > +} > + > +#endif /* CONFIG_BCACHE_NVM_PAGES */ > + > static int journal_read_bucket(struct cache *ca, struct list_head *list, > - unsigned int bucket_index) > + unsigned int bucket_idx) This renaming is pointless. 
> { > struct journal_device *ja = &ca->journal; > - struct bio *bio = &ja->bio; > > struct journal_replay *i; > - struct jset *j, *data = ca->set->journal.w[0].data; > + struct jset *j; > struct closure cl; > unsigned int len, left, offset = 0; > int ret = 0; > - sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); > > closure_init_stack(&cl); > > - pr_debug("reading %u\n", bucket_index); > + pr_debug("reading %u\n", bucket_idx); > > while (offset < ca->sb.bucket_size) { > reread: left = ca->sb.bucket_size - offset; > len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS); > > - bio_reset(bio); > - bio->bi_iter.bi_sector = bucket + offset; > - bio_set_dev(bio, ca->bdev); > - bio->bi_iter.bi_size = len << 9; > - > - bio->bi_end_io = journal_read_endio; > - bio->bi_private = &cl; > - bio_set_op_attrs(bio, REQ_OP_READ, 0); > - bch_bio_map(bio, data); > - > - closure_bio_submit(ca->set, bio, &cl); > - closure_sync(&cl); > + if (!bch_has_feature_nvdimm_meta(&ca->sb)) > + j = __jnl_rd_bkt(ca, bucket_idx, len, offset, &cl); > + else > + j = __jnl_rd_nvm_bkt(ca, bucket_idx, len, offset); > > /* This function could be simpler now since we no longer write > * journal entries that overlap bucket boundaries; this means > * the start of a bucket will always have a valid journal entry > * if it has any journal entries at all. 
> */ > - > - j = data; > while (len) { > struct list_head *where; > size_t blocks, bytes = set_bytes(j); > > if (j->magic != jset_magic(&ca->sb)) { > - pr_debug("%u: bad magic\n", bucket_index); > + pr_debug("%u: bad magic\n", bucket_idx); > return ret; > } > > if (bytes > left << 9 || > bytes > PAGE_SIZE << JSET_BITS) { > pr_info("%u: too big, %zu bytes, offset %u\n", > - bucket_index, bytes, offset); > + bucket_idx, bytes, offset); > return ret; > } > > @@ -96,7 +132,7 @@ reread: left = ca->sb.bucket_size - offset; > > if (j->csum != csum_set(j)) { > pr_info("%u: bad csum, %zu bytes, offset %u\n", > - bucket_index, bytes, offset); > + bucket_idx, bytes, offset); > return ret; > } > > @@ -158,8 +194,8 @@ reread: left = ca->sb.bucket_size - offset; > list_add(&i->list, where); > ret = 1; > > - if (j->seq > ja->seq[bucket_index]) > - ja->seq[bucket_index] = j->seq; > + if (j->seq > ja->seq[bucket_idx]) > + ja->seq[bucket_idx] = j->seq; > next_set: > offset += blocks * ca->sb.block_size; > len -= blocks * ca->sb.block_size; > @@ -170,6 +206,8 @@ reread: left = ca->sb.bucket_size - offset; > return ret; > } > > +static int __bch_journal_nvdimm_init(struct cache *ca); > + > int bch_journal_read(struct cache_set *c, struct list_head *list) > { > #define read_bucket(b) \ > @@ -188,6 +226,13 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) > unsigned int i, l, r, m; > uint64_t seq; > > + /* > + * Linear addresses of NVDIMM pages for journaling is not > + * initialized yet, do it before read jset from NVDIMM pages. > + */ > + if (bch_has_feature_nvdimm_meta(&ca->sb)) > + __bch_journal_nvdimm_init(ca); > + > bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); > pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); > > Cheers, Hannes
On 6/22/21 7:04 PM, Hannes Reinecke wrote: > On 6/15/21 7:49 AM, Coly Li wrote: >> This patch implements two methods to read jset from media for journal >> replay, >> - __jnl_rd_bkt() for block device >> This is the legacy method to read jset via block device interface. >> - __jnl_rd_nvm_bkt() for NVDIMM >> This is the method to read jset from NVDIMM memory interface, a.k.a >> memcopy() from NVDIMM pages to DRAM pages. >> >> If BCH_FEATURE_INCOMPAT_NVDIMM_META is set in incompat feature set, >> during running cache set, journal_read_bucket() will read the journal >> content from NVDIMM by __jnl_rd_nvm_bkt(). The linear addresses of >> NVDIMM pages to read jset are stored in sb.d[SB_JOURNAL_BUCKETS], which >> were initialized and maintained in previous runs of the cache set. >> >> A thing should be noticed is, when bch_journal_read() is called, the >> linear address of NVDIMM pages is not loaded and initialized yet, it >> is necessary to call __bch_journal_nvdimm_init() before reading the jset >> from NVDIMM pages. 
>> >> Signed-off-by: Coly Li <colyli@suse.de> >> Cc: Jianpeng Ma <jianpeng.ma@intel.com> >> Cc: Qiaowei Ren <qiaowei.ren@intel.com> >> --- >> drivers/md/bcache/journal.c | 93 +++++++++++++++++++++++++++---------- >> 1 file changed, 69 insertions(+), 24 deletions(-) >> >> diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c >> index 03ecedf813b0..23e5ccf125df 100644 >> --- a/drivers/md/bcache/journal.c >> +++ b/drivers/md/bcache/journal.c >> @@ -34,60 +34,96 @@ static void journal_read_endio(struct bio *bio) >> closure_put(cl); >> } >> >> +static struct jset *__jnl_rd_bkt(struct cache *ca, unsigned int bkt_idx, >> + unsigned int len, unsigned int offset, >> + struct closure *cl) >> +{ >> + sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bkt_idx]); >> + struct bio *bio = &ca->journal.bio; >> + struct jset *data = ca->set->journal.w[0].data; >> + >> + bio_reset(bio); >> + bio->bi_iter.bi_sector = bucket + offset; >> + bio_set_dev(bio, ca->bdev); >> + bio->bi_iter.bi_size = len << 9; >> + bio->bi_end_io = journal_read_endio; >> + bio->bi_private = cl; >> + bio_set_op_attrs(bio, REQ_OP_READ, 0); >> + bch_bio_map(bio, data); >> + >> + closure_bio_submit(ca->set, bio, cl); >> + closure_sync(cl); >> + >> + /* Indeed journal.w[0].data */ >> + return data; >> +} >> + >> +#if defined(CONFIG_BCACHE_NVM_PAGES) >> + >> +static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx, >> + unsigned int len, unsigned int offset) >> +{ >> + void *jset_addr = (void *)ca->sb.d[bkt_idx] + (offset << 9); >> + struct jset *data = ca->set->journal.w[0].data; >> + >> + memcpy(data, jset_addr, len << 9); >> + >> + /* Indeed journal.w[0].data */ >> + return data; >> +} >> + >> +#else /* CONFIG_BCACHE_NVM_PAGES */ >> + >> +static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx, >> + unsigned int len, unsigned int offset) >> +{ >> + return NULL; >> +} >> + >> +#endif /* CONFIG_BCACHE_NVM_PAGES */ >> + >> static int 
journal_read_bucket(struct cache *ca, struct list_head *list, >> - unsigned int bucket_index) >> + unsigned int bucket_idx) > This renaming is pointless. Copied, will revert this in next post. Thanks for your review. Coly Li
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 03ecedf813b0..23e5ccf125df 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -34,60 +34,96 @@ static void journal_read_endio(struct bio *bio) closure_put(cl); } +static struct jset *__jnl_rd_bkt(struct cache *ca, unsigned int bkt_idx, + unsigned int len, unsigned int offset, + struct closure *cl) +{ + sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bkt_idx]); + struct bio *bio = &ca->journal.bio; + struct jset *data = ca->set->journal.w[0].data; + + bio_reset(bio); + bio->bi_iter.bi_sector = bucket + offset; + bio_set_dev(bio, ca->bdev); + bio->bi_iter.bi_size = len << 9; + bio->bi_end_io = journal_read_endio; + bio->bi_private = cl; + bio_set_op_attrs(bio, REQ_OP_READ, 0); + bch_bio_map(bio, data); + + closure_bio_submit(ca->set, bio, cl); + closure_sync(cl); + + /* Indeed journal.w[0].data */ + return data; +} + +#if defined(CONFIG_BCACHE_NVM_PAGES) + +static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx, + unsigned int len, unsigned int offset) +{ + void *jset_addr = (void *)ca->sb.d[bkt_idx] + (offset << 9); + struct jset *data = ca->set->journal.w[0].data; + + memcpy(data, jset_addr, len << 9); + + /* Indeed journal.w[0].data */ + return data; +} + +#else /* CONFIG_BCACHE_NVM_PAGES */ + +static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx, + unsigned int len, unsigned int offset) +{ + return NULL; +} + +#endif /* CONFIG_BCACHE_NVM_PAGES */ + static int journal_read_bucket(struct cache *ca, struct list_head *list, - unsigned int bucket_index) + unsigned int bucket_idx) { struct journal_device *ja = &ca->journal; - struct bio *bio = &ja->bio; struct journal_replay *i; - struct jset *j, *data = ca->set->journal.w[0].data; + struct jset *j; struct closure cl; unsigned int len, left, offset = 0; int ret = 0; - sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); closure_init_stack(&cl); - 
pr_debug("reading %u\n", bucket_index); + pr_debug("reading %u\n", bucket_idx); while (offset < ca->sb.bucket_size) { reread: left = ca->sb.bucket_size - offset; len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS); - bio_reset(bio); - bio->bi_iter.bi_sector = bucket + offset; - bio_set_dev(bio, ca->bdev); - bio->bi_iter.bi_size = len << 9; - - bio->bi_end_io = journal_read_endio; - bio->bi_private = &cl; - bio_set_op_attrs(bio, REQ_OP_READ, 0); - bch_bio_map(bio, data); - - closure_bio_submit(ca->set, bio, &cl); - closure_sync(&cl); + if (!bch_has_feature_nvdimm_meta(&ca->sb)) + j = __jnl_rd_bkt(ca, bucket_idx, len, offset, &cl); + else + j = __jnl_rd_nvm_bkt(ca, bucket_idx, len, offset); /* This function could be simpler now since we no longer write * journal entries that overlap bucket boundaries; this means * the start of a bucket will always have a valid journal entry * if it has any journal entries at all. */ - - j = data; while (len) { struct list_head *where; size_t blocks, bytes = set_bytes(j); if (j->magic != jset_magic(&ca->sb)) { - pr_debug("%u: bad magic\n", bucket_index); + pr_debug("%u: bad magic\n", bucket_idx); return ret; } if (bytes > left << 9 || bytes > PAGE_SIZE << JSET_BITS) { pr_info("%u: too big, %zu bytes, offset %u\n", - bucket_index, bytes, offset); + bucket_idx, bytes, offset); return ret; } @@ -96,7 +132,7 @@ reread: left = ca->sb.bucket_size - offset; if (j->csum != csum_set(j)) { pr_info("%u: bad csum, %zu bytes, offset %u\n", - bucket_index, bytes, offset); + bucket_idx, bytes, offset); return ret; } @@ -158,8 +194,8 @@ reread: left = ca->sb.bucket_size - offset; list_add(&i->list, where); ret = 1; - if (j->seq > ja->seq[bucket_index]) - ja->seq[bucket_index] = j->seq; + if (j->seq > ja->seq[bucket_idx]) + ja->seq[bucket_idx] = j->seq; next_set: offset += blocks * ca->sb.block_size; len -= blocks * ca->sb.block_size; @@ -170,6 +206,8 @@ reread: left = ca->sb.bucket_size - offset; return ret; } +static int 
__bch_journal_nvdimm_init(struct cache *ca); + int bch_journal_read(struct cache_set *c, struct list_head *list) { #define read_bucket(b) \ @@ -188,6 +226,13 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) unsigned int i, l, r, m; uint64_t seq; + /* + * Linear addresses of NVDIMM pages for journaling is not + * initialized yet, do it before read jset from NVDIMM pages. + */ + if (bch_has_feature_nvdimm_meta(&ca->sb)) + __bch_journal_nvdimm_init(ca); + bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
This patch implements two methods to read a jset from media for journal replay: - __jnl_rd_bkt() for block devices. This is the legacy method, which reads the jset via the block device interface. - __jnl_rd_nvm_bkt() for NVDIMM. This method reads the jset through the NVDIMM memory interface, i.e. a memcpy() from NVDIMM pages to DRAM pages. If BCH_FEATURE_INCOMPAT_NVDIMM_META is set in the incompat feature set, then while the cache set is being run, journal_read_bucket() reads the journal content from NVDIMM via __jnl_rd_nvm_bkt(). The linear addresses of the NVDIMM pages from which the jsets are read are stored in sb.d[SB_JOURNAL_BUCKETS]; they were initialized and maintained in previous runs of the cache set. Note that when bch_journal_read() is called, the linear addresses of the NVDIMM pages have not been loaded and initialized yet, so it is necessary to call __bch_journal_nvdimm_init() before reading the jset from the NVDIMM pages. Signed-off-by: Coly Li <colyli@suse.de> Cc: Jianpeng Ma <jianpeng.ma@intel.com> Cc: Qiaowei Ren <qiaowei.ren@intel.com> --- drivers/md/bcache/journal.c | 93 +++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 24 deletions(-)