diff mbox series

nvdimm: Add NVDIMM_NO_DEEPFLUSH flag to control btt data deepflush

Message ID 20220629135801.192821-1-dennis.wu@intel.com (mailing list archive)
State New, archived
Headers show
Series nvdimm: Add NVDIMM_NO_DEEPFLUSH flag to control btt data deepflush | expand

Commit Message

dennis.wu June 29, 2022, 1:58 p.m. UTC
Reason: we can have a global control of deepflush in the nfit module
by "no_deepflush" param. In the case of "no_deepflush=0", we still
need control data deepflush or not by the NVDIMM_NO_DEEPFLUSH flag.
In the BTT, the btt information block(btt_sb) will use deepflush.
Other like the data blocks(512B or 4KB),4 bytes btt_map and 16 bytes
bflog will not use the deepflush. so that, during the runtime, no
deepflush will be called in the BTT.

How: Add flag NVDIMM_NO_DEEPFLUSH which can use with NVDIMM_IO_ATOMIC
like NVDIMM_NO_DEEPFLUSH | NVDIMM_IO_ATOMIC.
"if (!(flags & NVDIMM_NO_DEEPFLUSH))", nvdimm_flush() will be called,
otherwise, the pmem_wmb() called to fense all previous write.

Signed-off-by: Dennis.Wu <dennis.wu@intel.com>
---
 drivers/nvdimm/btt.c   | 26 +++++++++++++++++---------
 drivers/nvdimm/claim.c |  9 +++++++--
 drivers/nvdimm/nd.h    |  4 ++++
 3 files changed, 28 insertions(+), 11 deletions(-)

Comments

Ira Weiny June 30, 2022, 10:32 p.m. UTC | #1
On Wed, Jun 29, 2022 at 09:58:01PM +0800, Dennis.Wu wrote:
> Reason: we can have a global control of deepflush in the nfit module
> by "no_deepflush" param. In the case of "no_deepflush=0", we still
> need control data deepflush or not by the NVDIMM_NO_DEEPFLUSH flag.
> In the BTT, the btt information block(btt_sb) will use deepflush.
> Other like the data blocks(512B or 4KB),4 bytes btt_map and 16 bytes
> bflog will not use the deepflush. so that, during the runtime, no
> deepflush will be called in the BTT.
> 
> How: Add flag NVDIMM_NO_DEEPFLUSH which can use with NVDIMM_IO_ATOMIC
> like NVDIMM_NO_DEEPFLUSH | NVDIMM_IO_ATOMIC.
> "if (!(flags & NVDIMM_NO_DEEPFLUSH))", nvdimm_flush() will be called,
> otherwise, the pmem_wmb() called to fense all previous write.
> 

This looks like the same patch you sent earlier?  Did it change?  Is this a V2?

Ira

> Signed-off-by: Dennis.Wu <dennis.wu@intel.com>
> ---
>  drivers/nvdimm/btt.c   | 26 +++++++++++++++++---------
>  drivers/nvdimm/claim.c |  9 +++++++--
>  drivers/nvdimm/nd.h    |  4 ++++
>  3 files changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
> index 9613e54c7a67..c71ba7a1edd0 100644
> --- a/drivers/nvdimm/btt.c
> +++ b/drivers/nvdimm/btt.c
> @@ -70,6 +70,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
>  	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
>  		"arena->info2off: %#llx is unaligned\n", arena->info2off);
>  
> +	/*
> +	 * btt_sb is critial information and need proper write
> +	 * nvdimm_flush will be called (deepflush)
> +	 */
>  	ret = arena_write_bytes(arena, arena->info2off, super,
>  			sizeof(struct btt_sb), 0);
>  	if (ret)
> @@ -384,7 +388,8 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
>  {
>  	int ret;
>  
> -	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
> +	ret = __btt_log_write(arena, lane, sub, ent,
> +		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>  	if (ret)
>  		return ret;
>  
> @@ -429,7 +434,7 @@ static int btt_map_init(struct arena_info *arena)
>  		dev_WARN_ONCE(to_dev(arena), size < 512,
>  			"chunk size: %#zx is unaligned\n", size);
>  		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
> -				size, 0);
> +				size, NVDIMM_NO_DEEPFLUSH);
>  		if (ret)
>  			goto free;
>  
> @@ -473,7 +478,7 @@ static int btt_log_init(struct arena_info *arena)
>  		dev_WARN_ONCE(to_dev(arena), size < 512,
>  			"chunk size: %#zx is unaligned\n", size);
>  		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
> -				size, 0);
> +				size, NVDIMM_NO_DEEPFLUSH);
>  		if (ret)
>  			goto free;
>  
> @@ -487,7 +492,7 @@ static int btt_log_init(struct arena_info *arena)
>  		ent.old_map = cpu_to_le32(arena->external_nlba + i);
>  		ent.new_map = cpu_to_le32(arena->external_nlba + i);
>  		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
> -		ret = __btt_log_write(arena, i, 0, &ent, 0);
> +		ret = __btt_log_write(arena, i, 0, &ent, NVDIMM_NO_DEEPFLUSH);
>  		if (ret)
>  			goto free;
>  	}
> @@ -518,7 +523,7 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
>  			unsigned long chunk = min(len, PAGE_SIZE);
>  
>  			ret = arena_write_bytes(arena, nsoff, zero_page,
> -				chunk, 0);
> +				chunk, NVDIMM_NO_DEEPFLUSH);
>  			if (ret)
>  				break;
>  			len -= chunk;
> @@ -592,7 +597,8 @@ static int btt_freelist_init(struct arena_info *arena)
>  			 * to complete the map write. So fix up the map.
>  			 */
>  			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
> -					le32_to_cpu(log_new.new_map), 0, 0, 0);
> +					le32_to_cpu(log_new.new_map), 0, 0,
> +					NVDIMM_NO_DEEPFLUSH);
>  			if (ret)
>  				return ret;
>  		}
> @@ -1123,7 +1129,8 @@ static int btt_data_write(struct arena_info *arena, u32 lba,
>  	u64 nsoff = to_namespace_offset(arena, lba);
>  	void *mem = kmap_atomic(page);
>  
> -	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
> +	ret = arena_write_bytes(arena, nsoff, mem + off, len,
> +		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>  	kunmap_atomic(mem);
>  
>  	return ret;
> @@ -1260,7 +1267,8 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
>  		ret = btt_data_read(arena, page, off, postmap, cur_len);
>  		if (ret) {
>  			/* Media error - set the e_flag */
> -			if (btt_map_write(arena, premap, postmap, 0, 1, NVDIMM_IO_ATOMIC))
> +			if (btt_map_write(arena, premap, postmap, 0, 1,
> +				NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH))
>  				dev_warn_ratelimited(to_dev(arena),
>  					"Error persistently tracking bad blocks at %#x\n",
>  					premap);
> @@ -1393,7 +1401,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
>  			goto out_map;
>  
>  		ret = btt_map_write(arena, premap, new_postmap, 0, 0,
> -			NVDIMM_IO_ATOMIC);
> +			NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>  		if (ret)
>  			goto out_map;
>  
> diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
> index 030dbde6b088..c1fa3291c063 100644
> --- a/drivers/nvdimm/claim.c
> +++ b/drivers/nvdimm/claim.c
> @@ -294,9 +294,14 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>  	}
>  
>  	memcpy_flushcache(nsio->addr + offset, buf, size);
> -	ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
> -	if (ret)
> +	if (!(flags & NVDIMM_NO_DEEPFLUSH)) {
> +		ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
> +		if (ret)
> +			rc = ret;
> +	} else {
>  		rc = ret;
> +		pmem_wmb();
> +	}
>  
>  	return rc;
>  }
> diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
> index ec5219680092..a16e259a8cff 100644
> --- a/drivers/nvdimm/nd.h
> +++ b/drivers/nvdimm/nd.h
> @@ -22,7 +22,11 @@ enum {
>  	 */
>  	ND_MAX_LANES = 256,
>  	INT_LBASIZE_ALIGNMENT = 64,
> +	/*
> +	 * NVDIMM_IO_ATOMIC | NVDIMM_NO_DEEPFLUSH is support.
> +	 */
>  	NVDIMM_IO_ATOMIC = 1,
> +	NVDIMM_NO_DEEPFLUSH = 2,
>  };
>  
>  struct nvdimm_drvdata {
> -- 
> 2.27.0
>
dennis.wu July 1, 2022, 4:38 a.m. UTC | #2
Thank you Ira! Sorry for duplicating the patch.

The first patch is made on the kernel v5.17 and can't patch with the 
latest Linux master and the second one is made with the Linux master.

On 7/1/22 06:32, Weiny, Ira wrote:
> On Wed, Jun 29, 2022 at 09:58:01PM +0800, Dennis.Wu wrote:
>> Reason: we can have a global control of deepflush in the nfit module
>> by "no_deepflush" param. In the case of "no_deepflush=0", we still
>> need control data deepflush or not by the NVDIMM_NO_DEEPFLUSH flag.
>> In the BTT, the btt information block(btt_sb) will use deepflush.
>> Other like the data blocks(512B or 4KB),4 bytes btt_map and 16 bytes
>> bflog will not use the deepflush. so that, during the runtime, no
>> deepflush will be called in the BTT.
>>
>> How: Add flag NVDIMM_NO_DEEPFLUSH which can use with NVDIMM_IO_ATOMIC
>> like NVDIMM_NO_DEEPFLUSH | NVDIMM_IO_ATOMIC.
>> "if (!(flags & NVDIMM_NO_DEEPFLUSH))", nvdimm_flush() will be called,
>> otherwise, the pmem_wmb() called to fense all previous write.
>>
> This looks like the same patch you sent earlier?  Did it change?  Is this a V2?
>
> Ira
>
>> Signed-off-by: Dennis.Wu <dennis.wu@intel.com>
>> ---
>>   drivers/nvdimm/btt.c   | 26 +++++++++++++++++---------
>>   drivers/nvdimm/claim.c |  9 +++++++--
>>   drivers/nvdimm/nd.h    |  4 ++++
>>   3 files changed, 28 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
>> index 9613e54c7a67..c71ba7a1edd0 100644
>> --- a/drivers/nvdimm/btt.c
>> +++ b/drivers/nvdimm/btt.c
>> @@ -70,6 +70,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
>>   	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
>>   		"arena->info2off: %#llx is unaligned\n", arena->info2off);
>>   
>> +	/*
>> +	 * btt_sb is critial information and need proper write
>> +	 * nvdimm_flush will be called (deepflush)
>> +	 */
>>   	ret = arena_write_bytes(arena, arena->info2off, super,
>>   			sizeof(struct btt_sb), 0);
>>   	if (ret)
>> @@ -384,7 +388,8 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
>>   {
>>   	int ret;
>>   
>> -	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
>> +	ret = __btt_log_write(arena, lane, sub, ent,
>> +		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>>   	if (ret)
>>   		return ret;
>>   
>> @@ -429,7 +434,7 @@ static int btt_map_init(struct arena_info *arena)
>>   		dev_WARN_ONCE(to_dev(arena), size < 512,
>>   			"chunk size: %#zx is unaligned\n", size);
>>   		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
>> -				size, 0);
>> +				size, NVDIMM_NO_DEEPFLUSH);
>>   		if (ret)
>>   			goto free;
>>   
>> @@ -473,7 +478,7 @@ static int btt_log_init(struct arena_info *arena)
>>   		dev_WARN_ONCE(to_dev(arena), size < 512,
>>   			"chunk size: %#zx is unaligned\n", size);
>>   		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
>> -				size, 0);
>> +				size, NVDIMM_NO_DEEPFLUSH);
>>   		if (ret)
>>   			goto free;
>>   
>> @@ -487,7 +492,7 @@ static int btt_log_init(struct arena_info *arena)
>>   		ent.old_map = cpu_to_le32(arena->external_nlba + i);
>>   		ent.new_map = cpu_to_le32(arena->external_nlba + i);
>>   		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
>> -		ret = __btt_log_write(arena, i, 0, &ent, 0);
>> +		ret = __btt_log_write(arena, i, 0, &ent, NVDIMM_NO_DEEPFLUSH);
>>   		if (ret)
>>   			goto free;
>>   	}
>> @@ -518,7 +523,7 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
>>   			unsigned long chunk = min(len, PAGE_SIZE);
>>   
>>   			ret = arena_write_bytes(arena, nsoff, zero_page,
>> -				chunk, 0);
>> +				chunk, NVDIMM_NO_DEEPFLUSH);
>>   			if (ret)
>>   				break;
>>   			len -= chunk;
>> @@ -592,7 +597,8 @@ static int btt_freelist_init(struct arena_info *arena)
>>   			 * to complete the map write. So fix up the map.
>>   			 */
>>   			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
>> -					le32_to_cpu(log_new.new_map), 0, 0, 0);
>> +					le32_to_cpu(log_new.new_map), 0, 0,
>> +					NVDIMM_NO_DEEPFLUSH);
>>   			if (ret)
>>   				return ret;
>>   		}
>> @@ -1123,7 +1129,8 @@ static int btt_data_write(struct arena_info *arena, u32 lba,
>>   	u64 nsoff = to_namespace_offset(arena, lba);
>>   	void *mem = kmap_atomic(page);
>>   
>> -	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
>> +	ret = arena_write_bytes(arena, nsoff, mem + off, len,
>> +		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>>   	kunmap_atomic(mem);
>>   
>>   	return ret;
>> @@ -1260,7 +1267,8 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
>>   		ret = btt_data_read(arena, page, off, postmap, cur_len);
>>   		if (ret) {
>>   			/* Media error - set the e_flag */
>> -			if (btt_map_write(arena, premap, postmap, 0, 1, NVDIMM_IO_ATOMIC))
>> +			if (btt_map_write(arena, premap, postmap, 0, 1,
>> +				NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH))
>>   				dev_warn_ratelimited(to_dev(arena),
>>   					"Error persistently tracking bad blocks at %#x\n",
>>   					premap);
>> @@ -1393,7 +1401,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
>>   			goto out_map;
>>   
>>   		ret = btt_map_write(arena, premap, new_postmap, 0, 0,
>> -			NVDIMM_IO_ATOMIC);
>> +			NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>>   		if (ret)
>>   			goto out_map;
>>   
>> diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
>> index 030dbde6b088..c1fa3291c063 100644
>> --- a/drivers/nvdimm/claim.c
>> +++ b/drivers/nvdimm/claim.c
>> @@ -294,9 +294,14 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>>   	}
>>   
>>   	memcpy_flushcache(nsio->addr + offset, buf, size);
>> -	ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
>> -	if (ret)
>> +	if (!(flags & NVDIMM_NO_DEEPFLUSH)) {
>> +		ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
>> +		if (ret)
>> +			rc = ret;
>> +	} else {
>>   		rc = ret;
>> +		pmem_wmb();
>> +	}
>>   
>>   	return rc;
>>   }
>> diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
>> index ec5219680092..a16e259a8cff 100644
>> --- a/drivers/nvdimm/nd.h
>> +++ b/drivers/nvdimm/nd.h
>> @@ -22,7 +22,11 @@ enum {
>>   	 */
>>   	ND_MAX_LANES = 256,
>>   	INT_LBASIZE_ALIGNMENT = 64,
>> +	/*
>> +	 * NVDIMM_IO_ATOMIC | NVDIMM_NO_DEEPFLUSH is support.
>> +	 */
>>   	NVDIMM_IO_ATOMIC = 1,
>> +	NVDIMM_NO_DEEPFLUSH = 2,
>>   };
>>   
>>   struct nvdimm_drvdata {
>> -- 
>> 2.27.0
>>
Ira Weiny July 1, 2022, 4:23 p.m. UTC | #3
On Fri, Jul 01, 2022 at 12:38:16PM +0800, dennis.wu wrote:
> Thank you Ira! Sorry for duplicating the patch.

It's ok we all make mistakes.

> 
> The first patch is made on the kernel v5.17 and can't patch with the latest
> Linux master and the second one is made with the Linux master.

However, this should have indicated V2 with the changelog updated below the
'---' that V2 was required due to the original being on the wrong base.

That will help us reviewers to know that the previous one is not valid and to
spend our time reviewing this new version.  lore will also pick up the correct
version when Dan trys to merge the patch.

Hope this makes sense!  :-D

Ira

> 
> On 7/1/22 06:32, Weiny, Ira wrote:
> > On Wed, Jun 29, 2022 at 09:58:01PM +0800, Dennis.Wu wrote:
> > > Reason: we can have a global control of deepflush in the nfit module
> > > by "no_deepflush" param. In the case of "no_deepflush=0", we still
> > > need control data deepflush or not by the NVDIMM_NO_DEEPFLUSH flag.
> > > In the BTT, the btt information block(btt_sb) will use deepflush.
> > > Other like the data blocks(512B or 4KB),4 bytes btt_map and 16 bytes
> > > bflog will not use the deepflush. so that, during the runtime, no
> > > deepflush will be called in the BTT.
> > > 
> > > How: Add flag NVDIMM_NO_DEEPFLUSH which can use with NVDIMM_IO_ATOMIC
> > > like NVDIMM_NO_DEEPFLUSH | NVDIMM_IO_ATOMIC.
> > > "if (!(flags & NVDIMM_NO_DEEPFLUSH))", nvdimm_flush() will be called,
> > > otherwise, the pmem_wmb() called to fense all previous write.
> > > 
> > This looks like the same patch you sent earlier?  Did it change?  Is this a V2?
> > 
> > Ira
> > 
> > > Signed-off-by: Dennis.Wu <dennis.wu@intel.com>
> > > ---
> > >   drivers/nvdimm/btt.c   | 26 +++++++++++++++++---------
> > >   drivers/nvdimm/claim.c |  9 +++++++--
> > >   drivers/nvdimm/nd.h    |  4 ++++
> > >   3 files changed, 28 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
> > > index 9613e54c7a67..c71ba7a1edd0 100644
> > > --- a/drivers/nvdimm/btt.c
> > > +++ b/drivers/nvdimm/btt.c
> > > @@ -70,6 +70,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
> > >   	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
> > >   		"arena->info2off: %#llx is unaligned\n", arena->info2off);
> > > +	/*
> > > +	 * btt_sb is critial information and need proper write
> > > +	 * nvdimm_flush will be called (deepflush)
> > > +	 */
> > >   	ret = arena_write_bytes(arena, arena->info2off, super,
> > >   			sizeof(struct btt_sb), 0);
> > >   	if (ret)
> > > @@ -384,7 +388,8 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
> > >   {
> > >   	int ret;
> > > -	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
> > > +	ret = __btt_log_write(arena, lane, sub, ent,
> > > +		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
> > >   	if (ret)
> > >   		return ret;
> > > @@ -429,7 +434,7 @@ static int btt_map_init(struct arena_info *arena)
> > >   		dev_WARN_ONCE(to_dev(arena), size < 512,
> > >   			"chunk size: %#zx is unaligned\n", size);
> > >   		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
> > > -				size, 0);
> > > +				size, NVDIMM_NO_DEEPFLUSH);
> > >   		if (ret)
> > >   			goto free;
> > > @@ -473,7 +478,7 @@ static int btt_log_init(struct arena_info *arena)
> > >   		dev_WARN_ONCE(to_dev(arena), size < 512,
> > >   			"chunk size: %#zx is unaligned\n", size);
> > >   		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
> > > -				size, 0);
> > > +				size, NVDIMM_NO_DEEPFLUSH);
> > >   		if (ret)
> > >   			goto free;
> > > @@ -487,7 +492,7 @@ static int btt_log_init(struct arena_info *arena)
> > >   		ent.old_map = cpu_to_le32(arena->external_nlba + i);
> > >   		ent.new_map = cpu_to_le32(arena->external_nlba + i);
> > >   		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
> > > -		ret = __btt_log_write(arena, i, 0, &ent, 0);
> > > +		ret = __btt_log_write(arena, i, 0, &ent, NVDIMM_NO_DEEPFLUSH);
> > >   		if (ret)
> > >   			goto free;
> > >   	}
> > > @@ -518,7 +523,7 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
> > >   			unsigned long chunk = min(len, PAGE_SIZE);
> > >   			ret = arena_write_bytes(arena, nsoff, zero_page,
> > > -				chunk, 0);
> > > +				chunk, NVDIMM_NO_DEEPFLUSH);
> > >   			if (ret)
> > >   				break;
> > >   			len -= chunk;
> > > @@ -592,7 +597,8 @@ static int btt_freelist_init(struct arena_info *arena)
> > >   			 * to complete the map write. So fix up the map.
> > >   			 */
> > >   			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
> > > -					le32_to_cpu(log_new.new_map), 0, 0, 0);
> > > +					le32_to_cpu(log_new.new_map), 0, 0,
> > > +					NVDIMM_NO_DEEPFLUSH);
> > >   			if (ret)
> > >   				return ret;
> > >   		}
> > > @@ -1123,7 +1129,8 @@ static int btt_data_write(struct arena_info *arena, u32 lba,
> > >   	u64 nsoff = to_namespace_offset(arena, lba);
> > >   	void *mem = kmap_atomic(page);
> > > -	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
> > > +	ret = arena_write_bytes(arena, nsoff, mem + off, len,
> > > +		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
> > >   	kunmap_atomic(mem);
> > >   	return ret;
> > > @@ -1260,7 +1267,8 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
> > >   		ret = btt_data_read(arena, page, off, postmap, cur_len);
> > >   		if (ret) {
> > >   			/* Media error - set the e_flag */
> > > -			if (btt_map_write(arena, premap, postmap, 0, 1, NVDIMM_IO_ATOMIC))
> > > +			if (btt_map_write(arena, premap, postmap, 0, 1,
> > > +				NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH))
> > >   				dev_warn_ratelimited(to_dev(arena),
> > >   					"Error persistently tracking bad blocks at %#x\n",
> > >   					premap);
> > > @@ -1393,7 +1401,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
> > >   			goto out_map;
> > >   		ret = btt_map_write(arena, premap, new_postmap, 0, 0,
> > > -			NVDIMM_IO_ATOMIC);
> > > +			NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
> > >   		if (ret)
> > >   			goto out_map;
> > > diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
> > > index 030dbde6b088..c1fa3291c063 100644
> > > --- a/drivers/nvdimm/claim.c
> > > +++ b/drivers/nvdimm/claim.c
> > > @@ -294,9 +294,14 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
> > >   	}
> > >   	memcpy_flushcache(nsio->addr + offset, buf, size);
> > > -	ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
> > > -	if (ret)
> > > +	if (!(flags & NVDIMM_NO_DEEPFLUSH)) {
> > > +		ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
> > > +		if (ret)
> > > +			rc = ret;
> > > +	} else {
> > >   		rc = ret;
> > > +		pmem_wmb();
> > > +	}
> > >   	return rc;
> > >   }
> > > diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
> > > index ec5219680092..a16e259a8cff 100644
> > > --- a/drivers/nvdimm/nd.h
> > > +++ b/drivers/nvdimm/nd.h
> > > @@ -22,7 +22,11 @@ enum {
> > >   	 */
> > >   	ND_MAX_LANES = 256,
> > >   	INT_LBASIZE_ALIGNMENT = 64,
> > > +	/*
> > > +	 * NVDIMM_IO_ATOMIC | NVDIMM_NO_DEEPFLUSH is support.
> > > +	 */
> > >   	NVDIMM_IO_ATOMIC = 1,
> > > +	NVDIMM_NO_DEEPFLUSH = 2,
> > >   };
> > >   struct nvdimm_drvdata {
> > > -- 
> > > 2.27.0
> > >
Ira Weiny July 1, 2022, 4:42 p.m. UTC | #4
On Wed, Jun 29, 2022 at 09:58:01PM +0800, Dennis.Wu wrote:
> Reason: we can have a global control of deepflush in the nfit module
> by "no_deepflush" param. In the case of "no_deepflush=0", we still
> need control data deepflush or not by the NVDIMM_NO_DEEPFLUSH flag.
> In the BTT, the btt information block(btt_sb) will use deepflush.
> Other like the data blocks(512B or 4KB),4 bytes btt_map and 16 bytes
> bflog will not use the deepflush. so that, during the runtime, no
> deepflush will be called in the BTT.
> 
> How: Add flag NVDIMM_NO_DEEPFLUSH which can use with NVDIMM_IO_ATOMIC
> like NVDIMM_NO_DEEPFLUSH | NVDIMM_IO_ATOMIC.
> "if (!(flags & NVDIMM_NO_DEEPFLUSH))", nvdimm_flush() will be called,
> otherwise, the pmem_wmb() called to fense all previous write.

Unless Christoph is right and there is no need for this deep flush I think this
logic is backwards and awkward.

Why not call the flag NVDIMM_DEEPFLUSH?  I think that would make this patch a
lot smaller and the code much more straight forward.

The commit log implies that this flag is to be used with NVDIMM_IO_ATOMIC but I
don't see any relation between the 2 flags.  With that in mind see below.

> 
> Signed-off-by: Dennis.Wu <dennis.wu@intel.com>
> ---
>  drivers/nvdimm/btt.c   | 26 +++++++++++++++++---------
>  drivers/nvdimm/claim.c |  9 +++++++--
>  drivers/nvdimm/nd.h    |  4 ++++
>  3 files changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
> index 9613e54c7a67..c71ba7a1edd0 100644
> --- a/drivers/nvdimm/btt.c
> +++ b/drivers/nvdimm/btt.c
> @@ -70,6 +70,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
>  	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
>  		"arena->info2off: %#llx is unaligned\n", arena->info2off);
>  
> +	/*
> +	 * btt_sb is critial information and need proper write
> +	 * nvdimm_flush will be called (deepflush)
> +	 */
>  	ret = arena_write_bytes(arena, arena->info2off, super,
>  			sizeof(struct btt_sb), 0);

Why not specify NVDIMM_DEEPFLUSH here?  ... skip the next 9 hunks ...

>  	if (ret)
> @@ -384,7 +388,8 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
>  {
>  	int ret;
>  
> -	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
> +	ret = __btt_log_write(arena, lane, sub, ent,
> +		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>  	if (ret)
>  		return ret;
>  
> @@ -429,7 +434,7 @@ static int btt_map_init(struct arena_info *arena)
>  		dev_WARN_ONCE(to_dev(arena), size < 512,
>  			"chunk size: %#zx is unaligned\n", size);
>  		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
> -				size, 0);
> +				size, NVDIMM_NO_DEEPFLUSH);
>  		if (ret)
>  			goto free;
>  
> @@ -473,7 +478,7 @@ static int btt_log_init(struct arena_info *arena)
>  		dev_WARN_ONCE(to_dev(arena), size < 512,
>  			"chunk size: %#zx is unaligned\n", size);
>  		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
> -				size, 0);
> +				size, NVDIMM_NO_DEEPFLUSH);
>  		if (ret)
>  			goto free;
>  
> @@ -487,7 +492,7 @@ static int btt_log_init(struct arena_info *arena)
>  		ent.old_map = cpu_to_le32(arena->external_nlba + i);
>  		ent.new_map = cpu_to_le32(arena->external_nlba + i);
>  		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
> -		ret = __btt_log_write(arena, i, 0, &ent, 0);
> +		ret = __btt_log_write(arena, i, 0, &ent, NVDIMM_NO_DEEPFLUSH);
>  		if (ret)
>  			goto free;
>  	}
> @@ -518,7 +523,7 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
>  			unsigned long chunk = min(len, PAGE_SIZE);
>  
>  			ret = arena_write_bytes(arena, nsoff, zero_page,
> -				chunk, 0);
> +				chunk, NVDIMM_NO_DEEPFLUSH);
>  			if (ret)
>  				break;
>  			len -= chunk;
> @@ -592,7 +597,8 @@ static int btt_freelist_init(struct arena_info *arena)
>  			 * to complete the map write. So fix up the map.
>  			 */
>  			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
> -					le32_to_cpu(log_new.new_map), 0, 0, 0);
> +					le32_to_cpu(log_new.new_map), 0, 0,
> +					NVDIMM_NO_DEEPFLUSH);
>  			if (ret)
>  				return ret;
>  		}
> @@ -1123,7 +1129,8 @@ static int btt_data_write(struct arena_info *arena, u32 lba,
>  	u64 nsoff = to_namespace_offset(arena, lba);
>  	void *mem = kmap_atomic(page);
>  
> -	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
> +	ret = arena_write_bytes(arena, nsoff, mem + off, len,
> +		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>  	kunmap_atomic(mem);
>  
>  	return ret;
> @@ -1260,7 +1267,8 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
>  		ret = btt_data_read(arena, page, off, postmap, cur_len);
>  		if (ret) {
>  			/* Media error - set the e_flag */
> -			if (btt_map_write(arena, premap, postmap, 0, 1, NVDIMM_IO_ATOMIC))
> +			if (btt_map_write(arena, premap, postmap, 0, 1,
> +				NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH))
>  				dev_warn_ratelimited(to_dev(arena),
>  					"Error persistently tracking bad blocks at %#x\n",
>  					premap);
> @@ -1393,7 +1401,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
>  			goto out_map;
>  
>  		ret = btt_map_write(arena, premap, new_postmap, 0, 0,
> -			NVDIMM_IO_ATOMIC);
> +			NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
>  		if (ret)
>  			goto out_map;
>  
> diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
> index 030dbde6b088..c1fa3291c063 100644
> --- a/drivers/nvdimm/claim.c
> +++ b/drivers/nvdimm/claim.c
> @@ -294,9 +294,14 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>  	}
>  
>  	memcpy_flushcache(nsio->addr + offset, buf, size);
> -	ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
> -	if (ret)
> +	if (!(flags & NVDIMM_NO_DEEPFLUSH)) {

And reverse this logic?

> +		ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
> +		if (ret)
> +			rc = ret;
> +	} else {
>  		rc = ret;
> +		pmem_wmb();
> +	}
>  
>  	return rc;
>  }
> diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
> index ec5219680092..a16e259a8cff 100644
> --- a/drivers/nvdimm/nd.h
> +++ b/drivers/nvdimm/nd.h
> @@ -22,7 +22,11 @@ enum {
>  	 */
>  	ND_MAX_LANES = 256,
>  	INT_LBASIZE_ALIGNMENT = 64,
> +	/*
> +	 * NVDIMM_IO_ATOMIC | NVDIMM_NO_DEEPFLUSH is support.
> +	 */

I don't understand this comment?

Ira

>  	NVDIMM_IO_ATOMIC = 1,
> +	NVDIMM_NO_DEEPFLUSH = 2,
>  };
>  
>  struct nvdimm_drvdata {
> -- 
> 2.27.0
>
Dan Williams July 13, 2022, 1:10 a.m. UTC | #5
Dennis.Wu wrote:
> Reason: we can have a global control of deepflush in the nfit module
> by "no_deepflush" param. In the case of "no_deepflush=0", we still
> need control data deepflush or not by the NVDIMM_NO_DEEPFLUSH flag.
> In the BTT, the btt information block(btt_sb) will use deepflush.
> Other like the data blocks(512B or 4KB),4 bytes btt_map and 16 bytes
> bflog will not use the deepflush. so that, during the runtime, no
> deepflush will be called in the BTT.

Why do you need this in the BTT driver when deepflush is disabled
globally for all regions?

ADR only applies at global visibility of stores, so the pmem_wmb() is
still necessary.
dennis.wu July 19, 2022, 6:23 a.m. UTC | #6
Hi Dan,

Thank you!

This patch is not necessary, but we have a concern that might some 
customer would like to keep "no_deepflush=0" option. From BTT 
perspective, we still would like to control independently.

BR,

Dennis Wu

On 7/13/22 09:10, Dan Williams wrote:
> Dennis.Wu wrote:
>> Reason: we can have a global control of deepflush in the nfit module
>> by "no_deepflush" param. In the case of "no_deepflush=0", we still
>> need control data deepflush or not by the NVDIMM_NO_DEEPFLUSH flag.
>> In the BTT, the btt information block(btt_sb) will use deepflush.
>> Other like the data blocks(512B or 4KB),4 bytes btt_map and 16 bytes
>> bflog will not use the deepflush. so that, during the runtime, no
>> deepflush will be called in the BTT.
> Why do you need this in the BTT driver when deepflush is disabled
> globally for all regions?
>
> ADR only applies at global visibility of stores, so the pmem_wmb() is
> still necessary.
diff mbox series

Patch

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 9613e54c7a67..c71ba7a1edd0 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -70,6 +70,10 @@  static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
 	dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
 		"arena->info2off: %#llx is unaligned\n", arena->info2off);
 
+	/*
+	 * btt_sb is critial information and need proper write
+	 * nvdimm_flush will be called (deepflush)
+	 */
 	ret = arena_write_bytes(arena, arena->info2off, super,
 			sizeof(struct btt_sb), 0);
 	if (ret)
@@ -384,7 +388,8 @@  static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
 {
 	int ret;
 
-	ret = __btt_log_write(arena, lane, sub, ent, NVDIMM_IO_ATOMIC);
+	ret = __btt_log_write(arena, lane, sub, ent,
+		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
 	if (ret)
 		return ret;
 
@@ -429,7 +434,7 @@  static int btt_map_init(struct arena_info *arena)
 		dev_WARN_ONCE(to_dev(arena), size < 512,
 			"chunk size: %#zx is unaligned\n", size);
 		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
-				size, 0);
+				size, NVDIMM_NO_DEEPFLUSH);
 		if (ret)
 			goto free;
 
@@ -473,7 +478,7 @@  static int btt_log_init(struct arena_info *arena)
 		dev_WARN_ONCE(to_dev(arena), size < 512,
 			"chunk size: %#zx is unaligned\n", size);
 		ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
-				size, 0);
+				size, NVDIMM_NO_DEEPFLUSH);
 		if (ret)
 			goto free;
 
@@ -487,7 +492,7 @@  static int btt_log_init(struct arena_info *arena)
 		ent.old_map = cpu_to_le32(arena->external_nlba + i);
 		ent.new_map = cpu_to_le32(arena->external_nlba + i);
 		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
-		ret = __btt_log_write(arena, i, 0, &ent, 0);
+		ret = __btt_log_write(arena, i, 0, &ent, NVDIMM_NO_DEEPFLUSH);
 		if (ret)
 			goto free;
 	}
@@ -518,7 +523,7 @@  static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
 			unsigned long chunk = min(len, PAGE_SIZE);
 
 			ret = arena_write_bytes(arena, nsoff, zero_page,
-				chunk, 0);
+				chunk, NVDIMM_NO_DEEPFLUSH);
 			if (ret)
 				break;
 			len -= chunk;
@@ -592,7 +597,8 @@  static int btt_freelist_init(struct arena_info *arena)
 			 * to complete the map write. So fix up the map.
 			 */
 			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
-					le32_to_cpu(log_new.new_map), 0, 0, 0);
+					le32_to_cpu(log_new.new_map), 0, 0,
+					NVDIMM_NO_DEEPFLUSH);
 			if (ret)
 				return ret;
 		}
@@ -1123,7 +1129,8 @@  static int btt_data_write(struct arena_info *arena, u32 lba,
 	u64 nsoff = to_namespace_offset(arena, lba);
 	void *mem = kmap_atomic(page);
 
-	ret = arena_write_bytes(arena, nsoff, mem + off, len, NVDIMM_IO_ATOMIC);
+	ret = arena_write_bytes(arena, nsoff, mem + off, len,
+		NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
 	kunmap_atomic(mem);
 
 	return ret;
@@ -1260,7 +1267,8 @@  static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
 		ret = btt_data_read(arena, page, off, postmap, cur_len);
 		if (ret) {
 			/* Media error - set the e_flag */
-			if (btt_map_write(arena, premap, postmap, 0, 1, NVDIMM_IO_ATOMIC))
+			if (btt_map_write(arena, premap, postmap, 0, 1,
+				NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH))
 				dev_warn_ratelimited(to_dev(arena),
 					"Error persistently tracking bad blocks at %#x\n",
 					premap);
@@ -1393,7 +1401,7 @@  static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 			goto out_map;
 
 		ret = btt_map_write(arena, premap, new_postmap, 0, 0,
-			NVDIMM_IO_ATOMIC);
+			NVDIMM_IO_ATOMIC|NVDIMM_NO_DEEPFLUSH);
 		if (ret)
 			goto out_map;
 
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 030dbde6b088..c1fa3291c063 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -294,9 +294,14 @@  static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	}
 
 	memcpy_flushcache(nsio->addr + offset, buf, size);
-	ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
-	if (ret)
+	if (!(flags & NVDIMM_NO_DEEPFLUSH)) {
+		ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
+		if (ret)
+			rc = ret;
+	} else {
 		rc = ret;
+		pmem_wmb();
+	}
 
 	return rc;
 }
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index ec5219680092..a16e259a8cff 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -22,7 +22,11 @@  enum {
 	 */
 	ND_MAX_LANES = 256,
 	INT_LBASIZE_ALIGNMENT = 64,
+	/*
+	 * NVDIMM_IO_ATOMIC | NVDIMM_NO_DEEPFLUSH is support.
+	 */
 	NVDIMM_IO_ATOMIC = 1,
+	NVDIMM_NO_DEEPFLUSH = 2,
 };
 
 struct nvdimm_drvdata {