
[for-next] RDMA/cma: Replace RMW with atomic bit-ops

Message ID 1623854153-19147-1-git-send-email-haakon.bugge@oracle.com (mailing list archive)
State Superseded
Series [for-next] RDMA/cma: Replace RMW with atomic bit-ops

Commit Message

Haakon Bugge June 16, 2021, 2:35 p.m. UTC
The struct rdma_id_private contains three bit-fields, tos_set,
timeout_set, and min_rnr_timer_set. These are set by accessor
functions without any synchronization. If two or all three accessor
functions are invoked in close proximity in time, there will be
Read-Modify-Write races from several contexts on the same variable,
and the outcome will be nondeterministic.

Replace with a flag variable and inline functions for set and get.

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
---
 drivers/infiniband/core/cma.c      | 24 +++++++++++-------------
 drivers/infiniband/core/cma_priv.h | 28 +++++++++++++++++++++++++---
 2 files changed, 36 insertions(+), 16 deletions(-)
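
For illustration, the race being fixed is a lost update on bit-fields that share a byte; a sketch only (bit positions and comments are illustrative, not the driver code):

/* tos_set, timeout_set and min_rnr_timer_set occupy one byte, so each
 * assignment compiles to a load/modify/store of that whole byte.
 */

/* context A */
id_priv->tos_set = 1;		/* loads the byte, sets bit 0, stores it */

/* context B, concurrently */
id_priv->timeout_set = 1;	/* loaded the same old byte, sets bit 1 and
				 * stores it back - context A's update is lost
				 */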

Comments

Stefan Metzmacher June 16, 2021, 3:02 p.m. UTC | #1
Hi Håkon,

> +#define BIT_ACCESS_FUNCTIONS(b)							\
> +	static inline void set_##b(unsigned long flags)				\
> +	{									\
> +		/* set_bit() does not imply a memory barrier */			\
> +		smp_mb__before_atomic();					\
> +		set_bit(b, &flags);						\
> +		/* set_bit() does not imply a memory barrier */			\
> +		smp_mb__after_atomic();						\
> +	}									\

Don't you need to pass flags by reference to the inline function?
As far as I can see set_bit() operates on a temporary variable.

In order to check if inline functions are special I wrote this little check:

$ cat inline_arg.c
#include <stdio.h>

static inline void func(unsigned s, unsigned *p)
{
        printf("&s=%p p=%p\n", &s, p);
}

int main(void)
{
        unsigned flags = 0;

        func(flags, &flags);
        return 0;
}

$ gcc -O0 -o inline_arg inline_arg.c
$ ./inline_arg
&s=0x7ffd3a71616c p=0x7ffd3a716184

$ gcc -O6 -o inline_arg inline_arg.c
$ ./inline_arg
&s=0x7ffd87781064 p=0x7ffd87781060

It means s doesn't magically become 'flags' of the caller...

Am I missing something?

metze
Haakon Bugge June 16, 2021, 4:03 p.m. UTC | #2
> On 16 Jun 2021, at 17:02, Stefan Metzmacher <metze@samba.org> wrote:
> 
> Hi Håkon,
> 
>> +#define BIT_ACCESS_FUNCTIONS(b)							\
>> +	static inline void set_##b(unsigned long flags)				\
>> +	{									\
>> +		/* set_bit() does not imply a memory barrier */			\
>> +		smp_mb__before_atomic();					\
>> +		set_bit(b, &flags);						\
>> +		/* set_bit() does not imply a memory barrier */			\
>> +		smp_mb__after_atomic();						\
>> +	}									\
> 
> Don't you need to pass flags by reference to the inline function?
> As far as I can see set_bit() operates on a temporary variable.

Good catch, Stefan! It started off as a plain #define, where it worked, but as you say, it doesn't work once flags is passed by value to the inline function. Bummer!

I'll send a v2 tomorrow and see if I can include other review comments as well.
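
For reference, a minimal sketch of the by-reference variant (same naming as in the patch; v2 may of course end up looking different):

#define BIT_ACCESS_FUNCTIONS(b)						\
	static inline void set_##b(unsigned long *flags)		\
	{								\
		/* set_bit() does not imply a memory barrier */		\
		smp_mb__before_atomic();				\
		set_bit(b, flags);					\
		/* set_bit() does not imply a memory barrier */		\
		smp_mb__after_atomic();					\
	}

Callers would then pass the address, e.g. set_TOS_SET(&id_priv->flags).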

Again, thanks for the review!


Håkon

> 
> In order to check if inline functions are special I wrote this little check:
> 
> $ cat inline_arg.c
> #include <stdio.h>
> 
> static inline void func(unsigned s, unsigned *p)
> {
>        printf("&s=%p p=%p\n", &s, p);
> }
> 
> int main(void)
> {
>        unsigned flags = 0;
> 
>        func(flags, &flags);
>        return 0;
> }
> 
> $ gcc -O0 -o inline_arg inline_arg.c
> $ ./inline_arg
> &s=0x7ffd3a71616c p=0x7ffd3a716184
> 
> $ gcc -O6 -o inline_arg inline_arg.c
> $ ./inline_arg
> &s=0x7ffd87781064 p=0x7ffd87781060
> 
> It means s doesn't magically become 'flags' of the caller...
> 
> I'm I missing something?
> 
> metze
Leon Romanovsky June 17, 2021, 6:51 a.m. UTC | #3
On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
> The struct rdma_id_private contains three bit-fields, tos_set,
> timeout_set, and min_rnr_timer_set. These are set by accessor
> functions without any synchronization. If two or all accessor
> functions are invoked in close proximity in time, there will be
> Read-Modify-Write from several contexts to the same variable, and the
> result will be intermittent.
> 
> Replace with a flag variable and inline functions for set and get.
> 
> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> Signed-off-by: Hans Westgaard Ry<hans.westgaard.ry@oracle.com>
> ---
>  drivers/infiniband/core/cma.c      | 24 +++++++++++-------------
>  drivers/infiniband/core/cma_priv.h | 28 +++++++++++++++++++++++++---
>  2 files changed, 36 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index 2b9ffc2..fe609e7 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -844,9 +844,7 @@ static void cma_id_put(struct rdma_id_private *id_priv)
>  	id_priv->id.event_handler = event_handler;
>  	id_priv->id.ps = ps;
>  	id_priv->id.qp_type = qp_type;
> -	id_priv->tos_set = false;
> -	id_priv->timeout_set = false;
> -	id_priv->min_rnr_timer_set = false;
> +	id_priv->flags = 0;

It is not needed, id_priv is allocated with kzalloc.

>  	id_priv->gid_type = IB_GID_TYPE_IB;
>  	spin_lock_init(&id_priv->lock);
>  	mutex_init(&id_priv->qp_mutex);
> @@ -1134,10 +1132,10 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>  		ret = -ENOSYS;
>  	}
>  
> -	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
> +	if ((*qp_attr_mask & IB_QP_TIMEOUT) && get_TIMEOUT_SET(id_priv->flags))
>  		qp_attr->timeout = id_priv->timeout;
>  
> -	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
> +	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && get_MIN_RNR_TIMER_SET(id_priv->flags))
>  		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
>  
>  	return ret;
> @@ -2472,7 +2470,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
>  		return PTR_ERR(id);
>  
>  	id->tos = id_priv->tos;
> -	id->tos_set = id_priv->tos_set;
> +	id->tos_set = get_TOS_SET(id_priv->flags);
>  	id->afonly = id_priv->afonly;
>  	id_priv->cm_id.iw = id;
>  
> @@ -2533,7 +2531,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
>  	cma_id_get(id_priv);
>  	dev_id_priv->internal_id = 1;
>  	dev_id_priv->afonly = id_priv->afonly;
> -	dev_id_priv->tos_set = id_priv->tos_set;
> +	dev_id_priv->flags = id_priv->flags;
>  	dev_id_priv->tos = id_priv->tos;
>  
>  	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
> @@ -2582,7 +2580,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
>  
>  	id_priv = container_of(id, struct rdma_id_private, id);
>  	id_priv->tos = (u8) tos;
> -	id_priv->tos_set = true;
> +	set_TOS_SET(id_priv->flags);
>  }
>  EXPORT_SYMBOL(rdma_set_service_type);
>  
> @@ -2610,7 +2608,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
>  
>  	id_priv = container_of(id, struct rdma_id_private, id);
>  	id_priv->timeout = timeout;
> -	id_priv->timeout_set = true;
> +	set_TIMEOUT_SET(id_priv->flags);
>  
>  	return 0;
>  }
> @@ -2647,7 +2645,7 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
>  
>  	id_priv = container_of(id, struct rdma_id_private, id);
>  	id_priv->min_rnr_timer = min_rnr_timer;
> -	id_priv->min_rnr_timer_set = true;
> +	set_MIN_RNR_TIMER_SET(id_priv->flags);
>  
>  	return 0;
>  }
> @@ -3033,7 +3031,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
>  
>  	u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
>  					rdma_start_port(id_priv->cma_dev->device)];
> -	u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
> +	u8 tos = get_TOS_SET(id_priv->flags) ? id_priv->tos : default_roce_tos;
>  
>  
>  	work = kzalloc(sizeof *work, GFP_KERNEL);
> @@ -3081,7 +3079,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
>  	 * PacketLifeTime = local ACK timeout/2
>  	 * as a reasonable approximation for RoCE networks.
>  	 */
> -	route->path_rec->packet_life_time = id_priv->timeout_set ?
> +	route->path_rec->packet_life_time = get_TIMEOUT_SET(id_priv->flags) ?
>  		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
>  
>  	if (!route->path_rec->mtu) {
> @@ -4107,7 +4105,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
>  		return PTR_ERR(cm_id);
>  
>  	cm_id->tos = id_priv->tos;
> -	cm_id->tos_set = id_priv->tos_set;
> +	cm_id->tos_set = get_TOS_SET(id_priv->flags);
>  	id_priv->cm_id.iw = cm_id;
>  
>  	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
> diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
> index 5c463da..2c1694f 100644
> --- a/drivers/infiniband/core/cma_priv.h
> +++ b/drivers/infiniband/core/cma_priv.h
> @@ -82,11 +82,9 @@ struct rdma_id_private {
>  	u32			qkey;
>  	u32			qp_num;
>  	u32			options;
> +	unsigned long		flags;
>  	u8			srq;
>  	u8			tos;
> -	u8			tos_set:1;
> -	u8                      timeout_set:1;
> -	u8			min_rnr_timer_set:1;
>  	u8			reuseaddr;
>  	u8			afonly;
>  	u8			timeout;
> @@ -127,4 +125,28 @@ int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
>  			     u8 default_roce_tos);
>  struct ib_device *cma_get_ib_dev(struct cma_device *dev);
>  
> +#define BIT_ACCESS_FUNCTIONS(b)							\
> +	static inline void set_##b(unsigned long flags)				\
> +	{									\
> +		/* set_bit() does not imply a memory barrier */			\
> +		smp_mb__before_atomic();					\
> +		set_bit(b, &flags);						\
> +		/* set_bit() does not imply a memory barrier */			\
> +		smp_mb__after_atomic();						\
> +	}									\
> +	static inline bool get_##b(unsigned long flags)				\
> +	{									\
> +		return test_bit(b, &flags);					\
> +	}
> +
> +enum cm_id_flag_bits {
> +	TOS_SET,
> +	TIMEOUT_SET,
> +	MIN_RNR_TIMER_SET,
> +};
> +
> +BIT_ACCESS_FUNCTIONS(TIMEOUT_SET);
> +BIT_ACCESS_FUNCTIONS(TOS_SET);
> +BIT_ACCESS_FUNCTIONS(MIN_RNR_TIMER_SET);

I would prefer one function that has the same syntax as set_bit() instead of
introducing two new ones built with macros.

Something like this:
static inline void set_bit_mb(long nr, unsigned long *flags)
{
   smp_mb__before_atomic();
   set_bit(nr, flags); 
   smp_mb__after_atomic();
}
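
For illustration, a call site with that helper would then read roughly like this (bit names as in the patch):

	id_priv->tos = (u8)tos;
	set_bit_mb(TOS_SET, &id_priv->flags);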

> +
>  #endif /* _CMA_PRIV_H */
> -- 
> 1.8.3.1
>
Haakon Bugge June 17, 2021, 6:56 a.m. UTC | #4
> On 17 Jun 2021, at 08:51, Leon Romanovsky <leon@kernel.org> wrote:
> 
> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
>> The struct rdma_id_private contains three bit-fields, tos_set,
>> timeout_set, and min_rnr_timer_set. These are set by accessor
>> functions without any synchronization. If two or all accessor
>> functions are invoked in close proximity in time, there will be
>> Read-Modify-Write from several contexts to the same variable, and the
>> result will be intermittent.
>> 
>> Replace with a flag variable and inline functions for set and get.
>> 
>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>> Signed-off-by: Hans Westgaard Ry<hans.westgaard.ry@oracle.com>
>> ---
>> drivers/infiniband/core/cma.c      | 24 +++++++++++-------------
>> drivers/infiniband/core/cma_priv.h | 28 +++++++++++++++++++++++++---
>> 2 files changed, 36 insertions(+), 16 deletions(-)
>> 
>> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
>> index 2b9ffc2..fe609e7 100644
>> --- a/drivers/infiniband/core/cma.c
>> +++ b/drivers/infiniband/core/cma.c
>> @@ -844,9 +844,7 @@ static void cma_id_put(struct rdma_id_private *id_priv)
>> 	id_priv->id.event_handler = event_handler;
>> 	id_priv->id.ps = ps;
>> 	id_priv->id.qp_type = qp_type;
>> -	id_priv->tos_set = false;
>> -	id_priv->timeout_set = false;
>> -	id_priv->min_rnr_timer_set = false;
>> +	id_priv->flags = 0;
> 
> It is not needed, id_priv is allocated with kzalloc.

I agree. I only put it in because it replaced the explicit foo = false assignments.
> 
>> 	id_priv->gid_type = IB_GID_TYPE_IB;
>> 	spin_lock_init(&id_priv->lock);
>> 	mutex_init(&id_priv->qp_mutex);
>> @@ -1134,10 +1132,10 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>> 		ret = -ENOSYS;
>> 	}
>> 
>> -	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
>> +	if ((*qp_attr_mask & IB_QP_TIMEOUT) && get_TIMEOUT_SET(id_priv->flags))
>> 		qp_attr->timeout = id_priv->timeout;
>> 
>> -	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
>> +	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && get_MIN_RNR_TIMER_SET(id_priv->flags))
>> 		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
>> 
>> 	return ret;
>> @@ -2472,7 +2470,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
>> 		return PTR_ERR(id);
>> 
>> 	id->tos = id_priv->tos;
>> -	id->tos_set = id_priv->tos_set;
>> +	id->tos_set = get_TOS_SET(id_priv->flags);
>> 	id->afonly = id_priv->afonly;
>> 	id_priv->cm_id.iw = id;
>> 
>> @@ -2533,7 +2531,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
>> 	cma_id_get(id_priv);
>> 	dev_id_priv->internal_id = 1;
>> 	dev_id_priv->afonly = id_priv->afonly;
>> -	dev_id_priv->tos_set = id_priv->tos_set;
>> +	dev_id_priv->flags = id_priv->flags;
>> 	dev_id_priv->tos = id_priv->tos;
>> 
>> 	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
>> @@ -2582,7 +2580,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
>> 
>> 	id_priv = container_of(id, struct rdma_id_private, id);
>> 	id_priv->tos = (u8) tos;
>> -	id_priv->tos_set = true;
>> +	set_TOS_SET(id_priv->flags);
>> }
>> EXPORT_SYMBOL(rdma_set_service_type);
>> 
>> @@ -2610,7 +2608,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
>> 
>> 	id_priv = container_of(id, struct rdma_id_private, id);
>> 	id_priv->timeout = timeout;
>> -	id_priv->timeout_set = true;
>> +	set_TIMEOUT_SET(id_priv->flags);
>> 
>> 	return 0;
>> }
>> @@ -2647,7 +2645,7 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
>> 
>> 	id_priv = container_of(id, struct rdma_id_private, id);
>> 	id_priv->min_rnr_timer = min_rnr_timer;
>> -	id_priv->min_rnr_timer_set = true;
>> +	set_MIN_RNR_TIMER_SET(id_priv->flags);
>> 
>> 	return 0;
>> }
>> @@ -3033,7 +3031,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
>> 
>> 	u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
>> 					rdma_start_port(id_priv->cma_dev->device)];
>> -	u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
>> +	u8 tos = get_TOS_SET(id_priv->flags) ? id_priv->tos : default_roce_tos;
>> 
>> 
>> 	work = kzalloc(sizeof *work, GFP_KERNEL);
>> @@ -3081,7 +3079,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
>> 	 * PacketLifeTime = local ACK timeout/2
>> 	 * as a reasonable approximation for RoCE networks.
>> 	 */
>> -	route->path_rec->packet_life_time = id_priv->timeout_set ?
>> +	route->path_rec->packet_life_time = get_TIMEOUT_SET(id_priv->flags) ?
>> 		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
>> 
>> 	if (!route->path_rec->mtu) {
>> @@ -4107,7 +4105,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
>> 		return PTR_ERR(cm_id);
>> 
>> 	cm_id->tos = id_priv->tos;
>> -	cm_id->tos_set = id_priv->tos_set;
>> +	cm_id->tos_set = get_TOS_SET(id_priv->flags);
>> 	id_priv->cm_id.iw = cm_id;
>> 
>> 	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
>> diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
>> index 5c463da..2c1694f 100644
>> --- a/drivers/infiniband/core/cma_priv.h
>> +++ b/drivers/infiniband/core/cma_priv.h
>> @@ -82,11 +82,9 @@ struct rdma_id_private {
>> 	u32			qkey;
>> 	u32			qp_num;
>> 	u32			options;
>> +	unsigned long		flags;
>> 	u8			srq;
>> 	u8			tos;
>> -	u8			tos_set:1;
>> -	u8                      timeout_set:1;
>> -	u8			min_rnr_timer_set:1;
>> 	u8			reuseaddr;
>> 	u8			afonly;
>> 	u8			timeout;
>> @@ -127,4 +125,28 @@ int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
>> 			     u8 default_roce_tos);
>> struct ib_device *cma_get_ib_dev(struct cma_device *dev);
>> 
>> +#define BIT_ACCESS_FUNCTIONS(b)							\
>> +	static inline void set_##b(unsigned long flags)				\
>> +	{									\
>> +		/* set_bit() does not imply a memory barrier */			\
>> +		smp_mb__before_atomic();					\
>> +		set_bit(b, &flags);						\
>> +		/* set_bit() does not imply a memory barrier */			\
>> +		smp_mb__after_atomic();						\
>> +	}									\
>> +	static inline bool get_##b(unsigned long flags)				\
>> +	{									\
>> +		return test_bit(b, &flags);					\
>> +	}
>> +
>> +enum cm_id_flag_bits {

This should be called cm_id_priv_flags_bits.

>> +	TOS_SET,
>> +	TIMEOUT_SET,
>> +	MIN_RNR_TIMER_SET,
>> +};
>> +
>> +BIT_ACCESS_FUNCTIONS(TIMEOUT_SET);
>> +BIT_ACCESS_FUNCTIONS(TOS_SET);
>> +BIT_ACCESS_FUNCTIONS(MIN_RNR_TIMER_SET);
> 
> I would prefer one function that has same syntax as set_bit() instead of
> introducing two new that built with macros.
> 
> Something like this:
> static inline set_bit_mb(long nr, unsigned long *flags)
> {
>   smp_mb__before_atomic();
>   set_bit(nr, flags); 
>   smp_mb__after_atomic();
> }

OK. I should also probably move this to cma.c, since it is not used outside cma.c?

Also, do you want it checkpatch clean? Then I need the /* set_bit() does not imply a memory barrier */ comments.

Thanks for your review, Leon.


Håkon

> 
>> +
>> #endif /* _CMA_PRIV_H */
>> -- 
>> 1.8.3.1
Leon Romanovsky June 17, 2021, 7:38 a.m. UTC | #5
On Thu, Jun 17, 2021 at 06:56:15AM +0000, Haakon Bugge wrote:
> 
> 
> > On 17 Jun 2021, at 08:51, Leon Romanovsky <leon@kernel.org> wrote:
> > 
> > On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
> >> The struct rdma_id_private contains three bit-fields, tos_set,
> >> timeout_set, and min_rnr_timer_set. These are set by accessor
> >> functions without any synchronization. If two or all accessor
> >> functions are invoked in close proximity in time, there will be
> >> Read-Modify-Write from several contexts to the same variable, and the
> >> result will be intermittent.
> >> 
> >> Replace with a flag variable and inline functions for set and get.
> >> 
> >> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> >> Signed-off-by: Hans Westgaard Ry<hans.westgaard.ry@oracle.com>
> >> ---
> >> drivers/infiniband/core/cma.c      | 24 +++++++++++-------------
> >> drivers/infiniband/core/cma_priv.h | 28 +++++++++++++++++++++++++---
> >> 2 files changed, 36 insertions(+), 16 deletions(-)
> >> 
> >> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> >> index 2b9ffc2..fe609e7 100644
> >> --- a/drivers/infiniband/core/cma.c
> >> +++ b/drivers/infiniband/core/cma.c
> >> @@ -844,9 +844,7 @@ static void cma_id_put(struct rdma_id_private *id_priv)
> >> 	id_priv->id.event_handler = event_handler;
> >> 	id_priv->id.ps = ps;
> >> 	id_priv->id.qp_type = qp_type;
> >> -	id_priv->tos_set = false;
> >> -	id_priv->timeout_set = false;
> >> -	id_priv->min_rnr_timer_set = false;
> >> +	id_priv->flags = 0;
> > 
> > It is not needed, id_priv is allocated with kzalloc.
> 
> I agree. Did put it in due the foo = false.
> > 
> >> 	id_priv->gid_type = IB_GID_TYPE_IB;
> >> 	spin_lock_init(&id_priv->lock);
> >> 	mutex_init(&id_priv->qp_mutex);
> >> @@ -1134,10 +1132,10 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
> >> 		ret = -ENOSYS;
> >> 	}
> >> 
> >> -	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
> >> +	if ((*qp_attr_mask & IB_QP_TIMEOUT) && get_TIMEOUT_SET(id_priv->flags))
> >> 		qp_attr->timeout = id_priv->timeout;
> >> 
> >> -	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
> >> +	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && get_MIN_RNR_TIMER_SET(id_priv->flags))
> >> 		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
> >> 
> >> 	return ret;
> >> @@ -2472,7 +2470,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
> >> 		return PTR_ERR(id);
> >> 
> >> 	id->tos = id_priv->tos;
> >> -	id->tos_set = id_priv->tos_set;
> >> +	id->tos_set = get_TOS_SET(id_priv->flags);
> >> 	id->afonly = id_priv->afonly;
> >> 	id_priv->cm_id.iw = id;
> >> 
> >> @@ -2533,7 +2531,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
> >> 	cma_id_get(id_priv);
> >> 	dev_id_priv->internal_id = 1;
> >> 	dev_id_priv->afonly = id_priv->afonly;
> >> -	dev_id_priv->tos_set = id_priv->tos_set;
> >> +	dev_id_priv->flags = id_priv->flags;
> >> 	dev_id_priv->tos = id_priv->tos;
> >> 
> >> 	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
> >> @@ -2582,7 +2580,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
> >> 
> >> 	id_priv = container_of(id, struct rdma_id_private, id);
> >> 	id_priv->tos = (u8) tos;
> >> -	id_priv->tos_set = true;
> >> +	set_TOS_SET(id_priv->flags);
> >> }
> >> EXPORT_SYMBOL(rdma_set_service_type);
> >> 
> >> @@ -2610,7 +2608,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
> >> 
> >> 	id_priv = container_of(id, struct rdma_id_private, id);
> >> 	id_priv->timeout = timeout;
> >> -	id_priv->timeout_set = true;
> >> +	set_TIMEOUT_SET(id_priv->flags);
> >> 
> >> 	return 0;
> >> }
> >> @@ -2647,7 +2645,7 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
> >> 
> >> 	id_priv = container_of(id, struct rdma_id_private, id);
> >> 	id_priv->min_rnr_timer = min_rnr_timer;
> >> -	id_priv->min_rnr_timer_set = true;
> >> +	set_MIN_RNR_TIMER_SET(id_priv->flags);
> >> 
> >> 	return 0;
> >> }
> >> @@ -3033,7 +3031,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
> >> 
> >> 	u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
> >> 					rdma_start_port(id_priv->cma_dev->device)];
> >> -	u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
> >> +	u8 tos = get_TOS_SET(id_priv->flags) ? id_priv->tos : default_roce_tos;
> >> 
> >> 
> >> 	work = kzalloc(sizeof *work, GFP_KERNEL);
> >> @@ -3081,7 +3079,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
> >> 	 * PacketLifeTime = local ACK timeout/2
> >> 	 * as a reasonable approximation for RoCE networks.
> >> 	 */
> >> -	route->path_rec->packet_life_time = id_priv->timeout_set ?
> >> +	route->path_rec->packet_life_time = get_TIMEOUT_SET(id_priv->flags) ?
> >> 		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
> >> 
> >> 	if (!route->path_rec->mtu) {
> >> @@ -4107,7 +4105,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
> >> 		return PTR_ERR(cm_id);
> >> 
> >> 	cm_id->tos = id_priv->tos;
> >> -	cm_id->tos_set = id_priv->tos_set;
> >> +	cm_id->tos_set = get_TOS_SET(id_priv->flags);
> >> 	id_priv->cm_id.iw = cm_id;
> >> 
> >> 	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
> >> diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
> >> index 5c463da..2c1694f 100644
> >> --- a/drivers/infiniband/core/cma_priv.h
> >> +++ b/drivers/infiniband/core/cma_priv.h
> >> @@ -82,11 +82,9 @@ struct rdma_id_private {
> >> 	u32			qkey;
> >> 	u32			qp_num;
> >> 	u32			options;
> >> +	unsigned long		flags;
> >> 	u8			srq;
> >> 	u8			tos;
> >> -	u8			tos_set:1;
> >> -	u8                      timeout_set:1;
> >> -	u8			min_rnr_timer_set:1;
> >> 	u8			reuseaddr;
> >> 	u8			afonly;
> >> 	u8			timeout;
> >> @@ -127,4 +125,28 @@ int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
> >> 			     u8 default_roce_tos);
> >> struct ib_device *cma_get_ib_dev(struct cma_device *dev);
> >> 
> >> +#define BIT_ACCESS_FUNCTIONS(b)							\
> >> +	static inline void set_##b(unsigned long flags)				\
> >> +	{									\
> >> +		/* set_bit() does not imply a memory barrier */			\
> >> +		smp_mb__before_atomic();					\
> >> +		set_bit(b, &flags);						\
> >> +		/* set_bit() does not imply a memory barrier */			\
> >> +		smp_mb__after_atomic();						\
> >> +	}									\
> >> +	static inline bool get_##b(unsigned long flags)				\
> >> +	{									\
> >> +		return test_bit(b, &flags);					\
> >> +	}
> >> +
> >> +enum cm_id_flag_bits {
> 
> This should be called cm_id_priv_flags_bits.
> 
> >> +	TOS_SET,
> >> +	TIMEOUT_SET,
> >> +	MIN_RNR_TIMER_SET,
> >> +};
> >> +
> >> +BIT_ACCESS_FUNCTIONS(TIMEOUT_SET);
> >> +BIT_ACCESS_FUNCTIONS(TOS_SET);
> >> +BIT_ACCESS_FUNCTIONS(MIN_RNR_TIMER_SET);
> > 
> > I would prefer one function that has same syntax as set_bit() instead of
> > introducing two new that built with macros.
> > 
> > Something like this:
> > static inline set_bit_mb(long nr, unsigned long *flags)
> > {
> >   smp_mb__before_atomic();
> >   set_bit(nr, flags); 
> >   smp_mb__after_atomic();
> > }
> 
> OK. I should also probably move this to cma.c, since it is not used outside cma.c?

Yes, please

> 
> Also, do you want it checkpatch clean? Then I need the /* set_bit() does not imply a memory barrier */ comments.

It is always safer to send checkpatch-clean patches. The most important
part is to have W=1-clean patches; our subsystem is one warning away from
being a warning-free one.
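
For reference, typical invocations of both checks would be along these lines (the patch file name is just a placeholder):

$ ./scripts/checkpatch.pl 0001-RDMA-cma-example.patch
$ make W=1 drivers/infiniband/core/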

> 
> Thanks for you review, Leon.

Thank you for listening :)

> 
> 
> Håkon
> 
> > 
> >> +
> >> #endif /* _CMA_PRIV_H */
> >> -- 
> >> 1.8.3.1
>
Haakon Bugge June 17, 2021, 9:19 a.m. UTC | #6
> On 17 Jun 2021, at 09:38, Leon Romanovsky <leon@kernel.org> wrote:
> 
> On Thu, Jun 17, 2021 at 06:56:15AM +0000, Haakon Bugge wrote:
>> 
>> 
>>> On 17 Jun 2021, at 08:51, Leon Romanovsky <leon@kernel.org> wrote:
>>> 
>>> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
>>>> The struct rdma_id_private contains three bit-fields, tos_set,
>>>> timeout_set, and min_rnr_timer_set. These are set by accessor
>>>> functions without any synchronization. If two or all accessor
>>>> functions are invoked in close proximity in time, there will be
>>>> Read-Modify-Write from several contexts to the same variable, and the
>>>> result will be intermittent.
>>>> 
>>>> Replace with a flag variable and inline functions for set and get.
>>>> 
>>>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>>>> Signed-off-by: Hans Westgaard Ry<hans.westgaard.ry@oracle.com>
>>>> ---
>>>> drivers/infiniband/core/cma.c      | 24 +++++++++++-------------
>>>> drivers/infiniband/core/cma_priv.h | 28 +++++++++++++++++++++++++---
>>>> 2 files changed, 36 insertions(+), 16 deletions(-)
>>>> 
>>>> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
>>>> index 2b9ffc2..fe609e7 100644
>>>> --- a/drivers/infiniband/core/cma.c
>>>> +++ b/drivers/infiniband/core/cma.c
>>>> @@ -844,9 +844,7 @@ static void cma_id_put(struct rdma_id_private *id_priv)
>>>> 	id_priv->id.event_handler = event_handler;
>>>> 	id_priv->id.ps = ps;
>>>> 	id_priv->id.qp_type = qp_type;
>>>> -	id_priv->tos_set = false;
>>>> -	id_priv->timeout_set = false;
>>>> -	id_priv->min_rnr_timer_set = false;
>>>> +	id_priv->flags = 0;
>>> 
>>> It is not needed, id_priv is allocated with kzalloc.
>> 
>> I agree. Did put it in due the foo = false.
>>> 
>>>> 	id_priv->gid_type = IB_GID_TYPE_IB;
>>>> 	spin_lock_init(&id_priv->lock);
>>>> 	mutex_init(&id_priv->qp_mutex);
>>>> @@ -1134,10 +1132,10 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>>>> 		ret = -ENOSYS;
>>>> 	}
>>>> 
>>>> -	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
>>>> +	if ((*qp_attr_mask & IB_QP_TIMEOUT) && get_TIMEOUT_SET(id_priv->flags))
>>>> 		qp_attr->timeout = id_priv->timeout;
>>>> 
>>>> -	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
>>>> +	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && get_MIN_RNR_TIMER_SET(id_priv->flags))
>>>> 		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
>>>> 
>>>> 	return ret;
>>>> @@ -2472,7 +2470,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
>>>> 		return PTR_ERR(id);
>>>> 
>>>> 	id->tos = id_priv->tos;
>>>> -	id->tos_set = id_priv->tos_set;
>>>> +	id->tos_set = get_TOS_SET(id_priv->flags);
>>>> 	id->afonly = id_priv->afonly;
>>>> 	id_priv->cm_id.iw = id;
>>>> 
>>>> @@ -2533,7 +2531,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
>>>> 	cma_id_get(id_priv);
>>>> 	dev_id_priv->internal_id = 1;
>>>> 	dev_id_priv->afonly = id_priv->afonly;
>>>> -	dev_id_priv->tos_set = id_priv->tos_set;
>>>> +	dev_id_priv->flags = id_priv->flags;
>>>> 	dev_id_priv->tos = id_priv->tos;
>>>> 
>>>> 	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
>>>> @@ -2582,7 +2580,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
>>>> 
>>>> 	id_priv = container_of(id, struct rdma_id_private, id);
>>>> 	id_priv->tos = (u8) tos;
>>>> -	id_priv->tos_set = true;
>>>> +	set_TOS_SET(id_priv->flags);
>>>> }
>>>> EXPORT_SYMBOL(rdma_set_service_type);
>>>> 
>>>> @@ -2610,7 +2608,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
>>>> 
>>>> 	id_priv = container_of(id, struct rdma_id_private, id);
>>>> 	id_priv->timeout = timeout;
>>>> -	id_priv->timeout_set = true;
>>>> +	set_TIMEOUT_SET(id_priv->flags);
>>>> 
>>>> 	return 0;
>>>> }
>>>> @@ -2647,7 +2645,7 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
>>>> 
>>>> 	id_priv = container_of(id, struct rdma_id_private, id);
>>>> 	id_priv->min_rnr_timer = min_rnr_timer;
>>>> -	id_priv->min_rnr_timer_set = true;
>>>> +	set_MIN_RNR_TIMER_SET(id_priv->flags);
>>>> 
>>>> 	return 0;
>>>> }
>>>> @@ -3033,7 +3031,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
>>>> 
>>>> 	u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
>>>> 					rdma_start_port(id_priv->cma_dev->device)];
>>>> -	u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
>>>> +	u8 tos = get_TOS_SET(id_priv->flags) ? id_priv->tos : default_roce_tos;
>>>> 
>>>> 
>>>> 	work = kzalloc(sizeof *work, GFP_KERNEL);
>>>> @@ -3081,7 +3079,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
>>>> 	 * PacketLifeTime = local ACK timeout/2
>>>> 	 * as a reasonable approximation for RoCE networks.
>>>> 	 */
>>>> -	route->path_rec->packet_life_time = id_priv->timeout_set ?
>>>> +	route->path_rec->packet_life_time = get_TIMEOUT_SET(id_priv->flags) ?
>>>> 		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
>>>> 
>>>> 	if (!route->path_rec->mtu) {
>>>> @@ -4107,7 +4105,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
>>>> 		return PTR_ERR(cm_id);
>>>> 
>>>> 	cm_id->tos = id_priv->tos;
>>>> -	cm_id->tos_set = id_priv->tos_set;
>>>> +	cm_id->tos_set = get_TOS_SET(id_priv->flags);
>>>> 	id_priv->cm_id.iw = cm_id;
>>>> 
>>>> 	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
>>>> diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
>>>> index 5c463da..2c1694f 100644
>>>> --- a/drivers/infiniband/core/cma_priv.h
>>>> +++ b/drivers/infiniband/core/cma_priv.h
>>>> @@ -82,11 +82,9 @@ struct rdma_id_private {
>>>> 	u32			qkey;
>>>> 	u32			qp_num;
>>>> 	u32			options;
>>>> +	unsigned long		flags;
>>>> 	u8			srq;
>>>> 	u8			tos;
>>>> -	u8			tos_set:1;
>>>> -	u8                      timeout_set:1;
>>>> -	u8			min_rnr_timer_set:1;
>>>> 	u8			reuseaddr;
>>>> 	u8			afonly;
>>>> 	u8			timeout;
>>>> @@ -127,4 +125,28 @@ int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
>>>> 			     u8 default_roce_tos);
>>>> struct ib_device *cma_get_ib_dev(struct cma_device *dev);
>>>> 
>>>> +#define BIT_ACCESS_FUNCTIONS(b)							\
>>>> +	static inline void set_##b(unsigned long flags)				\
>>>> +	{									\
>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>> +		smp_mb__before_atomic();					\
>>>> +		set_bit(b, &flags);						\
>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>> +		smp_mb__after_atomic();						\
>>>> +	}									\
>>>> +	static inline bool get_##b(unsigned long flags)				\
>>>> +	{									\
>>>> +		return test_bit(b, &flags);					\
>>>> +	}
>>>> +
>>>> +enum cm_id_flag_bits {
>> 
>> This should be called cm_id_priv_flags_bits.
>> 
>>>> +	TOS_SET,
>>>> +	TIMEOUT_SET,
>>>> +	MIN_RNR_TIMER_SET,
>>>> +};
>>>> +
>>>> +BIT_ACCESS_FUNCTIONS(TIMEOUT_SET);
>>>> +BIT_ACCESS_FUNCTIONS(TOS_SET);
>>>> +BIT_ACCESS_FUNCTIONS(MIN_RNR_TIMER_SET);
>>> 
>>> I would prefer one function that has same syntax as set_bit() instead of
>>> introducing two new that built with macros.
>>> 
>>> Something like this:
>>> static inline set_bit_mb(long nr, unsigned long *flags)
>>> {
>>>  smp_mb__before_atomic();
>>>  set_bit(nr, flags); 
>>>  smp_mb__after_atomic();
>>> }
>> 
>> OK. I should also probably move this to cma.c, since it is not used outside cma.c?
> 
> Yes, please
> 
>> 
>> Also, do you want it checkpatch clean? Then I need the /* set_bit() does not imply a memory barrier */ comments.
> 
> It is always safer to send checkpatch clean patches. The most important
> part is to have W=1 clean patches, our subsystem is one warning away from
> being warnings-free one.
> 
>> 
>> Thanks for you review, Leon.
> 
> Thank you for listening :)

From checkpatch:

ERROR: inline keyword should sit between storage class and type
#47: FILE: drivers/infiniband/core/cma.c:51:
+inline static void set_bit_mb(unsigned long nr, unsigned long *flags)

From W=1:

  CC [M]  /home/hbugge/git/linux-uek/drivers/infiniband/core/cma.o
/home/hbugge/git/linux-uek/drivers/infiniband/core/cma.c:51:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration]
 static void inline set_bit_mb(unsigned long nr, unsigned long *flags)
 ^~~~~~


Which one do you prefer?


Håkon


>> 
>> 
>> Håkon
>> 
>>> 
>>>> +
>>>> #endif /* _CMA_PRIV_H */
>>>> -- 
>>>> 1.8.3.1
Leon Romanovsky June 17, 2021, 12:49 p.m. UTC | #7
On Thu, Jun 17, 2021 at 09:19:26AM +0000, Haakon Bugge wrote:
> 
> 
> > On 17 Jun 2021, at 09:38, Leon Romanovsky <leon@kernel.org> wrote:
> > 
> > On Thu, Jun 17, 2021 at 06:56:15AM +0000, Haakon Bugge wrote:
> >> 
> >> 
> >>> On 17 Jun 2021, at 08:51, Leon Romanovsky <leon@kernel.org> wrote:
> >>> 
> >>> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
> >>>> The struct rdma_id_private contains three bit-fields, tos_set,
> >>>> timeout_set, and min_rnr_timer_set. These are set by accessor
> >>>> functions without any synchronization. If two or all accessor
> >>>> functions are invoked in close proximity in time, there will be
> >>>> Read-Modify-Write from several contexts to the same variable, and the
> >>>> result will be intermittent.
> >>>> 
> >>>> Replace with a flag variable and inline functions for set and get.
> >>>> 
> >>>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> >>>> Signed-off-by: Hans Westgaard Ry<hans.westgaard.ry@oracle.com>
> >>>> ---
> >>>> drivers/infiniband/core/cma.c      | 24 +++++++++++-------------
> >>>> drivers/infiniband/core/cma_priv.h | 28 +++++++++++++++++++++++++---
> >>>> 2 files changed, 36 insertions(+), 16 deletions(-)
> >>>> 
> >>>> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> >>>> index 2b9ffc2..fe609e7 100644
> >>>> --- a/drivers/infiniband/core/cma.c
> >>>> +++ b/drivers/infiniband/core/cma.c
> >>>> @@ -844,9 +844,7 @@ static void cma_id_put(struct rdma_id_private *id_priv)
> >>>> 	id_priv->id.event_handler = event_handler;
> >>>> 	id_priv->id.ps = ps;
> >>>> 	id_priv->id.qp_type = qp_type;
> >>>> -	id_priv->tos_set = false;
> >>>> -	id_priv->timeout_set = false;
> >>>> -	id_priv->min_rnr_timer_set = false;
> >>>> +	id_priv->flags = 0;
> >>> 
> >>> It is not needed, id_priv is allocated with kzalloc.
> >> 
> >> I agree. Did put it in due the foo = false.
> >>> 
> >>>> 	id_priv->gid_type = IB_GID_TYPE_IB;
> >>>> 	spin_lock_init(&id_priv->lock);
> >>>> 	mutex_init(&id_priv->qp_mutex);
> >>>> @@ -1134,10 +1132,10 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
> >>>> 		ret = -ENOSYS;
> >>>> 	}
> >>>> 
> >>>> -	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
> >>>> +	if ((*qp_attr_mask & IB_QP_TIMEOUT) && get_TIMEOUT_SET(id_priv->flags))
> >>>> 		qp_attr->timeout = id_priv->timeout;
> >>>> 
> >>>> -	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
> >>>> +	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && get_MIN_RNR_TIMER_SET(id_priv->flags))
> >>>> 		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
> >>>> 
> >>>> 	return ret;
> >>>> @@ -2472,7 +2470,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
> >>>> 		return PTR_ERR(id);
> >>>> 
> >>>> 	id->tos = id_priv->tos;
> >>>> -	id->tos_set = id_priv->tos_set;
> >>>> +	id->tos_set = get_TOS_SET(id_priv->flags);
> >>>> 	id->afonly = id_priv->afonly;
> >>>> 	id_priv->cm_id.iw = id;
> >>>> 
> >>>> @@ -2533,7 +2531,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
> >>>> 	cma_id_get(id_priv);
> >>>> 	dev_id_priv->internal_id = 1;
> >>>> 	dev_id_priv->afonly = id_priv->afonly;
> >>>> -	dev_id_priv->tos_set = id_priv->tos_set;
> >>>> +	dev_id_priv->flags = id_priv->flags;
> >>>> 	dev_id_priv->tos = id_priv->tos;
> >>>> 
> >>>> 	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
> >>>> @@ -2582,7 +2580,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
> >>>> 
> >>>> 	id_priv = container_of(id, struct rdma_id_private, id);
> >>>> 	id_priv->tos = (u8) tos;
> >>>> -	id_priv->tos_set = true;
> >>>> +	set_TOS_SET(id_priv->flags);
> >>>> }
> >>>> EXPORT_SYMBOL(rdma_set_service_type);
> >>>> 
> >>>> @@ -2610,7 +2608,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
> >>>> 
> >>>> 	id_priv = container_of(id, struct rdma_id_private, id);
> >>>> 	id_priv->timeout = timeout;
> >>>> -	id_priv->timeout_set = true;
> >>>> +	set_TIMEOUT_SET(id_priv->flags);
> >>>> 
> >>>> 	return 0;
> >>>> }
> >>>> @@ -2647,7 +2645,7 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
> >>>> 
> >>>> 	id_priv = container_of(id, struct rdma_id_private, id);
> >>>> 	id_priv->min_rnr_timer = min_rnr_timer;
> >>>> -	id_priv->min_rnr_timer_set = true;
> >>>> +	set_MIN_RNR_TIMER_SET(id_priv->flags);
> >>>> 
> >>>> 	return 0;
> >>>> }
> >>>> @@ -3033,7 +3031,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
> >>>> 
> >>>> 	u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
> >>>> 					rdma_start_port(id_priv->cma_dev->device)];
> >>>> -	u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
> >>>> +	u8 tos = get_TOS_SET(id_priv->flags) ? id_priv->tos : default_roce_tos;
> >>>> 
> >>>> 
> >>>> 	work = kzalloc(sizeof *work, GFP_KERNEL);
> >>>> @@ -3081,7 +3079,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
> >>>> 	 * PacketLifeTime = local ACK timeout/2
> >>>> 	 * as a reasonable approximation for RoCE networks.
> >>>> 	 */
> >>>> -	route->path_rec->packet_life_time = id_priv->timeout_set ?
> >>>> +	route->path_rec->packet_life_time = get_TIMEOUT_SET(id_priv->flags) ?
> >>>> 		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
> >>>> 
> >>>> 	if (!route->path_rec->mtu) {
> >>>> @@ -4107,7 +4105,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
> >>>> 		return PTR_ERR(cm_id);
> >>>> 
> >>>> 	cm_id->tos = id_priv->tos;
> >>>> -	cm_id->tos_set = id_priv->tos_set;
> >>>> +	cm_id->tos_set = get_TOS_SET(id_priv->flags);
> >>>> 	id_priv->cm_id.iw = cm_id;
> >>>> 
> >>>> 	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
> >>>> diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
> >>>> index 5c463da..2c1694f 100644
> >>>> --- a/drivers/infiniband/core/cma_priv.h
> >>>> +++ b/drivers/infiniband/core/cma_priv.h
> >>>> @@ -82,11 +82,9 @@ struct rdma_id_private {
> >>>> 	u32			qkey;
> >>>> 	u32			qp_num;
> >>>> 	u32			options;
> >>>> +	unsigned long		flags;
> >>>> 	u8			srq;
> >>>> 	u8			tos;
> >>>> -	u8			tos_set:1;
> >>>> -	u8                      timeout_set:1;
> >>>> -	u8			min_rnr_timer_set:1;
> >>>> 	u8			reuseaddr;
> >>>> 	u8			afonly;
> >>>> 	u8			timeout;
> >>>> @@ -127,4 +125,28 @@ int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
> >>>> 			     u8 default_roce_tos);
> >>>> struct ib_device *cma_get_ib_dev(struct cma_device *dev);
> >>>> 
> >>>> +#define BIT_ACCESS_FUNCTIONS(b)							\
> >>>> +	static inline void set_##b(unsigned long flags)				\
> >>>> +	{									\
> >>>> +		/* set_bit() does not imply a memory barrier */			\
> >>>> +		smp_mb__before_atomic();					\
> >>>> +		set_bit(b, &flags);						\
> >>>> +		/* set_bit() does not imply a memory barrier */			\
> >>>> +		smp_mb__after_atomic();						\
> >>>> +	}									\
> >>>> +	static inline bool get_##b(unsigned long flags)				\
> >>>> +	{									\
> >>>> +		return test_bit(b, &flags);					\
> >>>> +	}
> >>>> +
> >>>> +enum cm_id_flag_bits {
> >> 
> >> This should be called cm_id_priv_flags_bits.
> >> 
> >>>> +	TOS_SET,
> >>>> +	TIMEOUT_SET,
> >>>> +	MIN_RNR_TIMER_SET,
> >>>> +};
> >>>> +
> >>>> +BIT_ACCESS_FUNCTIONS(TIMEOUT_SET);
> >>>> +BIT_ACCESS_FUNCTIONS(TOS_SET);
> >>>> +BIT_ACCESS_FUNCTIONS(MIN_RNR_TIMER_SET);
> >>> 
> >>> I would prefer one function that has same syntax as set_bit() instead of
> >>> introducing two new that built with macros.
> >>> 
> >>> Something like this:
> >>> static inline set_bit_mb(long nr, unsigned long *flags)
> >>> {
> >>>  smp_mb__before_atomic();
> >>>  set_bit(nr, flags); 
> >>>  smp_mb__after_atomic();
> >>> }
> >> 
> >> OK. I should also probably move this to cma.c, since it is not used outside cma.c?
> > 
> > Yes, please
> > 
> >> 
> >> Also, do you want it checkpatch clean? Then I need the /* set_bit() does not imply a memory barrier */ comments.
> > 
> > It is always safer to send checkpatch clean patches. The most important
> > part is to have W=1 clean patches, our subsystem is one warning away from
> > being warnings-free one.
> > 
> >> 
> >> Thanks for you review, Leon.
> > 
> > Thank you for listening :)
> 
> From checkpatch:
> 
> ERROR: inline keyword should sit between storage class and type
> #47: FILE: drivers/infiniband/core/cma.c:51:
> +inline static void set_bit_mb(unsigned long nr, unsigned long *flags)
> 
> From W=1:
> 
>   CC [M]  /home/hbugge/git/linux-uek/drivers/infiniband/core/cma.o
> /home/hbugge/git/linux-uek/drivers/infiniband/core/cma.c:51:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration]
>  static void inline set_bit_mb(unsigned long nr, unsigned long *flags)
>  ^~~~~~
> 
> 
> which one do you prefer?

The one that is written in CodingStyle - do not use inline functions in a .c file.
BTW, it is "static inline void set_bit_mb" and not as you wrote.
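
Putting the two points together (storage class first, and no inline in a .c file), the helper would presumably end up roughly as:

static void set_bit_mb(unsigned long nr, unsigned long *flags)
{
	/* set_bit() does not imply a memory barrier */
	smp_mb__before_atomic();
	set_bit(nr, flags);
	/* set_bit() does not imply a memory barrier */
	smp_mb__after_atomic();
}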

Thanks

> 
> 
> Håkon
> 
> 
> >> 
> >> 
> >> Håkon
> >> 
> >>> 
> >>>> +
> >>>> #endif /* _CMA_PRIV_H */
> >>>> -- 
> >>>> 1.8.3.1
>
Haakon Bugge June 18, 2021, 1:57 p.m. UTC | #8
> On 17 Jun 2021, at 14:49, Leon Romanovsky <leon@kernel.org> wrote:
> 
> On Thu, Jun 17, 2021 at 09:19:26AM +0000, Haakon Bugge wrote:
>> 
>> 
>>> On 17 Jun 2021, at 09:38, Leon Romanovsky <leon@kernel.org> wrote:
>>> 
>>> On Thu, Jun 17, 2021 at 06:56:15AM +0000, Haakon Bugge wrote:
>>>> 
>>>> 
>>>>> On 17 Jun 2021, at 08:51, Leon Romanovsky <leon@kernel.org> wrote:
>>>>> 
>>>>> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
>>>>>> The struct rdma_id_private contains three bit-fields, tos_set,
>>>>>> timeout_set, and min_rnr_timer_set. These are set by accessor
>>>>>> functions without any synchronization. If two or all accessor
>>>>>> functions are invoked in close proximity in time, there will be
>>>>>> Read-Modify-Write from several contexts to the same variable, and the
>>>>>> result will be intermittent.
>>>>>> 
>>>>>> Replace with a flag variable and inline functions for set and get.
>>>>>> 
>>>>>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>>>>>> Signed-off-by: Hans Westgaard Ry<hans.westgaard.ry@oracle.com>
>>>>>> ---
>>>>>> drivers/infiniband/core/cma.c      | 24 +++++++++++-------------
>>>>>> drivers/infiniband/core/cma_priv.h | 28 +++++++++++++++++++++++++---
>>>>>> 2 files changed, 36 insertions(+), 16 deletions(-)
>>>>>> 
>>>>>> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
>>>>>> index 2b9ffc2..fe609e7 100644
>>>>>> --- a/drivers/infiniband/core/cma.c
>>>>>> +++ b/drivers/infiniband/core/cma.c
>>>>>> @@ -844,9 +844,7 @@ static void cma_id_put(struct rdma_id_private *id_priv)
>>>>>> 	id_priv->id.event_handler = event_handler;
>>>>>> 	id_priv->id.ps = ps;
>>>>>> 	id_priv->id.qp_type = qp_type;
>>>>>> -	id_priv->tos_set = false;
>>>>>> -	id_priv->timeout_set = false;
>>>>>> -	id_priv->min_rnr_timer_set = false;
>>>>>> +	id_priv->flags = 0;
>>>>> 
>>>>> It is not needed, id_priv is allocated with kzalloc.
>>>> 
>>>> I agree. Did put it in due the foo = false.
>>>>> 
>>>>>> 	id_priv->gid_type = IB_GID_TYPE_IB;
>>>>>> 	spin_lock_init(&id_priv->lock);
>>>>>> 	mutex_init(&id_priv->qp_mutex);
>>>>>> @@ -1134,10 +1132,10 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
>>>>>> 		ret = -ENOSYS;
>>>>>> 	}
>>>>>> 
>>>>>> -	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
>>>>>> +	if ((*qp_attr_mask & IB_QP_TIMEOUT) && get_TIMEOUT_SET(id_priv->flags))
>>>>>> 		qp_attr->timeout = id_priv->timeout;
>>>>>> 
>>>>>> -	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
>>>>>> +	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && get_MIN_RNR_TIMER_SET(id_priv->flags))
>>>>>> 		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
>>>>>> 
>>>>>> 	return ret;
>>>>>> @@ -2472,7 +2470,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
>>>>>> 		return PTR_ERR(id);
>>>>>> 
>>>>>> 	id->tos = id_priv->tos;
>>>>>> -	id->tos_set = id_priv->tos_set;
>>>>>> +	id->tos_set = get_TOS_SET(id_priv->flags);
>>>>>> 	id->afonly = id_priv->afonly;
>>>>>> 	id_priv->cm_id.iw = id;
>>>>>> 
>>>>>> @@ -2533,7 +2531,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
>>>>>> 	cma_id_get(id_priv);
>>>>>> 	dev_id_priv->internal_id = 1;
>>>>>> 	dev_id_priv->afonly = id_priv->afonly;
>>>>>> -	dev_id_priv->tos_set = id_priv->tos_set;
>>>>>> +	dev_id_priv->flags = id_priv->flags;
>>>>>> 	dev_id_priv->tos = id_priv->tos;
>>>>>> 
>>>>>> 	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
>>>>>> @@ -2582,7 +2580,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
>>>>>> 
>>>>>> 	id_priv = container_of(id, struct rdma_id_private, id);
>>>>>> 	id_priv->tos = (u8) tos;
>>>>>> -	id_priv->tos_set = true;
>>>>>> +	set_TOS_SET(id_priv->flags);
>>>>>> }
>>>>>> EXPORT_SYMBOL(rdma_set_service_type);
>>>>>> 
>>>>>> @@ -2610,7 +2608,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
>>>>>> 
>>>>>> 	id_priv = container_of(id, struct rdma_id_private, id);
>>>>>> 	id_priv->timeout = timeout;
>>>>>> -	id_priv->timeout_set = true;
>>>>>> +	set_TIMEOUT_SET(id_priv->flags);
>>>>>> 
>>>>>> 	return 0;
>>>>>> }
>>>>>> @@ -2647,7 +2645,7 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
>>>>>> 
>>>>>> 	id_priv = container_of(id, struct rdma_id_private, id);
>>>>>> 	id_priv->min_rnr_timer = min_rnr_timer;
>>>>>> -	id_priv->min_rnr_timer_set = true;
>>>>>> +	set_MIN_RNR_TIMER_SET(id_priv->flags);
>>>>>> 
>>>>>> 	return 0;
>>>>>> }
>>>>>> @@ -3033,7 +3031,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
>>>>>> 
>>>>>> 	u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
>>>>>> 					rdma_start_port(id_priv->cma_dev->device)];
>>>>>> -	u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
>>>>>> +	u8 tos = get_TOS_SET(id_priv->flags) ? id_priv->tos : default_roce_tos;
>>>>>> 
>>>>>> 
>>>>>> 	work = kzalloc(sizeof *work, GFP_KERNEL);
>>>>>> @@ -3081,7 +3079,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
>>>>>> 	 * PacketLifeTime = local ACK timeout/2
>>>>>> 	 * as a reasonable approximation for RoCE networks.
>>>>>> 	 */
>>>>>> -	route->path_rec->packet_life_time = id_priv->timeout_set ?
>>>>>> +	route->path_rec->packet_life_time = get_TIMEOUT_SET(id_priv->flags) ?
>>>>>> 		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
>>>>>> 
>>>>>> 	if (!route->path_rec->mtu) {
>>>>>> @@ -4107,7 +4105,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
>>>>>> 		return PTR_ERR(cm_id);
>>>>>> 
>>>>>> 	cm_id->tos = id_priv->tos;
>>>>>> -	cm_id->tos_set = id_priv->tos_set;
>>>>>> +	cm_id->tos_set = get_TOS_SET(id_priv->flags);
>>>>>> 	id_priv->cm_id.iw = cm_id;
>>>>>> 
>>>>>> 	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
>>>>>> diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
>>>>>> index 5c463da..2c1694f 100644
>>>>>> --- a/drivers/infiniband/core/cma_priv.h
>>>>>> +++ b/drivers/infiniband/core/cma_priv.h
>>>>>> @@ -82,11 +82,9 @@ struct rdma_id_private {
>>>>>> 	u32			qkey;
>>>>>> 	u32			qp_num;
>>>>>> 	u32			options;
>>>>>> +	unsigned long		flags;
>>>>>> 	u8			srq;
>>>>>> 	u8			tos;
>>>>>> -	u8			tos_set:1;
>>>>>> -	u8                      timeout_set:1;
>>>>>> -	u8			min_rnr_timer_set:1;
>>>>>> 	u8			reuseaddr;
>>>>>> 	u8			afonly;
>>>>>> 	u8			timeout;
>>>>>> @@ -127,4 +125,28 @@ int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
>>>>>> 			     u8 default_roce_tos);
>>>>>> struct ib_device *cma_get_ib_dev(struct cma_device *dev);
>>>>>> 
>>>>>> +#define BIT_ACCESS_FUNCTIONS(b)							\
>>>>>> +	static inline void set_##b(unsigned long flags)				\
>>>>>> +	{									\
>>>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>>>> +		smp_mb__before_atomic();					\
>>>>>> +		set_bit(b, &flags);						\
>>>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>>>> +		smp_mb__after_atomic();						\
>>>>>> +	}									\
>>>>>> +	static inline bool get_##b(unsigned long flags)				\
>>>>>> +	{									\
>>>>>> +		return test_bit(b, &flags);					\
>>>>>> +	}
>>>>>> +
>>>>>> +enum cm_id_flag_bits {
>>>> 
>>>> This should be called cm_id_priv_flags_bits.
>>>> 
>>>>>> +	TOS_SET,
>>>>>> +	TIMEOUT_SET,
>>>>>> +	MIN_RNR_TIMER_SET,
>>>>>> +};
>>>>>> +
>>>>>> +BIT_ACCESS_FUNCTIONS(TIMEOUT_SET);
>>>>>> +BIT_ACCESS_FUNCTIONS(TOS_SET);
>>>>>> +BIT_ACCESS_FUNCTIONS(MIN_RNR_TIMER_SET);
>>>>> 
>>>>> I would prefer one function that has same syntax as set_bit() instead of
>>>>> introducing two new that built with macros.
>>>>> 
>>>>> Something like this:
>>>>> static inline set_bit_mb(long nr, unsigned long *flags)
>>>>> {
>>>>> smp_mb__before_atomic();
>>>>> set_bit(nr, flags); 
>>>>> smp_mb__after_atomic();
>>>>> }
>>>> 
>>>> OK. I should also probably move this to cma.c, since it is not used outside cma.c?
>>> 
>>> Yes, please
>>> 
>>>> 
>>>> Also, do you want it checkpatch clean? Then I need the /* set_bit() does not imply a memory barrier */ comments.
>>> 
>>> It is always safer to send checkpatch clean patches. The most important
>>> part is to have W=1 clean patches, our subsystem is one warning away from
>>> being warnings-free one.
>>> 
>>>> 
>>>> Thanks for you review, Leon.
>>> 
>>> Thank you for listening :)
>> 
>> From checkpatch:
>> 
>> ERROR: inline keyword should sit between storage class and type
>> #47: FILE: drivers/infiniband/core/cma.c:51:
>> +inline static void set_bit_mb(unsigned long nr, unsigned long *flags)
>> 
>> From W=1:
>> 
>>  CC [M]  /home/hbugge/git/linux-uek/drivers/infiniband/core/cma.o
>> /home/hbugge/git/linux-uek/drivers/infiniband/core/cma.c:51:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration]
>> static void inline set_bit_mb(unsigned long nr, unsigned long *flags)
>> ^~~~~~
>> 
>> 
>> which one do you prefer?
> 
> The one that is written in CodingStyle - do not use inline functions in .c file.
> BTW, it is "static inline void set_bit_mb" and not as you wrote.

Sure, I remember something about the "inline disease" :-) I omitted inline in the v2. They got inlined despite that, on x86_64.


Thxs, Håkon

> 
> Thanks
> 
>> 
>> 
>> Håkon
>> 
>> 
>>>> 
>>>> 
>>>> Håkon
>>>> 
>>>>> 
>>>>>> +
>>>>>> #endif /* _CMA_PRIV_H */
>>>>>> -- 
>>>>>> 1.8.3.1
Jason Gunthorpe June 21, 2021, 2:35 p.m. UTC | #9
On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
> +#define BIT_ACCESS_FUNCTIONS(b)							\
> +	static inline void set_##b(unsigned long flags)				\
> +	{									\
> +		/* set_bit() does not imply a memory barrier */			\
> +		smp_mb__before_atomic();					\
> +		set_bit(b, &flags);						\
> +		/* set_bit() does not imply a memory barrier */			\
> +		smp_mb__after_atomic();						\
> +	}

This isn't needed, set_bit/test_bit are already atomic with
themselves, we should not need to introduce release semantics.

Please split this to one patch per variable

Every variable should be evalulated to decide if we should hold the
spinlock instead of relying on atomics.

Jason
Haakon Bugge June 21, 2021, 3:30 p.m. UTC | #10
> On 21 Jun 2021, at 16:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
>> +#define BIT_ACCESS_FUNCTIONS(b)							\
>> +	static inline void set_##b(unsigned long flags)				\
>> +	{									\
>> +		/* set_bit() does not imply a memory barrier */			\
>> +		smp_mb__before_atomic();					\
>> +		set_bit(b, &flags);						\
>> +		/* set_bit() does not imply a memory barrier */			\
>> +		smp_mb__after_atomic();						\
>> +	}
> 
> This isn't needed, set_bit/test_bit are already atomic with
> themselves, we should not need to introduce release semantics.

They are atomic, yes. But set_bit() does not provide a memory barrier (it does on x86_64, but not as per the Linux definition of set_bit()).

We have (paraphrased):

	id_priv->min_rnr_timer = min_rnr_timer;
	set_bit(MIN_RNR_TIMER_SET, &id_priv->flags);

Since set_bit() does not provide a memory barrier, another thread may observe the MIN_RNR_TIMER_SET bit in id_priv->flags while the id_priv->min_rnr_timer value is not yet globally visible. Hence, IMHO, we need the memory barriers.
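
For completeness, a barrier-based scheme would also need a pairing barrier on the reader side; roughly (a sketch, not proposed code):

/* writer */
id_priv->min_rnr_timer = min_rnr_timer;
smp_mb__before_atomic();
set_bit(MIN_RNR_TIMER_SET, &id_priv->flags);

/* reader */
if (test_bit(MIN_RNR_TIMER_SET, &id_priv->flags)) {
	smp_rmb();	/* pairs with smp_mb__before_atomic() in the writer */
	qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
}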

> Please split this to one patch per variable
> 
> Every variable should be evalulated to decide if we should hold the
> spinlock instead of relying on atomics.

OK. Will do.


Thxs, Håkon
Jason Gunthorpe June 21, 2021, 3:32 p.m. UTC | #11
On Mon, Jun 21, 2021 at 03:30:14PM +0000, Haakon Bugge wrote:
> 
> 
> > On 21 Jun 2021, at 16:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> > 
> > On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
> >> +#define BIT_ACCESS_FUNCTIONS(b)							\
> >> +	static inline void set_##b(unsigned long flags)				\
> >> +	{									\
> >> +		/* set_bit() does not imply a memory barrier */			\
> >> +		smp_mb__before_atomic();					\
> >> +		set_bit(b, &flags);						\
> >> +		/* set_bit() does not imply a memory barrier */			\
> >> +		smp_mb__after_atomic();						\
> >> +	}
> > 
> > This isn't needed, set_bit/test_bit are already atomic with
> > themselves, we should not need to introduce release semantics.
> 
> They are atomic, yes. But set_bit() does not provide a memory barrier (on x86_64, yes, but not as per the Linux definition of set_bit()).
> 
> We have (paraphrased):
> 
> 	id_priv->min_rnr_timer = min_rnr_timer;
> 	set_bit(MIN_RNR_TIMER_SET, &id_priv->flags);
> 
> Since set_bit() does not provide a memory barrier, another thread
> may observe the MIN_RNR_TIMER_SET bit in id_priv->flags, but the
> id_priv->min_rnr_timer value is not yet globally visible. Hence,
> IMHO, we need the memory barriers.

No, you need proper locks.

Jason
Haakon Bugge June 21, 2021, 3:37 p.m. UTC | #12
> On 21 Jun 2021, at 17:32, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Mon, Jun 21, 2021 at 03:30:14PM +0000, Haakon Bugge wrote:
>> 
>> 
>>> On 21 Jun 2021, at 16:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>> 
>>> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
>>>> +#define BIT_ACCESS_FUNCTIONS(b)							\
>>>> +	static inline void set_##b(unsigned long flags)				\
>>>> +	{									\
>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>> +		smp_mb__before_atomic();					\
>>>> +		set_bit(b, &flags);						\
>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>> +		smp_mb__after_atomic();						\
>>>> +	}
>>> 
>>> This isn't needed, set_bit/test_bit are already atomic with
>>> themselves, we should not need to introduce release semantics.
>> 
>> They are atomic, yes. But set_bit() does not provide a memory barrier (on x86_64, yes, but not as per the Linux definition of set_bit()).
>> 
>> We have (paraphrased):
>> 
>> 	id_priv->min_rnr_timer = min_rnr_timer;
>> 	set_bit(MIN_RNR_TIMER_SET, &id_priv->flags);
>> 
>> Since set_bit() does not provide a memory barrier, another thread
>> may observe the MIN_RNR_TIMER_SET bit in id_priv->flags, but the
>> id_priv->min_rnr_timer value is not yet globally visible. Hence,
>> IMHO, we need the memory barriers.
> 
> No, you need proper locks.

Either will work in my opinion. If you prefer locking, I can do that. This is not performance critical.


Thxs, Håkon

> 
> Jason
Jason Gunthorpe June 21, 2021, 11:29 p.m. UTC | #13
On Mon, Jun 21, 2021 at 03:37:10PM +0000, Haakon Bugge wrote:
> 
> 
> > On 21 Jun 2021, at 17:32, Jason Gunthorpe <jgg@nvidia.com> wrote:
> > 
> > On Mon, Jun 21, 2021 at 03:30:14PM +0000, Haakon Bugge wrote:
> >> 
> >> 
> >>> On 21 Jun 2021, at 16:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
> >>> 
> >>> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
> >>>> +#define BIT_ACCESS_FUNCTIONS(b)							\
> >>>> +	static inline void set_##b(unsigned long flags)				\
> >>>> +	{									\
> >>>> +		/* set_bit() does not imply a memory barrier */			\
> >>>> +		smp_mb__before_atomic();					\
> >>>> +		set_bit(b, &flags);						\
> >>>> +		/* set_bit() does not imply a memory barrier */			\
> >>>> +		smp_mb__after_atomic();						\
> >>>> +	}
> >>> 
> >>> This isn't needed, set_bit/test_bit are already atomic with
> >>> themselves, we should not need to introduce release semantics.
> >> 
> >> They are atomic, yes. But set_bit() does not provide a memory barrier (on x86_64, yes, but not as per the Linux definition of set_bit()).
> >> 
> >> We have (paraphrased):
> >> 
> >> 	id_priv->min_rnr_timer = min_rnr_timer;
> >> 	set_bit(MIN_RNR_TIMER_SET, &id_priv->flags);
> >> 
> >> Since set_bit() does not provide a memory barrier, another thread
> >> may observe the MIN_RNR_TIMER_SET bit in id_priv->flags, but the
> >> id_priv->min_rnr_timer value is not yet globally visible. Hence,
> >> IMHO, we need the memory barriers.
> > 
> > No, you need proper locks.
> 
> Either will work in my opinion. If you prefer locking, I can do
> that. This is not performance critical.

Yes, use locks please

Jason
Haakon Bugge June 22, 2021, 7:34 a.m. UTC | #14
> On 22 Jun 2021, at 01:29, Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> On Mon, Jun 21, 2021 at 03:37:10PM +0000, Haakon Bugge wrote:
>> 
>> 
>>> On 21 Jun 2021, at 17:32, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>> 
>>> On Mon, Jun 21, 2021 at 03:30:14PM +0000, Haakon Bugge wrote:
>>>> 
>>>> 
>>>>> On 21 Jun 2021, at 16:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>>>> 
>>>>> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
>>>>>> +#define BIT_ACCESS_FUNCTIONS(b)							\
>>>>>> +	static inline void set_##b(unsigned long flags)				\
>>>>>> +	{									\
>>>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>>>> +		smp_mb__before_atomic();					\
>>>>>> +		set_bit(b, &flags);						\
>>>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>>>> +		smp_mb__after_atomic();						\
>>>>>> +	}
>>>>> 
>>>>> This isn't needed, set_bit/test_bit are already atomic with
>>>>> themselves, we should not need to introduce release semantics.
>>>> 
>>>> They are atomic, yes. But set_bit() does not provide a memory barrier (on x86_64, yes, but not as per the Linux definition of set_bit()).
>>>> 
>>>> We have (paraphrased):
>>>> 
>>>> 	id_priv->min_rnr_timer = min_rnr_timer;
>>>> 	set_bit(MIN_RNR_TIMER_SET, &id_priv->flags);
>>>> 
>>>> Since set_bit() does not provide a memory barrier, another thread
>>>> may observe the MIN_RNR_TIMER_SET bit in id_priv->flags, but the
>>>> id_priv->min_rnr_timer value is not yet globally visible. Hence,
>>>> IMHO, we need the memory barriers.
>>> 
>>> No, you need proper locks.
>> 
>> Either will work in my opinion. If you prefer locking, I can do
>> that. This is not performance critical.
> 
> Yes, use locks please

With locking, there is no need to change the bit-fields to a flags variable and set_bit()/test_bit(). But for the fix to be complete, the locking must then be done in all three places. Hence, I'll send one commit with locking.
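
A rough sketch of what I have in mind, reusing the existing qp_mutex (illustrative only; the actual patch may differ):

int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
{
	struct rdma_id_private *id_priv;

	/* argument validation as in the existing function, elided here */

	id_priv = container_of(id, struct rdma_id_private, id);

	mutex_lock(&id_priv->qp_mutex);
	id_priv->min_rnr_timer = min_rnr_timer;
	id_priv->min_rnr_timer_set = true;	/* bit-field kept as-is */
	mutex_unlock(&id_priv->qp_mutex);

	return 0;
}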


Thxs, Håkon
Haakon Bugge June 22, 2021, 7:44 a.m. UTC | #15
> On 22 Jun 2021, at 09:34, Haakon Bugge <haakon.bugge@oracle.com> wrote:
> 
> 
> 
>> On 22 Jun 2021, at 01:29, Jason Gunthorpe <jgg@nvidia.com> wrote:
>> 
>> On Mon, Jun 21, 2021 at 03:37:10PM +0000, Haakon Bugge wrote:
>>> 
>>> 
>>>> On 21 Jun 2021, at 17:32, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>>> 
>>>> On Mon, Jun 21, 2021 at 03:30:14PM +0000, Haakon Bugge wrote:
>>>>> 
>>>>> 
>>>>>> On 21 Jun 2021, at 16:35, Jason Gunthorpe <jgg@nvidia.com> wrote:
>>>>>> 
>>>>>> On Wed, Jun 16, 2021 at 04:35:53PM +0200, Håkon Bugge wrote:
>>>>>>> +#define BIT_ACCESS_FUNCTIONS(b)							\
>>>>>>> +	static inline void set_##b(unsigned long flags)				\
>>>>>>> +	{									\
>>>>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>>>>> +		smp_mb__before_atomic();					\
>>>>>>> +		set_bit(b, &flags);						\
>>>>>>> +		/* set_bit() does not imply a memory barrier */			\
>>>>>>> +		smp_mb__after_atomic();						\
>>>>>>> +	}
>>>>>> 
>>>>>> This isn't needed, set_bit/test_bit are already atomic with
>>>>>> themselves, we should not need to introduce release semantics.
>>>>> 
>>>>> They are atomic, yes. But set_bit() does not provide a memory barrier (on x86_64, yes, but not as per the Linux definition of set_bit()).
>>>>> 
>>>>> We have (paraphrased):
>>>>> 
>>>>> 	id_priv->min_rnr_timer = min_rnr_timer;
>>>>> 	set_bit(MIN_RNR_TIMER_SET, &id_priv->flags);
>>>>> 
>>>>> Since set_bit() does not provide a memory barrier, another thread
>>>>> may observe the MIN_RNR_TIMER_SET bit in id_priv->flags, but the
>>>>> id_priv->min_rnr_timer value is not yet globally visible. Hence,
>>>>> IMHO, we need the memory barriers.
>>>> 
>>>> No, you need proper locks.
>>> 
>>> Either will work in my opinion. If you prefer locking, I can do
>>> that. This is not performance critical.
>> 
>> Yes, use locks please
> 
> With locking, there is no need to change the bit-fields to a flags variable and set_bit()/test_bit(). But for the fix to be complete, the locking must then be done in all three places. Hence, I'll send one commit with locking.

Adding to that, I will turn this into a series and include ("RDMA/cma: Remove unnecessary INIT->INIT transition") in it. The reason is that the QP state transitions of a connected QP are not protected by a lock when performed from rdma_create_qp() [what protects the cm_id from being destroyed whilst rdma_create_qp() executes?].

With commit ("RDMA/cma: Remove unnecessary INIT->INIT transition"), the QP state transitions on a connected QP are removed from rdma_create_qp(); when they are instead performed from cma_modify_qp_rtr(), the qp_mutex is held, which fits well with fixing the unprotected RMW of the bit-fields.
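
For reference, a heavily simplified view of that consumer path (the INIT transition, conn_param handling and error paths of the real cma_modify_qp_rtr() are elided):

static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_RTR;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	/*
	 * The _SET-guarded values in id_priv are read here under qp_mutex,
	 * so taking the same mutex in the setters serializes the RMW of
	 * the bit-fields as well.
	 */
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}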


Thxs, Håkon


> 
> 
> Thxs, Håkon

Patch

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2b9ffc2..fe609e7 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -844,9 +844,7 @@  static void cma_id_put(struct rdma_id_private *id_priv)
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	id_priv->id.qp_type = qp_type;
-	id_priv->tos_set = false;
-	id_priv->timeout_set = false;
-	id_priv->min_rnr_timer_set = false;
+	id_priv->flags = 0;
 	id_priv->gid_type = IB_GID_TYPE_IB;
 	spin_lock_init(&id_priv->lock);
 	mutex_init(&id_priv->qp_mutex);
@@ -1134,10 +1132,10 @@  int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 		ret = -ENOSYS;
 	}
 
-	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
+	if ((*qp_attr_mask & IB_QP_TIMEOUT) && get_TIMEOUT_SET(id_priv->flags))
 		qp_attr->timeout = id_priv->timeout;
 
-	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
+	if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && get_MIN_RNR_TIMER_SET(id_priv->flags))
 		qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
 
 	return ret;
@@ -2472,7 +2470,7 @@  static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
 		return PTR_ERR(id);
 
 	id->tos = id_priv->tos;
-	id->tos_set = id_priv->tos_set;
+	id->tos_set = get_TOS_SET(id_priv->flags);
 	id->afonly = id_priv->afonly;
 	id_priv->cm_id.iw = id;
 
@@ -2533,7 +2531,7 @@  static int cma_listen_on_dev(struct rdma_id_private *id_priv,
 	cma_id_get(id_priv);
 	dev_id_priv->internal_id = 1;
 	dev_id_priv->afonly = id_priv->afonly;
-	dev_id_priv->tos_set = id_priv->tos_set;
+	dev_id_priv->flags = id_priv->flags;
 	dev_id_priv->tos = id_priv->tos;
 
 	ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
@@ -2582,7 +2580,7 @@  void rdma_set_service_type(struct rdma_cm_id *id, int tos)
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->tos = (u8) tos;
-	id_priv->tos_set = true;
+	set_TOS_SET(id_priv->flags);
 }
 EXPORT_SYMBOL(rdma_set_service_type);
 
@@ -2610,7 +2608,7 @@  int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->timeout = timeout;
-	id_priv->timeout_set = true;
+	set_TIMEOUT_SET(id_priv->flags);
 
 	return 0;
 }
@@ -2647,7 +2645,7 @@  int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->min_rnr_timer = min_rnr_timer;
-	id_priv->min_rnr_timer_set = true;
+	set_MIN_RNR_TIMER_SET(id_priv->flags);
 
 	return 0;
 }
@@ -3033,7 +3031,7 @@  static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
 	u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
 					rdma_start_port(id_priv->cma_dev->device)];
-	u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
+	u8 tos = get_TOS_SET(id_priv->flags) ? id_priv->tos : default_roce_tos;
 
 
 	work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -3081,7 +3079,7 @@  static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	 * PacketLifeTime = local ACK timeout/2
 	 * as a reasonable approximation for RoCE networks.
 	 */
-	route->path_rec->packet_life_time = id_priv->timeout_set ?
+	route->path_rec->packet_life_time = get_TIMEOUT_SET(id_priv->flags) ?
 		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
 
 	if (!route->path_rec->mtu) {
@@ -4107,7 +4105,7 @@  static int cma_connect_iw(struct rdma_id_private *id_priv,
 		return PTR_ERR(cm_id);
 
 	cm_id->tos = id_priv->tos;
-	cm_id->tos_set = id_priv->tos_set;
+	cm_id->tos_set = get_TOS_SET(id_priv->flags);
 	id_priv->cm_id.iw = cm_id;
 
 	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 5c463da..2c1694f 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -82,11 +82,9 @@  struct rdma_id_private {
 	u32			qkey;
 	u32			qp_num;
 	u32			options;
+	unsigned long		flags;
 	u8			srq;
 	u8			tos;
-	u8			tos_set:1;
-	u8                      timeout_set:1;
-	u8			min_rnr_timer_set:1;
 	u8			reuseaddr;
 	u8			afonly;
 	u8			timeout;
@@ -127,4 +125,28 @@  int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
 			     u8 default_roce_tos);
 struct ib_device *cma_get_ib_dev(struct cma_device *dev);
 
+#define BIT_ACCESS_FUNCTIONS(b)							\
+	static inline void set_##b(unsigned long flags)				\
+	{									\
+		/* set_bit() does not imply a memory barrier */			\
+		smp_mb__before_atomic();					\
+		set_bit(b, &flags);						\
+		/* set_bit() does not imply a memory barrier */			\
+		smp_mb__after_atomic();						\
+	}									\
+	static inline bool get_##b(unsigned long flags)				\
+	{									\
+		return test_bit(b, &flags);					\
+	}
+
+enum cm_id_flag_bits {
+	TOS_SET,
+	TIMEOUT_SET,
+	MIN_RNR_TIMER_SET,
+};
+
+BIT_ACCESS_FUNCTIONS(TIMEOUT_SET);
+BIT_ACCESS_FUNCTIONS(TOS_SET);
+BIT_ACCESS_FUNCTIONS(MIN_RNR_TIMER_SET);
+
 #endif /* _CMA_PRIV_H */