diff mbox series

[rdma-next,1/8] net/mlx5: Expand mkey page size to support 6 bits

Message ID 20240904153038.23054-2-michaelgur@nvidia.com (mailing list archive)
State Superseded
Headers show
Series Introduce mlx5 Memory Scheme ODP | expand

Commit Message

Michael Guralnik Sept. 4, 2024, 3:30 p.m. UTC
Protect the usage of the 6th bit with the relevant capability to ensure
we are using the new page sizes with FW that supports the bit extension.

Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 14 ++++++++------
 drivers/infiniband/hw/mlx5/mr.c      | 10 ++++------
 drivers/infiniband/hw/mlx5/odp.c     |  2 +-
 include/linux/mlx5/mlx5_ifc.h        |  7 ++++---
 4 files changed, 17 insertions(+), 16 deletions(-)

Comments

Jason Gunthorpe Sept. 4, 2024, 4:18 p.m. UTC | #1
On Wed, Sep 04, 2024 at 06:30:31PM +0300, Michael Guralnik wrote:
> +#define mlx5_umem_find_best_pgsz(umem, dev, iova)                              \
> +	ib_umem_find_best_pgsz(                                                \
> +		umem,                                                          \
> +		__mlx5_log_page_size_to_bitmap(                                \
> +			MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5) ? 6 : \
> +									   5,  \
> +			0),                                                    \
> +		iova)

This can go in a real static inline function now.

Isn't it mlx5_mkc_find_best_pgsz? It is only for mkc, right?

> @@ -4221,8 +4223,7 @@ struct mlx5_ifc_mkc_bits {
>  
>  	u8         reserved_at_1c0[0x19];
>  	u8         relaxed_ordering_read[0x1];
> -	u8         reserved_at_1d9[0x1];
> -	u8         log_page_size[0x5];
> +	u8         log_page_size[0x6];

?

Why is this change OK without more changes? Doesn't it move
log_page_size forward by 1 bit?

Jason
Michael Guralnik Sept. 5, 2024, 8:48 p.m. UTC | #2
On 04/09/2024 19:18, Jason Gunthorpe wrote:
> On Wed, Sep 04, 2024 at 06:30:31PM +0300, Michael Guralnik wrote:
>> +#define mlx5_umem_find_best_pgsz(umem, dev, iova)                              \
>> +	ib_umem_find_best_pgsz(                                                \
>> +		umem,                                                          \
>> +		__mlx5_log_page_size_to_bitmap(                                \
>> +			MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5) ? 6 : \
>> +									   5,  \
>> +			0),                                                    \
>> +		iova)
> This can go in a real static inline function now.
Ack.
> Isn't it mlx5_mkc_find_best_pgsz? It is only for mkc, right?

Yes. It was written to be generic but mkc users were the only ones 
calling it.

>> @@ -4221,8 +4223,7 @@ struct mlx5_ifc_mkc_bits {
>>   
>>   	u8         reserved_at_1c0[0x19];
>>   	u8         relaxed_ordering_read[0x1];
>> -	u8         reserved_at_1d9[0x1];
>> -	u8         log_page_size[0x5];
>> +	u8         log_page_size[0x6];
> ?
>
> Why is this change OK without more changes? Doesn't it move
> log_page_size forward by 1 bit?
>
> Jason

The reserved_at_1d9 is the new MSB of log_page_size that was not exposed 
in ifc so far.

Not moving forward, just extending by one MSB bit.
Jason Gunthorpe Sept. 5, 2024, 11:21 p.m. UTC | #3
On Thu, Sep 05, 2024 at 11:48:30PM +0300, Michael Guralnik wrote:
> 
> On 04/09/2024 19:18, Jason Gunthorpe wrote:
> > On Wed, Sep 04, 2024 at 06:30:31PM +0300, Michael Guralnik wrote:
> > > +#define mlx5_umem_find_best_pgsz(umem, dev, iova)                              \
> > > +	ib_umem_find_best_pgsz(                                                \
> > > +		umem,                                                          \
> > > +		__mlx5_log_page_size_to_bitmap(                                \
> > > +			MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5) ? 6 : \
> > > +									   5,  \
> > > +			0),                                                    \
> > > +		iova)
> > This can go in a real static inline function now.
> Ack.
> > Isn't it mlx5_mkc_find_best_pgsz? It is only for mkc, right?
> 
> Yes. It was written to be generic but mkc users were the only ones calling
> it.

Well, more than that, it hardwires details about the mkc layout inside
it with those open coded constants.

> > > @@ -4221,8 +4223,7 @@ struct mlx5_ifc_mkc_bits {
> > >   	u8         reserved_at_1c0[0x19];
> > >   	u8         relaxed_ordering_read[0x1];
> > > -	u8         reserved_at_1d9[0x1];
> > > -	u8         log_page_size[0x5];
> > > +	u8         log_page_size[0x6];
> > ?
> > 
> > Why is this change OK without more changes? Doesn't it move
> > log_page_size forward by 1 bit?
> 
> The reserved_at_1d9 is the new MSB of log_page_size that was not exposed in
> ifc so far.

Oh wow, I never noticed this ifc scheme has a scrambled bit ordering..

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 926a965e4570..89c2ab728577 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -67,12 +67,14 @@  __mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
  * For mkc users, instead of a page_offset the command has a start_iova which
  * specifies both the page_offset and the on-the-wire IOVA
  */
-#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova)    \
-	ib_umem_find_best_pgsz(umem,                                           \
-			       __mlx5_log_page_size_to_bitmap(                 \
-				       __mlx5_bit_sz(typ, log_pgsz_fld),       \
-				       pgsz_shift),                            \
-			       iova)
+#define mlx5_umem_find_best_pgsz(umem, dev, iova)                              \
+	ib_umem_find_best_pgsz(                                                \
+		umem,                                                          \
+		__mlx5_log_page_size_to_bitmap(                                \
+			MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5) ? 6 : \
+									   5,  \
+			0),                                                    \
+		iova)
 
 static __always_inline unsigned long
 __mlx5_page_offset_to_bitmask(unsigned int page_offset_bits,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 73962bd0b216..0b52f080879f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1119,8 +1119,7 @@  static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 	if (umem->is_dmabuf)
 		page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
 	else
-		page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
-						     0, iova);
+		page_size = mlx5_umem_find_best_pgsz(umem, dev, iova);
 	if (WARN_ON(!page_size))
 		return ERR_PTR(-EINVAL);
 
@@ -1425,8 +1424,8 @@  static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
 		mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
 					MLX5_MKC_ACCESS_MODE_MTT);
 	} else {
-		unsigned int page_size = mlx5_umem_find_best_pgsz(
-			umem, mkc, log_page_size, 0, iova);
+		unsigned int page_size =
+			mlx5_umem_find_best_pgsz(umem, dev, iova);
 
 		mutex_lock(&dev->slow_path_mutex);
 		mr = reg_create(pd, umem, iova, access_flags, page_size,
@@ -1744,8 +1743,7 @@  static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
 	if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
 		return false;
 
-	*page_size =
-		mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
+	*page_size = mlx5_umem_find_best_pgsz(new_umem, dev, iova);
 	if (WARN_ON(!*page_size))
 		return false;
 	return (mr->mmkey.cache_ent->rb_key.ndescs) >=
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 44a3428ea342..221820874e7a 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -693,7 +693,7 @@  static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
 	struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
 	u32 xlt_flags = 0;
 	int err;
-	unsigned int page_size;
+	unsigned long page_size;
 
 	if (flags & MLX5_PF_FLAGS_ENABLE)
 		xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 691a285f9c1e..1be2495362ee 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1995,7 +1995,9 @@  struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8         dp_ordering_force[0x1];
 	u8         reserved_at_89[0x9];
 	u8         query_vuid[0x1];
-	u8         reserved_at_93[0xd];
+	u8         reserved_at_93[0x5];
+	u8         umr_log_entity_size_5[0x1];
+	u8         reserved_at_99[0x7];
 
 	u8	   max_reformat_insert_size[0x8];
 	u8	   max_reformat_insert_offset[0x8];
@@ -4221,8 +4223,7 @@  struct mlx5_ifc_mkc_bits {
 
 	u8         reserved_at_1c0[0x19];
 	u8         relaxed_ordering_read[0x1];
-	u8         reserved_at_1d9[0x1];
-	u8         log_page_size[0x5];
+	u8         log_page_size[0x6];
 
 	u8         reserved_at_1e0[0x20];
 };