@@ -1265,8 +1265,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
mkey->pd = MLX5_GET(mkc, mkc, pd);
devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
- return xa_err(xa_store(&dev->mdev->priv.mkey_table,
- mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
+ return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey,
+ GFP_KERNEL));
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1345,9 +1345,9 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
* the mmkey, we must wait for that to stop before freeing the
* mkey, as another allocation could get the same mkey #.
*/
- xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
+ xa_erase(&obj->ib_dev->odp_mkeys,
mlx5_base_mkey(obj->devx_mr.mmkey.key));
- synchronize_srcu(&dev->mr_srcu);
+ synchronize_srcu(&dev->odp_srcu);
}
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
@@ -6145,10 +6145,10 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- srcu_barrier(&dev->mr_srcu);
- cleanup_srcu_struct(&dev->mr_srcu);
- }
+ WARN_ON(!xa_empty(&dev->odp_mkeys));
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ srcu_barrier(&dev->odp_srcu);
+ cleanup_srcu_struct(&dev->odp_srcu);
WARN_ON(!xa_empty(&dev->sig_mrs));
WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
@@ -6202,16 +6202,15 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
+ xa_init(&dev->odp_mkeys);
xa_init(&dev->sig_mrs);
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- err = init_srcu_struct(&dev->mr_srcu);
- if (err)
- goto err_mp;
- }
+ err = init_srcu_struct(&dev->odp_srcu);
+ if (err)
+ goto err_mp;
return 0;
@@ -606,7 +606,6 @@ struct mlx5_ib_mr {
struct mlx5_ib_dev *dev;
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx *sig;
- unsigned int live;
void *descs_alloc;
int access_flags; /* Needed for rereg MR */
@@ -975,7 +974,9 @@ struct mlx5_ib_dev {
* Sleepable RCU that prevents destruction of MRs while they are still
* being used by a page fault handler.
*/
- struct srcu_struct mr_srcu;
+ struct srcu_struct odp_srcu;
+ struct xarray odp_mkeys;
+
u32 null_mkey;
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
@@ -59,13 +59,9 @@ static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- /* Wait until all page fault handlers using the mr complete. */
- synchronize_srcu(&dev->mr_srcu);
-
- return err;
+ return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -218,9 +214,6 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- synchronize_srcu(&dev->mr_srcu);
-
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
list_del(&mr->list);
kfree(mr);
@@ -555,10 +548,6 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
-#endif
-
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
list_del(&mr->list);
kfree(mr);
@@ -1338,9 +1327,14 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (is_odp_mr(mr)) {
to_ib_umem_odp(mr->umem)->private = mr;
atomic_set(&mr->num_pending_prefetch, 0);
+ err = xa_err(xa_store(&dev->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
+ GFP_KERNEL));
+ if (err) {
+ dereg_mr(dev, mr);
+ return ERR_PTR(err);
+ }
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- smp_store_release(&mr->live, 1);
return &mr->ibmr;
error:
@@ -1582,10 +1576,10 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
/* Prevent new page faults and
* prefetch requests from succeeding
*/
- WRITE_ONCE(mr->live, 0);
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
/* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&dev->mr_srcu);
+ synchronize_srcu(&dev->odp_srcu);
/* dequeue pending prefetch requests for the mr */
if (atomic_read(&mr->num_pending_prefetch))
@@ -1959,9 +1953,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
}
}
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ err = xa_err(xa_store(&dev->odp_mkeys,
+ mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
+ GFP_KERNEL));
+ if (err)
+ goto free_mkey;
+ }
+
kfree(in);
return &mw->ibmw;
+free_mkey:
+ mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
free:
kfree(mw);
kfree(in);
@@ -1975,13 +1979,12 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw)
int err;
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- xa_erase(&dev->mdev->priv.mkey_table,
- mlx5_base_mkey(mmw->mmkey.key));
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
/*
* pagefault_single_data_segment() may be accessing mmw under
* SRCU if the user bound an ODP MR to this MW.
*/
- synchronize_srcu(&dev->mr_srcu);
+ synchronize_srcu(&dev->odp_srcu);
}
err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
@@ -199,7 +199,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
* locking around the children list.
*/
lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex);
- lockdep_assert_held(&mr->dev->mr_srcu);
+ lockdep_assert_held(&mr->dev->odp_srcu);
odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
nentries * MLX5_IMR_MTT_SIZE, mr);
@@ -229,16 +229,16 @@ static void mr_leaf_free_action(struct work_struct *work)
int srcu_key;
mr->parent = NULL;
- synchronize_srcu(&mr->dev->mr_srcu);
+ synchronize_srcu(&mr->dev->odp_srcu);
- if (smp_load_acquire(&imr->live)) {
- srcu_key = srcu_read_lock(&mr->dev->mr_srcu);
+ if (xa_load(&mr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key))) {
+ srcu_key = srcu_read_lock(&mr->dev->odp_srcu);
mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(imr, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
- srcu_read_unlock(&mr->dev->mr_srcu, srcu_key);
+ srcu_read_unlock(&mr->dev->odp_srcu, srcu_key);
}
ib_umem_odp_release(odp);
mlx5_mr_cache_free(mr->dev, mr);
@@ -318,7 +318,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
if (unlikely(!umem_odp->npages && mr->parent &&
!umem_odp->dying)) {
- WRITE_ONCE(mr->live, 0);
+ xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
umem_odp->dying = 1;
atomic_inc(&mr->parent->num_leaf_free);
schedule_work(&umem_odp->work);
@@ -430,6 +430,11 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
if (IS_ERR(mr))
return mr;
+ err = xa_reserve(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ GFP_KERNEL);
+ if (err)
+ goto out_mr;
+
mr->ibmr.pd = pd;
mr->dev = dev;
@@ -455,7 +460,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
}
if (err)
- goto fail;
+ goto out_release;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
@@ -465,7 +470,9 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
return mr;
-fail:
+out_release:
+ xa_release(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
+out_mr:
mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
mlx5_mr_cache_free(dev, mr);
@@ -513,7 +520,8 @@ static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
mtt->parent = mr;
INIT_WORK(&odp->work, mr_leaf_free_action);
- smp_store_release(&mtt->live, 1);
+ xa_store(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key),
+ &mtt->mmkey, GFP_ATOMIC);
if (!nentries)
start_idx = addr >> MLX5_IMR_MTT_SHIFT;
@@ -567,7 +575,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
init_waitqueue_head(&imr->q_leaf_free);
atomic_set(&imr->num_leaf_free, 0);
atomic_set(&imr->num_pending_prefetch, 0);
- smp_store_release(&imr->live, 1);
+ xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key),
+ &imr->mmkey, GFP_ATOMIC);
return imr;
}
@@ -778,13 +787,28 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
size_t offset;
int ndescs;
- srcu_key = srcu_read_lock(&dev->mr_srcu);
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
io_virt += *bytes_committed;
bcnt -= *bytes_committed;
next_mr:
- mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
+ if (!mmkey) {
+ mlx5_ib_dbg(
+ dev,
+ "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
+ /*
+ * The user could specify a SGL with multiple lkeys and only
+ * some of them are ODP. Treat the non-ODP ones as fully
+ * faulted.
+ */
+ ret = 0;
+ goto srcu_unlock;
+ }
if (!mkey_is_eq(mmkey, key)) {
mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
ret = -EFAULT;
@@ -794,20 +818,6 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
switch (mmkey->type) {
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) {
- mlx5_ib_dbg(dev, "got dead MR\n");
- ret = -EFAULT;
- goto srcu_unlock;
- }
-
- if (!is_odp_mr(mr)) {
- mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped += bcnt;
- ret = 0;
- goto srcu_unlock;
- }
ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0);
if (ret < 0)
@@ -902,7 +912,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
}
kfree(out);
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
}
@@ -1623,18 +1633,15 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
struct ib_umem_odp *odp;
struct mlx5_ib_mr *mr;
- lockdep_assert_held(&dev->mr_srcu);
+ lockdep_assert_held(&dev->odp_srcu);
- mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(lkey));
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR)
return NULL;
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (!smp_load_acquire(&mr->live))
- return NULL;
-
- if (mr->ibmr.pd != pd || !is_odp_mr(mr))
+ if (mr->ibmr.pd != pd)
return NULL;
/*
@@ -1709,7 +1716,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
int ret = 0;
u32 i;
- srcu_key = srcu_read_lock(&dev->mr_srcu);
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
for (i = 0; i < num_sge; ++i) {
struct mlx5_ib_mr *mr;
@@ -1725,7 +1732,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
}
out:
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
return ret;
}
@@ -1749,12 +1756,12 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
if (!work)
return -ENOMEM;
- srcu_key = srcu_read_lock(&dev->mr_srcu);
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
return -EINVAL;
}
queue_work(system_unbound_wq, &work->work);
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
return 0;
}