@@ -121,11 +121,10 @@ struct siw_page_chunk {
};
struct siw_umem {
+ struct ib_umem *base_mem;
struct siw_page_chunk *page_chunk;
int num_pages;
- bool writable;
u64 fp_addr; /* First page base address */
- struct mm_struct *owning_mm;
};
struct siw_pble {
@@ -5,6 +5,7 @@
#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
@@ -60,28 +61,17 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
return NULL;
}
-static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
- bool dirty)
+void siw_umem_release(struct siw_umem *umem)
{
- unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
-}
-
-void siw_umem_release(struct siw_umem *umem, bool dirty)
-{
- struct mm_struct *mm_s = umem->owning_mm;
int i, num_pages = umem->num_pages;
- for (i = 0; num_pages; i++) {
- int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
+ if (umem->base_mem)
+ ib_umem_release(umem->base_mem);
- siw_free_plist(&umem->page_chunk[i], to_free,
- umem->writable && dirty);
+ for (i = 0; num_pages > 0; i++) {
kfree(umem->page_chunk[i].plist);
- num_pages -= to_free;
+ num_pages -= PAGES_PER_CHUNK;
}
- atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
-
- mmdrop(mm_s);
kfree(umem->page_chunk);
kfree(umem);
}
@@ -145,7 +135,7 @@ void siw_free_mem(struct kref *ref)
if (!mem->is_mw && mem->mem_obj) {
if (mem->is_pbl == 0)
- siw_umem_release(mem->umem, true);
+ siw_umem_release(mem->umem);
else
kfree(mem->pbl);
}
@@ -362,18 +352,16 @@ struct siw_pbl *siw_pbl_alloc(u32 num_buf)
return pbl;
}
-struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
+struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
+ u64 len, int rights)
{
struct siw_umem *umem;
- struct mm_struct *mm_s;
+ struct ib_umem *base_mem;
+ struct sg_page_iter sg_iter;
+ struct sg_table *sgt;
u64 first_page_va;
- unsigned long mlock_limit;
- unsigned int foll_flags = FOLL_LONGTERM;
int num_pages, num_chunks, i, rv = 0;
- if (!can_do_mlock())
- return ERR_PTR(-EPERM);
-
if (!len)
return ERR_PTR(-EINVAL);
@@ -385,65 +373,50 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
if (!umem)
return ERR_PTR(-ENOMEM);
- mm_s = current->mm;
- umem->owning_mm = mm_s;
- umem->writable = writable;
-
- mmgrab(mm_s);
-
- if (writable)
- foll_flags |= FOLL_WRITE;
-
- mmap_read_lock(mm_s);
-
- mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
- if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) {
- rv = -ENOMEM;
- goto out_sem_up;
- }
- umem->fp_addr = first_page_va;
-
umem->page_chunk =
kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
if (!umem->page_chunk) {
rv = -ENOMEM;
- goto out_sem_up;
+ goto err_out;
}
- for (i = 0; num_pages; i++) {
+ base_mem = ib_umem_get(base_dev, start, len, rights);
+ if (IS_ERR(base_mem)) {
+ rv = PTR_ERR(base_mem);
+ siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
+ goto err_out;
+ }
+ umem->fp_addr = first_page_va;
+ umem->base_mem = base_mem;
+
+ sgt = &base_mem->sgt_append.sgt;
+ __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
+
+ if (!__sg_page_iter_next(&sg_iter)) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ for (i = 0; num_pages > 0; i++) {
int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
struct page **plist =
kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
if (!plist) {
rv = -ENOMEM;
- goto out_sem_up;
+ goto err_out;
}
umem->page_chunk[i].plist = plist;
- while (nents) {
- rv = pin_user_pages(first_page_va, nents, foll_flags,
- plist);
- if (rv < 0)
- goto out_sem_up;
-
- umem->num_pages += rv;
- first_page_va += rv * PAGE_SIZE;
- plist += rv;
- nents -= rv;
- num_pages -= rv;
+ while (nents--) {
+ *plist = sg_page_iter_page(&sg_iter);
+ umem->num_pages++;
+ num_pages--;
+ plist++;
+ if (!__sg_page_iter_next(&sg_iter))
+ break;
}
}
-out_sem_up:
- mmap_read_unlock(mm_s);
-
- if (rv > 0)
- return umem;
-
- /* Adjust accounting for pages not pinned */
- if (num_pages)
- atomic64_sub(num_pages, &mm_s->pinned_vm);
-
- siw_umem_release(umem, false);
+ return umem;
+err_out:
+ siw_umem_release(umem);
return ERR_PTR(rv);
}
@@ -6,8 +6,9 @@
#ifndef _SIW_MEM_H
#define _SIW_MEM_H
-struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable);
-void siw_umem_release(struct siw_umem *umem, bool dirty);
+struct siw_umem *siw_umem_get(struct ib_device *base_dave, u64 start,
+ u64 len, int rights);
+void siw_umem_release(struct siw_umem *umem);
struct siw_pbl *siw_pbl_alloc(u32 num_buf);
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx);
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index);
@@ -1321,8 +1321,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
struct siw_umem *umem = NULL;
struct siw_ureq_reg_mr ureq;
struct siw_device *sdev = to_siw_dev(pd->device);
-
- unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
int rv;
siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
@@ -1338,20 +1336,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
rv = -EINVAL;
goto err_out;
}
- if (mem_limit != RLIM_INFINITY) {
- unsigned long num_pages =
- (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
- mem_limit >>= PAGE_SHIFT;
-
- if (num_pages > mem_limit - current->mm->locked_vm) {
- siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
- num_pages, mem_limit,
- current->mm->locked_vm);
- rv = -ENOMEM;
- goto err_out;
- }
- }
- umem = siw_umem_get(start, len, ib_access_writable(rights));
+ umem = siw_umem_get(pd->device, start, len, rights);
if (IS_ERR(umem)) {
rv = PTR_ERR(umem);
siw_dbg_pd(pd, "getting user memory failed: %d\n", rv);
@@ -1404,7 +1389,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
kfree_rcu(mr, rcu);
} else {
if (umem)
- siw_umem_release(umem, false);
+ siw_umem_release(umem);
}
return ERR_PTR(rv);
}
Abandon siw private code to pin user pages during user memory registration, but use ib_umem_get() instead. This will help maintaining the driver in case of changes to the memory subsystem. Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> --- v1 -> v2: remove RLIMIT memlock check logic, now done in ib_umem_get() --- drivers/infiniband/sw/siw/siw.h | 3 +- drivers/infiniband/sw/siw/siw_mem.c | 109 ++++++++++---------------- drivers/infiniband/sw/siw/siw_mem.h | 5 +- drivers/infiniband/sw/siw/siw_verbs.c | 19 +---- 4 files changed, 47 insertions(+), 89 deletions(-)