| Message ID | 20180910005753.5860-3-jglisse@redhat.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | Getting rid of GUP and use HMM for user ptr features. |
Hi Jérôme,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.19-rc3 next-20180910]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/jglisse-redhat-com/Getting-rid-of-GUP-and-use-HMM-for-user-ptr-features/20180911-020741
config: x86_64-randconfig-x017-201836 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64

All errors (new ones prefixed by >>):

   drivers/gpu/drm/radeon/radeon_mn.c:43:20: error: field 'mirror' has incomplete type
     struct hmm_mirror mirror;
                       ^~~~~~
   drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_mn_destroy':
   drivers/gpu/drm/radeon/radeon_mn.c:90:2: error: implicit declaration of function 'hmm_mirror_unregister'; did you mean 'drm_dp_aux_unregister'? [-Werror=implicit-function-declaration]
     hmm_mirror_unregister(&rmn->mirror);
     ^~~~~~~~~~~~~~~~~~~~~
     drm_dp_aux_unregister
   In file included from include/linux/firmware.h:6:0,
                    from drivers/gpu/drm/radeon/radeon_mn.c:31:
   drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_mirror_release':
   include/linux/kernel.h:997:32: error: dereferencing pointer to incomplete type 'struct hmm_mirror'
     BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
                                   ^~~~~~
   include/linux/compiler.h:335:18: note: in definition of macro '__compiletime_assert'
     int __cond = !(condition); \
                    ^~~~~~~~~
   include/linux/compiler.h:358:2: note: in expansion of macro '_compiletime_assert'
     _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
     ^~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
    #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                        ^~~~~~~~~~~~~~~~~~
   include/linux/kernel.h:997:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
     BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
     ^~~~~~~~~~~~~~~~
   include/linux/kernel.h:997:20: note: in expansion of macro '__same_type'
     BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
                      ^~~~~~~~~~~
   drivers/gpu/drm/radeon/radeon_mn.c:103:26: note: in expansion of macro 'container_of'
     struct radeon_mn *rmn = container_of(mirror, struct radeon_mn, mirror);
                             ^~~~~~~~~~~~
   drivers/gpu/drm/radeon/radeon_mn.c: At top level:
   drivers/gpu/drm/radeon/radeon_mn.c:119:24: warning: 'struct hmm_update' declared inside parameter list will not be visible outside of this definition or declaration
             const struct hmm_update *update)
                          ^~~~~~~~~~
   drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_sync_cpu_device_pagetables':
   drivers/gpu/drm/radeon/radeon_mn.c:128:14: error: dereferencing pointer to incomplete type 'const struct hmm_update'
     end = update->end - 1;
               ^~
   drivers/gpu/drm/radeon/radeon_mn.c: At top level:
   drivers/gpu/drm/radeon/radeon_mn.c:183:21: error: variable 'radeon_mirror_ops' has initializer but incomplete type
    static const struct hmm_mirror_ops radeon_mirror_ops = {
                        ^~~~~~~~~~~~~~
   drivers/gpu/drm/radeon/radeon_mn.c:184:3: error: 'const struct hmm_mirror_ops' has no member named 'sync_cpu_device_pagetables'
     .sync_cpu_device_pagetables = &radeon_sync_cpu_device_pagetables,
      ^~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/radeon/radeon_mn.c:184:32: warning: excess elements in struct initializer
     .sync_cpu_device_pagetables = &radeon_sync_cpu_device_pagetables,
                                   ^
   drivers/gpu/drm/radeon/radeon_mn.c:184:32: note: (near initialization for 'radeon_mirror_ops')
   drivers/gpu/drm/radeon/radeon_mn.c:185:3: error: 'const struct hmm_mirror_ops' has no member named 'release'
     .release = &radeon_mirror_release,
      ^~~~~~~
   drivers/gpu/drm/radeon/radeon_mn.c:185:13: warning: excess elements in struct initializer
     .release = &radeon_mirror_release,
                ^
   drivers/gpu/drm/radeon/radeon_mn.c:185:13: note: (near initialization for 'radeon_mirror_ops')
   drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_mn_get':
   drivers/gpu/drm/radeon/radeon_mn.c:224:6: error: implicit declaration of function 'hmm_mirror_register'; did you mean 'drm_dp_aux_register'? [-Werror=implicit-function-declaration]
     r = hmm_mirror_register(&new->mirror, mm);
         ^~~~~~~~~~~~~~~~~~~
         drm_dp_aux_register
   drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_mn_bo_map':
>> drivers/gpu/drm/radeon/radeon_mn.c:373:43: error: 'HMM_PFN_FLAG_MAX' undeclared (first use in this function); did you mean 'TTM_PL_FLAG_VRAM'?
     static const uint64_t radeon_range_flags[HMM_PFN_FLAG_MAX] = {
                                              ^~~~~~~~~~~~~~~~
                                              TTM_PL_FLAG_VRAM
   drivers/gpu/drm/radeon/radeon_mn.c:373:43: note: each undeclared identifier is reported only once for each function it appears in
>> drivers/gpu/drm/radeon/radeon_mn.c:378:44: error: 'HMM_PFN_VALUE_MAX' undeclared (first use in this function); did you mean 'HMM_PFN_FLAG_MAX'?
     static const uint64_t radeon_range_values[HMM_PFN_VALUE_MAX] = {
                                               ^~~~~~~~~~~~~~~~~
                                               HMM_PFN_FLAG_MAX
>> drivers/gpu/drm/radeon/radeon_mn.c:389:19: error: storage size of 'range' isn't known
     struct hmm_range range;
                      ^~~~~
>> drivers/gpu/drm/radeon/radeon_mn.c:421:31: error: 'HMM_PFN_VALID' undeclared (first use in this function); did you mean 'HMM_PFN_VALUE_MAX'?
     range.pfns[i] = range.flags[HMM_PFN_VALID];
                                 ^~~~~~~~~~~~~
                                 HMM_PFN_VALUE_MAX
>> drivers/gpu/drm/radeon/radeon_mn.c:422:40: error: 'HMM_PFN_WRITE' undeclared (first use in this function); did you mean 'HMM_PFN_VALID'?
     range.pfns[i] |= write ? range.flags[HMM_PFN_WRITE] : 0;
                                          ^~~~~~~~~~~~~
                                          HMM_PFN_VALID
>> drivers/gpu/drm/radeon/radeon_mn.c:425:8: error: implicit declaration of function 'hmm_vma_fault'; did you mean 'filemap_fault'? [-Werror=implicit-function-declaration]
     ret = hmm_vma_fault(&range, true);
           ^~~~~~~~~~~~~
           filemap_fault
>> drivers/gpu/drm/radeon/radeon_mn.c:430:23: error: implicit declaration of function 'hmm_pfn_to_page'; did you mean '__pfn_to_page'? [-Werror=implicit-function-declaration]
     struct page *page = hmm_pfn_to_page(&range, range.pfns[i]);
                         ^~~~~~~~~~~~~~~
                         __pfn_to_page
>> drivers/gpu/drm/radeon/radeon_mn.c:446:4: error: implicit declaration of function 'hmm_vma_range_done'; did you mean 'drm_vma_node_size'?
[-Werror=implicit-function-declaration] hmm_vma_range_done(&range); ^~~~~~~~~~~~~~~~~~ drm_vma_node_size drivers/gpu/drm/radeon/radeon_mn.c:389:19: warning: unused variable 'range' [-Wunused-variable] struct hmm_range range; ^~~~~ drivers/gpu/drm/radeon/radeon_mn.c:378:24: warning: unused variable 'radeon_range_values' [-Wunused-variable] static const uint64_t radeon_range_values[HMM_PFN_VALUE_MAX] = { ^~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/radeon/radeon_mn.c:373:24: warning: unused variable 'radeon_range_flags' [-Wunused-variable] static const uint64_t radeon_range_flags[HMM_PFN_FLAG_MAX] = { ^~~~~~~~~~~~~~~~~~ drivers/gpu/drm/radeon/radeon_mn.c: At top level: drivers/gpu/drm/radeon/radeon_mn.c:183:36: error: storage size of 'radeon_mirror_ops' isn't known static const struct hmm_mirror_ops radeon_mirror_ops = { ^~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors vim +373 drivers/gpu/drm/radeon/radeon_mn.c 182 183 static const struct hmm_mirror_ops radeon_mirror_ops = { 184 .sync_cpu_device_pagetables = &radeon_sync_cpu_device_pagetables, > 185 .release = &radeon_mirror_release, 186 }; 187 188 /** 189 * radeon_mn_get - create notifier context 190 * 191 * @rdev: radeon device pointer 192 * 193 * Creates a notifier context for current->mm. 194 */ 195 static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) 196 { 197 struct mm_struct *mm = current->mm; 198 struct radeon_mn *rmn, *new; 199 int r; 200 201 mutex_lock(&rdev->mn_lock); 202 hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) { 203 if (rmn->mm == mm) { 204 mutex_unlock(&rdev->mn_lock); 205 return rmn; 206 } 207 } 208 mutex_unlock(&rdev->mn_lock); 209 210 new = kzalloc(sizeof(*rmn), GFP_KERNEL); 211 if (!new) { 212 return ERR_PTR(-ENOMEM); 213 } 214 new->mm = mm; 215 new->rdev = rdev; 216 mutex_init(&new->lock); 217 new->objects = RB_ROOT_CACHED; 218 new->mirror.ops = &radeon_mirror_ops; 219 220 if (down_write_killable(&mm->mmap_sem)) { 221 kfree(new); 222 return ERR_PTR(-EINTR); 223 } 224 r = hmm_mirror_register(&new->mirror, mm); 225 up_write(&mm->mmap_sem); 226 if (r) { 227 kfree(new); 228 return ERR_PTR(r); 229 } 230 231 mutex_lock(&rdev->mn_lock); 232 /* Check again in case some other thread raced with us ... */ 233 hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) { 234 if (rmn->mm == mm) { 235 mutex_unlock(&rdev->mn_lock); 236 hmm_mirror_unregister(&new->mirror); 237 kfree(new); 238 return rmn; 239 } 240 } 241 hash_add(rdev->mn_hash, &new->node, (unsigned long)mm); 242 mutex_unlock(&rdev->mn_lock); 243 244 return new; 245 } 246 247 /** 248 * radeon_mn_register - register a BO for notifier updates 249 * 250 * @bo: radeon buffer object 251 * @addr: userptr addr we should monitor 252 * 253 * Registers an MMU notifier for the given BO at the specified address. 254 * Returns 0 on success, -ERRNO if anything goes wrong. 
255 */ 256 int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) 257 { 258 unsigned long end = addr + radeon_bo_size(bo) - 1; 259 struct radeon_device *rdev = bo->rdev; 260 struct radeon_mn *rmn; 261 struct radeon_mn_node *node = NULL; 262 struct list_head bos; 263 struct interval_tree_node *it; 264 265 bo->userptr = addr; 266 bo->pfns = kvmalloc_array(bo->tbo.num_pages, sizeof(uint64_t), 267 GFP_KERNEL | __GFP_ZERO); 268 if (bo->pfns == NULL) 269 return -ENOMEM; 270 271 rmn = radeon_mn_get(rdev); 272 if (IS_ERR(rmn)) { 273 kvfree(bo->pfns); 274 bo->pfns = NULL; 275 return PTR_ERR(rmn); 276 } 277 278 INIT_LIST_HEAD(&bos); 279 280 mutex_lock(&rmn->lock); 281 282 while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { 283 kfree(node); 284 node = container_of(it, struct radeon_mn_node, it); 285 interval_tree_remove(&node->it, &rmn->objects); 286 addr = min(it->start, addr); 287 end = max(it->last, end); 288 list_splice(&node->bos, &bos); 289 } 290 291 if (!node) { 292 node = kmalloc(sizeof(struct radeon_mn_node), GFP_KERNEL); 293 if (!node) { 294 mutex_unlock(&rmn->lock); 295 kvfree(bo->pfns); 296 bo->pfns = NULL; 297 return -ENOMEM; 298 } 299 } 300 301 bo->mn = rmn; 302 303 node->it.start = addr; 304 node->it.last = end; 305 INIT_LIST_HEAD(&node->bos); 306 list_splice(&bos, &node->bos); 307 list_add(&bo->mn_list, &node->bos); 308 309 interval_tree_insert(&node->it, &rmn->objects); 310 311 mutex_unlock(&rmn->lock); 312 313 return 0; 314 } 315 316 /** 317 * radeon_mn_unregister - unregister a BO for notifier updates 318 * 319 * @bo: radeon buffer object 320 * 321 * Remove any registration of MMU notifier updates from the buffer object. 322 */ 323 void radeon_mn_unregister(struct radeon_bo *bo) 324 { 325 struct radeon_device *rdev = bo->rdev; 326 struct radeon_mn *rmn; 327 struct list_head *head; 328 329 mutex_lock(&rdev->mn_lock); 330 rmn = bo->mn; 331 if (rmn == NULL) { 332 mutex_unlock(&rdev->mn_lock); 333 return; 334 } 335 336 mutex_lock(&rmn->lock); 337 /* save the next list entry for later */ 338 head = bo->mn_list.next; 339 340 bo->mn = NULL; 341 list_del(&bo->mn_list); 342 343 if (list_empty(head)) { 344 struct radeon_mn_node *node; 345 node = container_of(head, struct radeon_mn_node, bos); 346 interval_tree_remove(&node->it, &rmn->objects); 347 kfree(node); 348 } 349 350 mutex_unlock(&rmn->lock); 351 mutex_unlock(&rdev->mn_lock); 352 353 kvfree(bo->pfns); 354 bo->pfns = NULL; 355 } 356 357 /** 358 * radeon_mn_bo_map - map range of virtual address as buffer object 359 * 360 * @bo: radeon buffer object 361 * @ttm: ttm_tt object in which holds mirroring result 362 * @write: can GPU write to the range ? 363 * Returns: 0 on success, error code otherwise 364 * 365 * Use HMM to mirror a range of virtual address as a buffer object mapped into 366 * GPU address space (thus allowing transparent GPU access to this range). It 367 * does not pin pages for range but rely on HMM and underlying synchronizations 368 * to make sure that both CPU and GPU points to same physical memory for the 369 * range. 
370 */ 371 int radeon_mn_bo_map(struct radeon_bo *bo, struct ttm_dma_tt *dma, bool write) 372 { > 373 static const uint64_t radeon_range_flags[HMM_PFN_FLAG_MAX] = { 374 (1 << 0), /* HMM_PFN_VALID */ 375 (1 << 1), /* HMM_PFN_WRITE */ 376 0 /* HMM_PFN_DEVICE_PRIVATE */ 377 }; > 378 static const uint64_t radeon_range_values[HMM_PFN_VALUE_MAX] = { 379 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ 380 0, /* HMM_PFN_NONE */ 381 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ 382 }; 383 384 unsigned long i, npages = bo->tbo.num_pages; 385 enum dma_data_direction direction = write ? 386 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 387 struct radeon_device *rdev = bo->rdev; 388 struct ttm_tt *ttm = &dma->ttm; > 389 struct hmm_range range; 390 struct radeon_mn *rmn; 391 int ret; 392 393 /* 394 * FIXME This whole protection shouldn't be needed as we should only 395 * reach that code with a valid reserved bo that can not under go a 396 * concurrent radeon_mn_unregister(). 397 */ 398 mutex_lock(&rdev->mn_lock); 399 if (bo->mn == NULL) { 400 mutex_unlock(&rdev->mn_lock); 401 return -EINVAL; 402 } 403 rmn = bo->mn; 404 mutex_unlock(&rdev->mn_lock); 405 406 range.pfn_shift = 12; 407 range.pfns = bo->pfns; 408 range.start = bo->userptr; 409 range.flags = radeon_range_flags; 410 range.values = radeon_range_values; 411 range.end = bo->userptr + radeon_bo_size(bo); 412 413 range.vma = find_vma(rmn->mm, bo->userptr); 414 if (!range.vma || range.vma->vm_file || range.vma->vm_end < range.end) 415 return -EPERM; 416 417 memset(ttm->pages, 0, sizeof(void*) * npages); 418 419 again: 420 for (i = 0; i < npages; ++i) { > 421 range.pfns[i] = range.flags[HMM_PFN_VALID]; > 422 range.pfns[i] |= write ? range.flags[HMM_PFN_WRITE] : 0; 423 } 424 > 425 ret = hmm_vma_fault(&range, true); 426 if (ret) 427 goto err_unmap; 428 429 for (i = 0; i < npages; ++i) { > 430 struct page *page = hmm_pfn_to_page(&range, range.pfns[i]); 431 432 if (page == NULL) 433 goto again; 434 435 if (ttm->pages[i] == page) 436 continue; 437 438 if (ttm->pages[i]) 439 dma_unmap_page(rdev->dev, dma->dma_address[i], 440 PAGE_SIZE, direction); 441 ttm->pages[i] = page; 442 443 dma->dma_address[i] = dma_map_page(rdev->dev, page, 0, 444 PAGE_SIZE, direction); 445 if (dma_mapping_error(rdev->dev, dma->dma_address[i])) { > 446 hmm_vma_range_done(&range); 447 ttm->pages[i] = NULL; 448 ret = -ENOMEM; 449 goto err_unmap; 450 } 451 } 452 453 /* 454 * Taking rmn->lock is not necessary here as we are protected from any 455 * concurrent invalidation through ttm object reservation. Involved 456 * functions: radeon_sync_cpu_device_pagetables() 457 * radeon_bo_list_validate() 458 * radeon_gem_userptr_ioctl() 459 */ 460 if (!hmm_vma_range_done(&range)) 461 goto again; 462 463 return 0; 464 465 err_unmap: 466 radeon_mn_bo_unmap(bo, dma, write); 467 return ret; 468 } 469 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
A second 0-DAY report for config x86_64-randconfig-x000-201836 (same compiler and reproduce steps, gcc-7 with make ARCH=x86_64) flags the identical set of errors against the same quoted code context; the only difference is the spelling suggested for the undeclared 'HMM_PFN_FLAG_MAX' (TTM_PL_FLAG_TT instead of TTM_PL_FLAG_VRAM).
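All of the above boils down to a single root cause rather than a dozen independent bugs: every missing symbol (struct hmm_mirror, hmm_mirror_register()/hmm_mirror_unregister(), struct hmm_update, struct hmm_range, the HMM_PFN_* enums, hmm_vma_fault(), hmm_pfn_to_page(), hmm_vma_range_done()) belongs to the HMM mirror API, which <linux/hmm.h> only exposes when HMM mirroring (CONFIG_HMM_MIRROR) is enabled. Both failing randconfigs evidently have it disabled, so radeon_mn.c is compiled against a header that declares none of it. The usual remedies are a Kconfig dependency for the driver (for example a hypothetical "select HMM_MIRROR" under DRM_RADEON, together with whatever HMM itself depends on) or compile-time guards in the radeon code. The sketch below shows the guard approach in the style radeon.h already uses for CONFIG_MMU_NOTIFIER; it is not part of the posted patch, and the exact condition is an assumption for illustration:

/*
 * Hypothetical sketch, not from the posted series: declare the real userptr
 * helpers only when both the MMU notifier and the HMM mirror API are
 * available, and otherwise fall back to static inline stubs like the ones
 * radeon.h already provides for the !CONFIG_MMU_NOTIFIER case.
 */
#if defined(CONFIG_MMU_NOTIFIER) && IS_ENABLED(CONFIG_HMM_MIRROR)
int radeon_mn_register(struct radeon_bo *bo, unsigned long addr);
void radeon_mn_unregister(struct radeon_bo *bo);
int radeon_mn_bo_map(struct radeon_bo *bo, struct ttm_dma_tt *dma, bool write);
void radeon_mn_bo_unmap(struct radeon_bo *bo, struct ttm_dma_tt *dma,
			bool write);
#else
static inline int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
{
	return -ENODEV;
}
static inline void radeon_mn_unregister(struct radeon_bo *bo) {}
static inline int radeon_mn_bo_map(struct radeon_bo *bo,
				   struct ttm_dma_tt *dma, bool write)
{
	return -ENODEV;
}
static inline void radeon_mn_bo_unmap(struct radeon_bo *bo,
				      struct ttm_dma_tt *dma, bool write) {}
#endif

A header guard alone only helps users of radeon.h: radeon_mn.c itself would still need matching gating (as far as I can tell its compilation is keyed to CONFIG_MMU_NOTIFIER alone), which is why the Kconfig route of making the driver select the HMM options it needs is often the simpler fix. The patch under discussion follows below.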
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1a6f6edb3515..6c83bf911e9c 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -514,6 +514,8 @@ struct radeon_bo { pid_t pid; struct radeon_mn *mn; + uint64_t *pfns; + unsigned long userptr; struct list_head mn_list; }; #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base) @@ -1787,12 +1789,22 @@ void radeon_test_syncing(struct radeon_device *rdev); #if defined(CONFIG_MMU_NOTIFIER) int radeon_mn_register(struct radeon_bo *bo, unsigned long addr); void radeon_mn_unregister(struct radeon_bo *bo); +int radeon_mn_bo_map(struct radeon_bo *bo, struct ttm_dma_tt *dma, bool write); +void radeon_mn_bo_unmap(struct radeon_bo *bo, struct ttm_dma_tt *dma, + bool write); #else static inline int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) { return -ENODEV; } static inline void radeon_mn_unregister(struct radeon_bo *bo) {} +static int radeon_mn_bo_map(struct radeon_bo *bo, struct ttm_dma_tt *dma, + bool write) +{ + return -ENODEV; +} +static void radeon_mn_bo_unmap(struct radeon_bo *bo, struct ttm_dma_tt *dma, + bool write) {} #endif /* @@ -2818,7 +2830,7 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable); extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain); extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo); -extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, +extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, struct radeon_bo *bo, uint32_t flags); extern bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm); extern bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 27d8e7dd2d06..b489025086c4 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -323,15 +323,19 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, goto handle_lockup; bo = gem_to_radeon_bo(gobj); - r = radeon_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); + + /* + * Always register an HMM mirror (if one is not already registered). + * This means ignoring RADEON_GEM_USERPTR_REGISTER flag but that flag + * is already made mandatory by flags sanity check above. 
+ */ + r = radeon_mn_register(bo, args->addr); if (r) goto release_object; - if (args->flags & RADEON_GEM_USERPTR_REGISTER) { - r = radeon_mn_register(bo, args->addr); - if (r) - goto release_object; - } + r = radeon_ttm_tt_set_userptr(bo->tbo.ttm, bo, args->flags); + if (r) + goto release_object; if (args->flags & RADEON_GEM_USERPTR_VALIDATE) { down_read(¤t->mm->mmap_sem); diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index a3bf74c1a3fc..ff53ffa5deef 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -262,9 +262,18 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) struct list_head bos; struct interval_tree_node *it; + bo->userptr = addr; + bo->pfns = kvmalloc_array(bo->tbo.num_pages, sizeof(uint64_t), + GFP_KERNEL | __GFP_ZERO); + if (bo->pfns == NULL) + return -ENOMEM; + rmn = radeon_mn_get(rdev); - if (IS_ERR(rmn)) + if (IS_ERR(rmn)) { + kvfree(bo->pfns); + bo->pfns = NULL; return PTR_ERR(rmn); + } INIT_LIST_HEAD(&bos); @@ -283,6 +292,8 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) node = kmalloc(sizeof(struct radeon_mn_node), GFP_KERNEL); if (!node) { mutex_unlock(&rmn->lock); + kvfree(bo->pfns); + bo->pfns = NULL; return -ENOMEM; } } @@ -338,4 +349,148 @@ void radeon_mn_unregister(struct radeon_bo *bo) mutex_unlock(&rmn->lock); mutex_unlock(&rdev->mn_lock); + + kvfree(bo->pfns); + bo->pfns = NULL; +} + +/** + * radeon_mn_bo_map - map range of virtual address as buffer object + * + * @bo: radeon buffer object + * @ttm: ttm_tt object in which holds mirroring result + * @write: can GPU write to the range ? + * Returns: 0 on success, error code otherwise + * + * Use HMM to mirror a range of virtual address as a buffer object mapped into + * GPU address space (thus allowing transparent GPU access to this range). It + * does not pin pages for range but rely on HMM and underlying synchronizations + * to make sure that both CPU and GPU points to same physical memory for the + * range. + */ +int radeon_mn_bo_map(struct radeon_bo *bo, struct ttm_dma_tt *dma, bool write) +{ + static const uint64_t radeon_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ + }; + static const uint64_t radeon_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ + }; + + unsigned long i, npages = bo->tbo.num_pages; + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + struct radeon_device *rdev = bo->rdev; + struct ttm_tt *ttm = &dma->ttm; + struct hmm_range range; + struct radeon_mn *rmn; + int ret; + + /* + * FIXME This whole protection shouldn't be needed as we should only + * reach that code with a valid reserved bo that can not under go a + * concurrent radeon_mn_unregister(). 
+ */ + mutex_lock(&rdev->mn_lock); + if (bo->mn == NULL) { + mutex_unlock(&rdev->mn_lock); + return -EINVAL; + } + rmn = bo->mn; + mutex_unlock(&rdev->mn_lock); + + range.pfn_shift = 12; + range.pfns = bo->pfns; + range.start = bo->userptr; + range.flags = radeon_range_flags; + range.values = radeon_range_values; + range.end = bo->userptr + radeon_bo_size(bo); + + range.vma = find_vma(rmn->mm, bo->userptr); + if (!range.vma || range.vma->vm_file || range.vma->vm_end < range.end) + return -EPERM; + + memset(ttm->pages, 0, sizeof(void*) * npages); + +again: + for (i = 0; i < npages; ++i) { + range.pfns[i] = range.flags[HMM_PFN_VALID]; + range.pfns[i] |= write ? range.flags[HMM_PFN_WRITE] : 0; + } + + ret = hmm_vma_fault(&range, true); + if (ret) + goto err_unmap; + + for (i = 0; i < npages; ++i) { + struct page *page = hmm_pfn_to_page(&range, range.pfns[i]); + + if (page == NULL) + goto again; + + if (ttm->pages[i] == page) + continue; + + if (ttm->pages[i]) + dma_unmap_page(rdev->dev, dma->dma_address[i], + PAGE_SIZE, direction); + ttm->pages[i] = page; + + dma->dma_address[i] = dma_map_page(rdev->dev, page, 0, + PAGE_SIZE, direction); + if (dma_mapping_error(rdev->dev, dma->dma_address[i])) { + hmm_vma_range_done(&range); + ttm->pages[i] = NULL; + ret = -ENOMEM; + goto err_unmap; + } + } + + /* + * Taking rmn->lock is not necessary here as we are protected from any + * concurrent invalidation through ttm object reservation. Involved + * functions: radeon_sync_cpu_device_pagetables() + * radeon_bo_list_validate() + * radeon_gem_userptr_ioctl() + */ + if (!hmm_vma_range_done(&range)) + goto again; + + return 0; + +err_unmap: + radeon_mn_bo_unmap(bo, dma, write); + return ret; +} + +/** + * radeon_mn_bo_unmap - unmap range of virtual address as buffer object + * + * @bo: radeon buffer object + * @ttm: ttm_tt object in which holds mirroring result + * @write: can GPU write to the range ? + * Returns: 0 on success, error code otherwise + */ +void radeon_mn_bo_unmap(struct radeon_bo *bo, struct ttm_dma_tt *dma, + bool write) +{ + unsigned long i, npages = bo->tbo.num_pages; + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + struct radeon_device *rdev = bo->rdev; + struct ttm_tt *ttm = &dma->ttm; + + for (i = 0; i < npages; ++i) { + /* No need to go beyond first NULL page */ + if (ttm->pages[i] == NULL) + break; + + dma_unmap_page(rdev->dev, dma->dma_address[i], + PAGE_SIZE, direction); + ttm->pages[i] = NULL; + } } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index cbb67e9ffb3a..9c33e6c429b2 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -533,103 +533,10 @@ struct radeon_ttm_tt { struct radeon_device *rdev; u64 offset; - uint64_t userptr; - struct mm_struct *usermm; + struct radeon_bo *bo; uint32_t userflags; }; -/* prepare the sg table with the user pages */ -static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) -{ - struct radeon_device *rdev = radeon_get_rdev(ttm->bdev); - struct radeon_ttm_tt *gtt = (void *)ttm; - unsigned pinned = 0, nents; - int r; - - int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); - enum dma_data_direction direction = write ? 
- DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - - if (current->mm != gtt->usermm) - return -EPERM; - - if (gtt->userflags & RADEON_GEM_USERPTR_ANONONLY) { - /* check that we only pin down anonymous memory - to prevent problems with writeback */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; - vma = find_vma(gtt->usermm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) - return -EPERM; - } - - do { - unsigned num_pages = ttm->num_pages - pinned; - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **pages = ttm->pages + pinned; - - r = get_user_pages(userptr, num_pages, write ? FOLL_WRITE : 0, - pages, NULL); - if (r < 0) - goto release_pages; - - pinned += r; - - } while (pinned < ttm->num_pages); - - r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, - ttm->num_pages << PAGE_SHIFT, - GFP_KERNEL); - if (r) - goto release_sg; - - r = -ENOMEM; - nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - if (nents != ttm->sg->nents) - goto release_sg; - - drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, - gtt->ttm.dma_address, ttm->num_pages); - - return 0; - -release_sg: - kfree(ttm->sg); - -release_pages: - release_pages(ttm->pages, pinned); - return r; -} - -static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm) -{ - struct radeon_device *rdev = radeon_get_rdev(ttm->bdev); - struct radeon_ttm_tt *gtt = (void *)ttm; - struct sg_page_iter sg_iter; - - int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); - enum dma_data_direction direction = write ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - - /* double check that we don't free the table twice */ - if (!ttm->sg->sgl) - return; - - /* free the sg table and pages again */ - dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - - for_each_sg_page(ttm->sg->sgl, &sg_iter, ttm->sg->nents, 0) { - struct page *page = sg_page_iter_page(&sg_iter); - if (!(gtt->userflags & RADEON_GEM_USERPTR_READONLY)) - set_page_dirty(page); - - mark_page_accessed(page); - put_page(page); - } - - sg_free_table(ttm->sg); -} - static int radeon_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { @@ -638,8 +545,12 @@ static int radeon_ttm_backend_bind(struct ttm_tt *ttm, RADEON_GART_PAGE_WRITE; int r; - if (gtt->userptr) { - radeon_ttm_tt_pin_userptr(ttm); + if (gtt->bo) { + bool write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); + + r = radeon_mn_bo_map(gtt->bo, >t->ttm, write); + if (r) + return r; flags &= ~RADEON_GART_PAGE_WRITE; } @@ -666,8 +577,11 @@ static int radeon_ttm_backend_unbind(struct ttm_tt *ttm) radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages); - if (gtt->userptr) - radeon_ttm_tt_unpin_userptr(ttm); + if (gtt->bo) { + bool write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); + + radeon_mn_bo_unmap(gtt->bo, >t->ttm, write); + } return 0; } @@ -727,12 +641,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm, struct radeon_device *rdev; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); - if (gtt && gtt->userptr) { - ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); - if (!ttm->sg) - return -ENOMEM; - - ttm->page_flags |= TTM_PAGE_FLAG_SG; + if (gtt && gtt->bo) { ttm->state = tt_unbound; return 0; } @@ -766,11 +675,8 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(ttm); bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); - if (gtt && gtt->userptr) { - kfree(ttm->sg); - ttm->page_flags &= ~TTM_PAGE_FLAG_SG; + if (gtt && 
gtt->bo) return; - } if (slave) return; @@ -793,7 +699,7 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) ttm_unmap_and_unpopulate_pages(rdev->dev, >t->ttm); } -int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, +int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, struct radeon_bo *bo, uint32_t flags) { struct radeon_ttm_tt *gtt = radeon_ttm_tt_to_gtt(ttm); @@ -801,8 +707,7 @@ int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, if (gtt == NULL) return -EINVAL; - gtt->userptr = addr; - gtt->usermm = current->mm; + gtt->bo = bo; gtt->userflags = flags; return 0; } @@ -814,7 +719,7 @@ bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm) if (gtt == NULL) return false; - return !!gtt->userptr; + return !!gtt->bo; } bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm)
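Independent of the configuration problem flagged by the robot, one small detail in the radeon.h hunk is worth fixing in a respin: the new fallback stubs in the #else branch are declared plain static rather than static inline. Defined that way in a widely included header, they will produce "defined but not used" warnings in every translation unit that includes radeon.h without calling them, unlike the existing radeon_mn_register()/radeon_mn_unregister() stubs directly above them. A corrected form of just those two stubs might look like this (sketch only, same signatures as in the posted patch):

static inline int radeon_mn_bo_map(struct radeon_bo *bo,
				   struct ttm_dma_tt *dma, bool write)
{
	/* Userptr support is unavailable without MMU notifiers / HMM. */
	return -ENODEV;
}
static inline void radeon_mn_bo_unmap(struct radeon_bo *bo,
				      struct ttm_dma_tt *dma, bool write) {}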