| Message ID | 52541350.5060807@canonical.com (mailing list archive) |
|---|---|
| State | New, archived |
On Tue, Oct 08, 2013 at 04:14:40PM +0200, Maarten Lankhorst wrote:
> Allocate and copy all kernel memory before doing reservations. This prevents a locking
> inversion between mmap_sem and reservation_class, and allows us to drop the trylocking
> in ttm_bo_vm_fault without upsetting lockdep.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>

I would say NAK. The current code only allocates a temporary page in the AGP
case, so the AGP path is userspace -> temp page -> cs checker -> radeon IB.

The non-AGP path memcpys directly into the radeon IB.

Your patch allocates memory, memcpys userspace into it, and then memcpys that
into the IB. That introduces an extra memcpy into the process, which is not
something we want.

Cheers,
Jerome

> ---
> [patch snipped; the full patch appears at the end of this page]
On 08.10.2013 16:33, Jerome Glisse wrote:
> On Tue, Oct 08, 2013 at 04:14:40PM +0200, Maarten Lankhorst wrote:
>> Allocate and copy all kernel memory before doing reservations. This prevents a locking
>> inversion between mmap_sem and reservation_class, and allows us to drop the trylocking
>> in ttm_bo_vm_fault without upsetting lockdep.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
> I would say NAK. The current code only allocates a temporary page in the AGP
> case, so the AGP path is userspace -> temp page -> cs checker -> radeon IB.
>
> The non-AGP path memcpys directly into the radeon IB.
>
> Your patch allocates memory, memcpys userspace into it, and then memcpys that
> into the IB. That introduces an extra memcpy into the process, which is not
> something we want.

Totally agree. In addition, there is no good reason to provide anything other
than anonymous system memory to the CS ioctl, so the dependency between
mmap_sem and reservations is not really clear to me.

Christian.

>> [patch snipped; the full patch appears at the end of this page]
On 08-10-13 16:33, Jerome Glisse wrote:
> I would say NAK. [...]
> Your patch allocates memory, memcpys userspace into it, and then memcpys that
> into the IB. That introduces an extra memcpy into the process, which is not
> something we want.

Can we move the call to radeon_ib_get up, so that it is done before calling
radeon_cs_parser_relocs?

~Maarten
On Tue, Oct 08, 2013 at 04:45:18PM +0200, Christian König wrote:
> On 08.10.2013 16:33, Jerome Glisse wrote:
> > I would say NAK. The current code only allocates a temporary page in the
> > AGP case, so the AGP path is userspace -> temp page -> cs checker -> radeon IB.
> >
> > The non-AGP path memcpys directly into the radeon IB.
> >
> > Your patch allocates memory, memcpys userspace into it, and then memcpys
> > that into the IB. That introduces an extra memcpy into the process, which
> > is not something we want.
>
> Totally agree. In addition, there is no good reason to provide anything other
> than anonymous system memory to the CS ioctl, so the dependency between
> mmap_sem and reservations is not really clear to me.
>
> Christian.

I think the issue is that in other code paths you take mmap_sem first and then
reserve the bo. But here we reserve the bo and then take mmap_sem, because the
copy from user can fault.

Cheers,
Jerome

> >> [patch snipped; the full patch appears at the end of this page]
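To see the inversion Jerome describes in isolation, here is a small standalone
illustration. It is not driver code and not from the thread; two plain pthread
mutexes stand in for mmap_sem and bo::reserve. Built with gcc -pthread, the two
threads acquire the locks in opposite orders and deadlock, which is the ABBA
cycle lockdep flags:

/*
 * Illustration only, NOT driver code: fault_path() takes the locks in
 * page-fault order, cs_path() in the pre-patch CS ioctl order.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t mmap_sem   = PTHREAD_MUTEX_INITIALIZER; /* stand-in */
static pthread_mutex_t bo_reserve = PTHREAD_MUTEX_INITIALIZER; /* stand-in */

static void *fault_path(void *arg)
{
	pthread_mutex_lock(&mmap_sem);   /* page fault takes mmap_sem... */
	sleep(1);                        /* widen the race window */
	pthread_mutex_lock(&bo_reserve); /* ...then ttm_bo_vm_fault reserves */
	puts("fault path done");
	pthread_mutex_unlock(&bo_reserve);
	pthread_mutex_unlock(&mmap_sem);
	return NULL;
}

static void *cs_path(void *arg)
{
	pthread_mutex_lock(&bo_reserve); /* CS ioctl reserves the bo... */
	sleep(1);
	pthread_mutex_lock(&mmap_sem);   /* ...then a faulting copy_from_user
	                                    needs mmap_sem: deadlock */
	puts("cs path done");
	pthread_mutex_unlock(&mmap_sem);
	pthread_mutex_unlock(&bo_reserve);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, fault_path, NULL);
	pthread_create(&b, NULL, cs_path, NULL);
	pthread_join(a, NULL); /* never returns: the two threads deadlock */
	pthread_join(b, NULL);
	return 0;
}

The patch avoids the cs_path ordering by doing all user copies before any
reservation is taken.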
On Tue, Oct 8, 2013 at 10:45 AM, Maarten Lankhorst
<maarten.lankhorst@canonical.com> wrote:
> Can we move the call to radeon_ib_get up, so that it is done before calling
> radeon_cs_parser_relocs?
>
> ~Maarten

No, that won't work for the AGP case, where we copy one page at a time while
the cs checker is processing. For AGP we could, however, fall back to the
two-memcpy, temporary-buffer approach. For the others, we don't know the IB
size or the number of IBs we want up front.

Cheers,
Jerome
On 10/08/2013 04:55 PM, Jerome Glisse wrote:
> On Tue, Oct 08, 2013 at 04:45:18PM +0200, Christian König wrote:
>> Totally agree. In addition, there is no good reason to provide anything
>> other than anonymous system memory to the CS ioctl, so the dependency
>> between mmap_sem and reservations is not really clear to me.
>>
>> Christian.
> I think the issue is that in other code paths you take mmap_sem first and
> then reserve the bo. But here we reserve the bo and then take mmap_sem,
> because the copy from user can fault.
>
> Cheers,
> Jerome

Actually, the log message is a little confusing. I think the mmap_sem locking
inversion problem is orthogonal to what's being fixed here.

This patch fixes the possible recursive bo::reserve caused by malicious
user-space handing a pointer to ttm memory, so that the ttm fault handler is
called while the bos are already reserved. That may cause a (possibly
interruptible) livelock.

Once that is fixed, we are free to choose the mmap_sem -> bo::reserve locking
order. Currently it's bo::reserve -> mmap_sem, but the hack required in the
ttm fault handler is admittedly a bit ugly. The plan is to change the locking
order to mmap_sem -> bo::reserve.

I'm not sure if it applies to this particular case, but it should be possible
to make sure that copy_from_user_inatomic() will always succeed, by making
sure the pages are present using get_user_pages(), and releasing the pages
after copy_from_user_inatomic() is done. That way there's no need for a double
memcpy slowpath, but if the copied data is very fragmented I guess the
resulting code may look ugly. The get_user_pages() function will return an
error if it hits TTM pages.

/Thomas
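For concreteness, a rough sketch of the pin-then-copy pattern Thomas describes.
This is not code from the thread: the helper name is invented, and the
get_user_pages() call assumes the eight-argument signature of roughly that
era's kernels, so treat all the details as assumptions:

/*
 * Hedged sketch, NOT driver code: pin the user pages up front so a
 * later atomic copy is expected not to fault while reservations are
 * held. Needs linux/mm.h, linux/uaccess.h, linux/slab.h.
 */
static int pin_then_copy(void *dst, const void __user *src, unsigned long size)
{
	unsigned long start = (unsigned long)src & PAGE_MASK;
	unsigned long npages = ((unsigned long)src + size - start +
				PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct page **pages;
	long i, pinned;
	int ret = 0;

	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* Fault the pages in and pin them; fails on TTM/special mappings. */
	down_read(&current->mm->mmap_sem);
	pinned = get_user_pages(current, current->mm, start, npages,
				0 /* !write */, 0 /* !force */, pages, NULL);
	up_read(&current->mm->mmap_sem);
	if (pinned < (long)npages) {
		ret = -EFAULT;
		goto out;
	}

	/* ... reserve the bos here; mmap_sem is no longer needed ... */

	pagefault_disable();
	if (__copy_from_user_inatomic(dst, src, size))
		ret = -EFAULT;	/* unexpected while the pages are pinned */
	pagefault_enable();
out:
	for (i = 0; i < pinned; i++)
		put_page(pages[i]);
	kfree(pages);
	return ret;
}

Pinning keeps the pages resident, which is the premise for expecting the
atomic copy to succeed; a production version would still need a fallback for
the case where the inatomic copy fails anyway.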
On Tue, Oct 08, 2013 at 06:29:35PM +0200, Thomas Hellstrom wrote:
> I'm not sure if it applies to this particular case, but it should be possible
> to make sure that copy_from_user_inatomic() will always succeed, by making
> sure the pages are present using get_user_pages(), and releasing the pages
> after copy_from_user_inatomic() is done. That way there's no need for a
> double memcpy slowpath, but if the copied data is very fragmented I guess the
> resulting code may look ugly. The get_user_pages() function will return an
> error if it hits TTM pages.
>
> /Thomas

get_user_pages + copy_from_user_inatomic is overkill. We should just do
get_user_pages, which fails on ttm memory, and then use the copy_highpage
helper.

Cheers,
Jerome
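A sketch of Jerome's simpler variant, again with invented names and not driver
code: once the pinned user pages and the IB backing pages are both in hand as
struct page, the copy needs no uaccess helpers at all, because copy_highpage()
kmaps both sides and does a plain page-to-page copy:

/* Hedged sketch; assumes both arrays were populated by the caller. */
#include <linux/highmem.h>

static void copy_pinned_to_ib(struct page **ib_pages,
			      struct page **user_pages,
			      unsigned long npages)
{
	unsigned long i;

	for (i = 0; i < npages; i++)
		copy_highpage(ib_pages[i], user_pages[i]);
}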
On 10/08/2013 06:47 PM, Jerome Glisse wrote:
> get_user_pages + copy_from_user_inatomic is overkill. We should just do
> get_user_pages, which fails on ttm memory, and then use the copy_highpage
> helper.
>
> Cheers,
> Jerome

Yeah, it may well be that that's the preferred solution.

/Thomas
On 08-10-13 18:47, Jerome Glisse wrote:
> On Tue, Oct 08, 2013 at 06:29:35PM +0200, Thomas Hellstrom wrote:
>> I'm not sure if it applies to this particular case, but it should be
>> possible to make sure that copy_from_user_inatomic() will always succeed,
>> by making sure the pages are present using get_user_pages(), and releasing
>> the pages after copy_from_user_inatomic() is done.
> get_user_pages + copy_from_user_inatomic is overkill. We should just do
> get_user_pages, which fails on ttm memory, and then use the copy_highpage
> helper.

I don't think we have to do anything that complicated. The easiest solution
seems to be to call radeon_ib_get before calling radeon_cs_parser_relocs, and
to copy everything into the ib buffer before taking the reserve lock.

~Maarten
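A minimal sketch of the ordering Maarten suggests. This is not the posted
patch: the arguments and error handling are simplified, and the radeon_ib_get
and radeon_cs_parser_relocs signatures are assumed from that era's driver:

/* Hedged sketch of the proposed control flow, NOT compilable driver code. */
static int cs_submit_sketch(struct radeon_device *rdev,
			    struct radeon_cs_parser *parser,
			    void __user *user_ib, unsigned length_dw)
{
	int r;

	/* 1. Allocate the IB before anything is reserved. */
	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  NULL, length_dw * 4);
	if (r)
		return r;

	/*
	 * 2. Copy from userspace while no reservation is held; a page
	 *    fault here may take mmap_sem without inverting lock order.
	 */
	if (DRM_COPY_FROM_USER(parser->ib.ptr, user_ib, length_dw * 4))
		return -EFAULT;

	/* 3. Only now resolve relocs and reserve the buffer objects. */
	return radeon_cs_parser_relocs(parser);
}

As Jerome notes above, this keeps a single copy for the non-AGP case but does
not fit the AGP path, which streams one page at a time while the checker runs.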
Allocate and copy all kernel memory before doing reservations. This prevents a locking
inversion between mmap_sem and reservation_class, and allows us to drop the trylocking
in ttm_bo_vm_fault without upsetting lockdep.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 01a3ec8..efa9bca 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2391,18 +2391,13 @@ int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
 	ib_chunk = &parser.chunks[parser.chunk_ib_idx];
 	parser.ib.length_dw = ib_chunk->length_dw;
 	*l = parser.ib.length_dw;
+	memcpy(ib, ib_chunk->kdata, ib_chunk->length_dw * 4);
 	r = r600_cs_parse(&parser);
 	if (r) {
 		DRM_ERROR("Invalid command stream !\n");
 		r600_cs_parser_fini(&parser, r);
 		return r;
 	}
-	r = radeon_cs_finish_pages(&parser);
-	if (r) {
-		DRM_ERROR("Invalid command stream !\n");
-		r600_cs_parser_fini(&parser, r);
-		return r;
-	}
 	r600_cs_parser_fini(&parser, r);
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index e067024..c52bb5e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -983,12 +983,7 @@ struct radeon_cs_reloc {
 struct radeon_cs_chunk {
 	uint32_t		chunk_id;
 	uint32_t		length_dw;
-	int			kpage_idx[2];
-	uint32_t		*kpage[2];
 	uint32_t		*kdata;
-	void __user		*user_ptr;
-	int			last_copied_page;
-	int			last_page_index;
 };
 
 struct radeon_cs_parser {
@@ -1027,8 +1022,13 @@ struct radeon_cs_parser {
 	struct radeon_sa_bo	*cpu_sema;
 };
 
-extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
-extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
+static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
+{
+	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+
+	return ibc->kdata[idx];
+}
+
 
 struct radeon_cs_packet {
 	unsigned	idx;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 7d7322e..98d8898 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -217,9 +217,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 			return -EFAULT;
 		}
 		p->chunks[i].length_dw = user_chunk.length_dw;
-		p->chunks[i].kdata = NULL;
 		p->chunks[i].chunk_id = user_chunk.chunk_id;
-		p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
 			p->chunk_relocs_idx = i;
 		}
@@ -242,25 +240,22 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 			return -EINVAL;
 		}
 
-		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
-		if ((p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) ||
-		    (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS)) {
-			size = p->chunks[i].length_dw * sizeof(uint32_t);
-			p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
-			if (p->chunks[i].kdata == NULL) {
-				return -ENOMEM;
-			}
-			if (DRM_COPY_FROM_USER(p->chunks[i].kdata,
-					       p->chunks[i].user_ptr, size)) {
-				return -EFAULT;
-			}
-			if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
-				p->cs_flags = p->chunks[i].kdata[0];
-				if (p->chunks[i].length_dw > 1)
-					ring = p->chunks[i].kdata[1];
-				if (p->chunks[i].length_dw > 2)
-					priority = (s32)p->chunks[i].kdata[2];
-			}
+		size = p->chunks[i].length_dw;
+		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
+		size *= sizeof(uint32_t);
+		if (p->chunks[i].kdata == NULL) {
+			return -ENOMEM;
+		}
+		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
+		if (DRM_COPY_FROM_USER(p->chunks[i].kdata, cdata, size)) {
+			return -EFAULT;
+		}
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
+			p->cs_flags = p->chunks[i].kdata[0];
+			if (p->chunks[i].length_dw > 1)
+				ring = p->chunks[i].kdata[1];
+			if (p->chunks[i].length_dw > 2)
+				priority = (s32)p->chunks[i].kdata[2];
 		}
 	}
 
@@ -283,34 +278,6 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 		}
 	}
 
-	/* deal with non-vm */
-	if ((p->chunk_ib_idx != -1) &&
-	    ((p->cs_flags & RADEON_CS_USE_VM) == 0) &&
-	    (p->chunks[p->chunk_ib_idx].chunk_id == RADEON_CHUNK_ID_IB)) {
-		if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
-			DRM_ERROR("cs IB too big: %d\n",
-				  p->chunks[p->chunk_ib_idx].length_dw);
-			return -EINVAL;
-		}
-		if (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) {
-			p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-			p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-			if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
-			    p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
-				kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
-				kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
-				p->chunks[p->chunk_ib_idx].kpage[0] = NULL;
-				p->chunks[p->chunk_ib_idx].kpage[1] = NULL;
-				return -ENOMEM;
-			}
-		}
-		p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
-		p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
-		p->chunks[p->chunk_ib_idx].last_copied_page = -1;
-		p->chunks[p->chunk_ib_idx].last_page_index =
-			((p->chunks[p->chunk_ib_idx].length_dw * 4) - 1) / PAGE_SIZE;
-	}
-
 	return 0;
 }
 
@@ -450,13 +417,8 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	kfree(parser->track);
 	kfree(parser->relocs);
 	kfree(parser->relocs_ptr);
-	for (i = 0; i < parser->nchunks; i++) {
-		kfree(parser->chunks[i].kdata);
-		if ((parser->rdev->flags & RADEON_IS_AGP)) {
-			kfree(parser->chunks[i].kpage[0]);
-			kfree(parser->chunks[i].kpage[1]);
-		}
-	}
+	for (i = 0; i < parser->nchunks; i++)
+		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
 	kfree(parser->chunks_array);
 	radeon_ib_free(parser->rdev, &parser->ib);
@@ -483,17 +445,15 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 		DRM_ERROR("Failed to get ib !\n");
 		return r;
 	}
+
+	memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
 	parser->ib.length_dw = ib_chunk->length_dw;
+
 	r = radeon_cs_parse(rdev, parser->ring, parser);
 	if (r || parser->parser_error) {
 		DRM_ERROR("Invalid command stream !\n");
 		return r;
 	}
-	r = radeon_cs_finish_pages(parser);
-	if (r) {
-		DRM_ERROR("Invalid command stream !\n");
-		return r;
-	}
 
 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
 		radeon_uvd_note_usage(rdev);
@@ -555,10 +515,8 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	parser->const_ib.is_const_ib = true;
 	parser->const_ib.length_dw = ib_chunk->length_dw;
 	/* Copy the packet into the IB */
-	if (DRM_COPY_FROM_USER(parser->const_ib.ptr, ib_chunk->user_ptr,
-			       ib_chunk->length_dw * 4)) {
-		return -EFAULT;
-	}
+	memcpy(parser->const_ib.ptr, ib_chunk->kdata,
+	       ib_chunk->length_dw * 4);
 	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
 	if (r) {
 		return r;
@@ -578,10 +536,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	}
 	parser->ib.length_dw = ib_chunk->length_dw;
 	/* Copy the packet into the IB */
-	if (DRM_COPY_FROM_USER(parser->ib.ptr, ib_chunk->user_ptr,
-			       ib_chunk->length_dw * 4)) {
-		return -EFAULT;
-	}
+	memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
 	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
 	if (r) {
 		return r;
@@ -704,97 +659,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	return r;
 }
 
-int radeon_cs_finish_pages(struct radeon_cs_parser *p)
-{
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	int i;
-	int size = PAGE_SIZE;
-
-	for (i = ibc->last_copied_page + 1; i <= ibc->last_page_index; i++) {
-		if (i == ibc->last_page_index) {
-			size = (ibc->length_dw * 4) % PAGE_SIZE;
-			if (size == 0)
-				size = PAGE_SIZE;
-		}
-
-		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
-				       ibc->user_ptr + (i * PAGE_SIZE),
-				       size))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-static int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
-{
-	int new_page;
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	int i;
-	int size = PAGE_SIZE;
-	bool copy1 = (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) ?
-		false : true;
-
-	for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
-		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
-				       ibc->user_ptr + (i * PAGE_SIZE),
-				       PAGE_SIZE)) {
-			p->parser_error = -EFAULT;
-			return 0;
-		}
-	}
-
-	if (pg_idx == ibc->last_page_index) {
-		size = (ibc->length_dw * 4) % PAGE_SIZE;
-		if (size == 0)
-			size = PAGE_SIZE;
-	}
-
-	new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
-	if (copy1)
-		ibc->kpage[new_page] = p->ib.ptr + (pg_idx * (PAGE_SIZE / 4));
-
-	if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
-			       ibc->user_ptr + (pg_idx * PAGE_SIZE),
-			       size)) {
-		p->parser_error = -EFAULT;
-		return 0;
-	}
-
-	/* copy to IB for non single case */
-	if (!copy1)
-		memcpy((void *)(p->ib.ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
-
-	ibc->last_copied_page = pg_idx;
-	ibc->kpage_idx[new_page] = pg_idx;
-
-	return new_page;
-}
-
-u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
-{
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	u32 pg_idx, pg_offset;
-	u32 idx_value = 0;
-	int new_page;
-
-	pg_idx = (idx * 4) / PAGE_SIZE;
-	pg_offset = (idx * 4) % PAGE_SIZE;
-
-	if (ibc->kpage_idx[0] == pg_idx)
-		return ibc->kpage[0][pg_offset/4];
-	if (ibc->kpage_idx[1] == pg_idx)
-		return ibc->kpage[1][pg_offset/4];
-
-	new_page = radeon_cs_update_pages(p, pg_idx);
-	if (new_page < 0) {
-		p->parser_error = new_page;
-		return 0;
-	}
-
-	idx_value = ibc->kpage[new_page][pg_offset/4];
-	return idx_value;
-}
-
 /**
  * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
  * @parser:	parser structure holding parsing context.