Message ID | 1592719388-13819-2-git-send-email-andrey.grodzovsky@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | RFC Support hot device unplug in amdgpu | expand |
On Sun, Jun 21, 2020 at 02:03:01AM -0400, Andrey Grodzovsky wrote: > Will be used to reroute CPU mapped BO's page faults once > device is removed. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > --- > drivers/gpu/drm/drm_file.c | 8 ++++++++ > drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > include/drm/drm_file.h | 2 ++ > include/drm/drm_gem.h | 2 ++ > 4 files changed, 22 insertions(+) > > diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c > index c4c704e..67c0770 100644 > --- a/drivers/gpu/drm/drm_file.c > +++ b/drivers/gpu/drm/drm_file.c > @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) > goto out_prime_destroy; > } > > + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!file->dummy_page) { > + ret = -ENOMEM; > + goto out_prime_destroy; > + } > + > return file; > > out_prime_destroy: > @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) > if (dev->driver->postclose) > dev->driver->postclose(dev, file); > > + __free_page(file->dummy_page); > + > drm_prime_destroy_file_private(&file->prime); > > WARN_ON(!list_empty(&file->event_list)); > diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c > index 1de2cde..c482e9c 100644 > --- a/drivers/gpu/drm/drm_prime.c > +++ b/drivers/gpu/drm/drm_prime.c > @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, > > ret = drm_prime_add_buf_handle(&file_priv->prime, > dma_buf, *handle); > + > + if (!ret) { > + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!obj->dummy_page) > + ret = -ENOMEM; > + } > + > mutex_unlock(&file_priv->prime.lock); > if (ret) > goto fail; > @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg) > dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); > dma_buf = attach->dmabuf; > dma_buf_detach(attach->dmabuf, attach); > + > + __free_page(obj->dummy_page); > + > /* remove the reference */ > dma_buf_put(dma_buf); > } > 
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h > index 19df802..349a658 100644 > --- a/include/drm/drm_file.h > +++ b/include/drm/drm_file.h > @@ -335,6 +335,8 @@ struct drm_file { > */ > struct drm_prime_file_private prime; > Kerneldoc for these please, including why we need them and when. E.g. the one in gem_bo should say it's only for exported buffers, so that we're not colliding security spaces. > + struct page *dummy_page; > + > /* private: */ > #if IS_ENABLED(CONFIG_DRM_LEGACY) > unsigned long lock_count; /* DRI1 legacy lock count */ > diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > index 0b37506..47460d1 100644 > --- a/include/drm/drm_gem.h > +++ b/include/drm/drm_gem.h > @@ -310,6 +310,8 @@ struct drm_gem_object { > * > */ > const struct drm_gem_object_funcs *funcs; > + > + struct page *dummy_page; > }; I think amdgpu doesn't care, but everyone else still might care somewhat about flink. That also shares buffers, so also needs to allocate the per-bo dummy page. I also wonder whether we shouldn't have a helper to look up the dummy page, just to encode in core code how it's supposed to cascade. -Daniel > > /** > -- > 2.7.4 >
Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: > Will be used to reroute CPU mapped BO's page faults once > device is removed. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > --- > drivers/gpu/drm/drm_file.c | 8 ++++++++ > drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > include/drm/drm_file.h | 2 ++ > include/drm/drm_gem.h | 2 ++ > 4 files changed, 22 insertions(+) > > diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c > index c4c704e..67c0770 100644 > --- a/drivers/gpu/drm/drm_file.c > +++ b/drivers/gpu/drm/drm_file.c > @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) > goto out_prime_destroy; > } > > + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!file->dummy_page) { > + ret = -ENOMEM; > + goto out_prime_destroy; > + } > + > return file; > > out_prime_destroy: > @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) > if (dev->driver->postclose) > dev->driver->postclose(dev, file); > > + __free_page(file->dummy_page); > + > drm_prime_destroy_file_private(&file->prime); > > WARN_ON(!list_empty(&file->event_list)); > diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c > index 1de2cde..c482e9c 100644 > --- a/drivers/gpu/drm/drm_prime.c > +++ b/drivers/gpu/drm/drm_prime.c > @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, > > ret = drm_prime_add_buf_handle(&file_priv->prime, > dma_buf, *handle); > + > + if (!ret) { > + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!obj->dummy_page) > + ret = -ENOMEM; > + } > + While the per file case still looks acceptable this is a clear NAK since it will massively increase the memory needed for a prime exported object. I think that this is quite overkill in the first place and for the hot unplug case we can just use the global dummy page as well. Christian. 
> mutex_unlock(&file_priv->prime.lock); > if (ret) > goto fail; > @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg) > dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); > dma_buf = attach->dmabuf; > dma_buf_detach(attach->dmabuf, attach); > + > + __free_page(obj->dummy_page); > + > /* remove the reference */ > dma_buf_put(dma_buf); > } > diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h > index 19df802..349a658 100644 > --- a/include/drm/drm_file.h > +++ b/include/drm/drm_file.h > @@ -335,6 +335,8 @@ struct drm_file { > */ > struct drm_prime_file_private prime; > > + struct page *dummy_page; > + > /* private: */ > #if IS_ENABLED(CONFIG_DRM_LEGACY) > unsigned long lock_count; /* DRI1 legacy lock count */ > diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > index 0b37506..47460d1 100644 > --- a/include/drm/drm_gem.h > +++ b/include/drm/drm_gem.h > @@ -310,6 +310,8 @@ struct drm_gem_object { > * > */ > const struct drm_gem_object_funcs *funcs; > + > + struct page *dummy_page; > }; > > /**
On Mon, 22 Jun 2020 11:35:01 +0200 Daniel Vetter <daniel@ffwll.ch> wrote: > On Sun, Jun 21, 2020 at 02:03:01AM -0400, Andrey Grodzovsky wrote: > > Will be used to reroute CPU mapped BO's page faults once > > device is removed. > > > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > > --- > > drivers/gpu/drm/drm_file.c | 8 ++++++++ > > drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > > include/drm/drm_file.h | 2 ++ > > include/drm/drm_gem.h | 2 ++ > > 4 files changed, 22 insertions(+) ... > > diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > > index 0b37506..47460d1 100644 > > --- a/include/drm/drm_gem.h > > +++ b/include/drm/drm_gem.h > > @@ -310,6 +310,8 @@ struct drm_gem_object { > > * > > */ > > const struct drm_gem_object_funcs *funcs; > > + > > + struct page *dummy_page; > > }; > > I think amdgpu doesn't care, but everyone else still might care somewhat > about flink. That also shares buffers, so also needs to allocate the > per-bo dummy page. Do you really care about making flink not explode on device hot-unplug? Why not just leave flink users die in a fire? It's not a regression. Thanks, pq
On Mon, Jun 22, 2020 at 3:18 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote: > > Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: > > Will be used to reroute CPU mapped BO's page faults once > > device is removed. > > > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > > --- > > drivers/gpu/drm/drm_file.c | 8 ++++++++ > > drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > > include/drm/drm_file.h | 2 ++ > > include/drm/drm_gem.h | 2 ++ > > 4 files changed, 22 insertions(+) > > > > diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c > > index c4c704e..67c0770 100644 > > --- a/drivers/gpu/drm/drm_file.c > > +++ b/drivers/gpu/drm/drm_file.c > > @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) > > goto out_prime_destroy; > > } > > > > + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > > + if (!file->dummy_page) { > > + ret = -ENOMEM; > > + goto out_prime_destroy; > > + } > > + > > return file; > > > > out_prime_destroy: > > @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) > > if (dev->driver->postclose) > > dev->driver->postclose(dev, file); > > > > + __free_page(file->dummy_page); > > + > > drm_prime_destroy_file_private(&file->prime); > > > > WARN_ON(!list_empty(&file->event_list)); > > diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c > > index 1de2cde..c482e9c 100644 > > --- a/drivers/gpu/drm/drm_prime.c > > +++ b/drivers/gpu/drm/drm_prime.c > > @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, > > > > ret = drm_prime_add_buf_handle(&file_priv->prime, > > dma_buf, *handle); > > + > > + if (!ret) { > > + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > > + if (!obj->dummy_page) > > + ret = -ENOMEM; > > + } > > + > > While the per file case still looks acceptable this is a clear NAK since > it will massively increase the memory needed for a prime exported object. 
> > I think that this is quite overkill in the first place and for the hot > unplug case we can just use the global dummy page as well. Imo we either don't bother with per-file dummy page, or we need this. Half-way doesn't make much sense, since for anything you dma-buf exported you have no idea whether it left a sandbox or not. E.g. anything that's shared between client/compositor has a different security context, so picking the dummy page of either is the wrong thing. If you're worried about the overhead we can also allocate the dummy page on demand, and SIGBUS if we can't allocate the right one. Then we just need to track whether a buffer has ever been exported. -Daniel > > Christian. > > > mutex_unlock(&file_priv->prime.lock); > > if (ret) > > goto fail; > > @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg) > > dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); > > dma_buf = attach->dmabuf; > > dma_buf_detach(attach->dmabuf, attach); > > + > > + __free_page(obj->dummy_page); > > + > > /* remove the reference */ > > dma_buf_put(dma_buf); > > } > > diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h > > index 19df802..349a658 100644 > > --- a/include/drm/drm_file.h > > +++ b/include/drm/drm_file.h > > @@ -335,6 +335,8 @@ struct drm_file { > > */ > > struct drm_prime_file_private prime; > > > > + struct page *dummy_page; > > + > > /* private: */ > > #if IS_ENABLED(CONFIG_DRM_LEGACY) > > unsigned long lock_count; /* DRI1 legacy lock count */ > > diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > > index 0b37506..47460d1 100644 > > --- a/include/drm/drm_gem.h > > +++ b/include/drm/drm_gem.h > > @@ -310,6 +310,8 @@ struct drm_gem_object { > > * > > */ > > const struct drm_gem_object_funcs *funcs; > > + > > + struct page *dummy_page; > > }; > > > > /** >
On Mon, Jun 22, 2020 at 4:22 PM Pekka Paalanen <ppaalanen@gmail.com> wrote: > > On Mon, 22 Jun 2020 11:35:01 +0200 > Daniel Vetter <daniel@ffwll.ch> wrote: > > > On Sun, Jun 21, 2020 at 02:03:01AM -0400, Andrey Grodzovsky wrote: > > > Will be used to reroute CPU mapped BO's page faults once > > > device is removed. > > > > > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > > > --- > > > drivers/gpu/drm/drm_file.c | 8 ++++++++ > > > drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > > > include/drm/drm_file.h | 2 ++ > > > include/drm/drm_gem.h | 2 ++ > > > 4 files changed, 22 insertions(+) > > ... > > > > diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > > > index 0b37506..47460d1 100644 > > > --- a/include/drm/drm_gem.h > > > +++ b/include/drm/drm_gem.h > > > @@ -310,6 +310,8 @@ struct drm_gem_object { > > > * > > > */ > > > const struct drm_gem_object_funcs *funcs; > > > + > > > + struct page *dummy_page; > > > }; > > > > I think amdgpu doesn't care, but everyone else still might care somewhat > > about flink. That also shares buffers, so also needs to allocate the > > per-bo dummy page. > > Do you really care about making flink not explode on device > hot-unplug? Why not just leave flink users die in a fire? > It's not a regression. It's not about exploding, they won't. With flink you can pass a buffer from one address space to the other, so imo we should avoid false sharing. E.g. if you happen to write something $secret into a private buffer, but only $non-secret stuff into shared buffers. Then if you unplug, your well-kept $secret might suddenly be visible by lots of other processes you never intended to share it with. Just feels safer to plug that hole completely. -Daniel
On Mon, 22 Jun 2020 16:24:38 +0200 Daniel Vetter <daniel@ffwll.ch> wrote: > On Mon, Jun 22, 2020 at 4:22 PM Pekka Paalanen <ppaalanen@gmail.com> wrote: > > > > On Mon, 22 Jun 2020 11:35:01 +0200 > > Daniel Vetter <daniel@ffwll.ch> wrote: > > > > > On Sun, Jun 21, 2020 at 02:03:01AM -0400, Andrey Grodzovsky wrote: > > > > Will be used to reroute CPU mapped BO's page faults once > > > > device is removed. > > > > > > > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > > > > --- > > > > drivers/gpu/drm/drm_file.c | 8 ++++++++ > > > > drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > > > > include/drm/drm_file.h | 2 ++ > > > > include/drm/drm_gem.h | 2 ++ > > > > 4 files changed, 22 insertions(+) > > > > ... > > > > > > diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > > > > index 0b37506..47460d1 100644 > > > > --- a/include/drm/drm_gem.h > > > > +++ b/include/drm/drm_gem.h > > > > @@ -310,6 +310,8 @@ struct drm_gem_object { > > > > * > > > > */ > > > > const struct drm_gem_object_funcs *funcs; > > > > + > > > > + struct page *dummy_page; > > > > }; > > > > > > I think amdgpu doesn't care, but everyone else still might care somewhat > > > about flink. That also shares buffers, so also needs to allocate the > > > per-bo dummy page. > > > > Do you really care about making flink not explode on device > > hot-unplug? Why not just leave flink users die in a fire? > > It's not a regression. > > It's not about exploding, they won't. With flink you can pass a buffer > from one address space to the other, so imo we should avoid false > sharing. E.g. if you happen to write something $secret into a private > buffer, but only $non-secret stuff into shared buffers. Then if you > unplug, your well-kept $secret might suddenly be visible by lots of > other processes you never intended to share it with. > > Just feels safer to plug that hole completely. Ah! Ok, I clearly didn't understand the consequences. Thanks, pq
On 6/22/20 9:18 AM, Christian König wrote: > Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >> Will be used to reroute CPU mapped BO's page faults once >> device is removed. >> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >> --- >> drivers/gpu/drm/drm_file.c | 8 ++++++++ >> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >> include/drm/drm_file.h | 2 ++ >> include/drm/drm_gem.h | 2 ++ >> 4 files changed, 22 insertions(+) >> >> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >> index c4c704e..67c0770 100644 >> --- a/drivers/gpu/drm/drm_file.c >> +++ b/drivers/gpu/drm/drm_file.c >> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct drm_minor >> *minor) >> goto out_prime_destroy; >> } >> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!file->dummy_page) { >> + ret = -ENOMEM; >> + goto out_prime_destroy; >> + } >> + >> return file; >> out_prime_destroy: >> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >> if (dev->driver->postclose) >> dev->driver->postclose(dev, file); >> + __free_page(file->dummy_page); >> + >> drm_prime_destroy_file_private(&file->prime); >> WARN_ON(!list_empty(&file->event_list)); >> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c >> index 1de2cde..c482e9c 100644 >> --- a/drivers/gpu/drm/drm_prime.c >> +++ b/drivers/gpu/drm/drm_prime.c >> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device >> *dev, >> ret = drm_prime_add_buf_handle(&file_priv->prime, >> dma_buf, *handle); >> + >> + if (!ret) { >> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!obj->dummy_page) >> + ret = -ENOMEM; >> + } >> + > > While the per file case still looks acceptable this is a clear NAK > since it will massively increase the memory needed for a prime > exported object. > > I think that this is quite overkill in the first place and for the hot > unplug case we can just use the global dummy page as well. > > Christian. 
Global dummy page is good for read access, what do you do on write access ? My first approach was indeed to map at first global dummy page as read only and mark the vma->vm_flags as !VM_SHARED assuming that this would trigger Copy On Write flow in core mm (https://elixir.bootlin.com/linux/v5.7-rc7/source/mm/memory.c#L3977) on the next page fault to same address triggered by a write access but then I realized a new COW page will be allocated for each such mapping and this is much more wasteful than having a dedicated page per GEM object. We can indeed optimize by allocating this dummy page on the first page fault after device disconnect instead of on GEM object creation. Andrey > >> mutex_unlock(&file_priv->prime.lock); >> if (ret) >> goto fail; >> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct >> drm_gem_object *obj, struct sg_table *sg) >> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); >> dma_buf = attach->dmabuf; >> dma_buf_detach(attach->dmabuf, attach); >> + >> + __free_page(obj->dummy_page); >> + >> /* remove the reference */ >> dma_buf_put(dma_buf); >> } >> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h >> index 19df802..349a658 100644 >> --- a/include/drm/drm_file.h >> +++ b/include/drm/drm_file.h >> @@ -335,6 +335,8 @@ struct drm_file { >> */ >> struct drm_prime_file_private prime; >> + struct page *dummy_page; >> + >> /* private: */ >> #if IS_ENABLED(CONFIG_DRM_LEGACY) >> unsigned long lock_count; /* DRI1 legacy lock count */ >> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >> index 0b37506..47460d1 100644 >> --- a/include/drm/drm_gem.h >> +++ b/include/drm/drm_gem.h >> @@ -310,6 +310,8 @@ struct drm_gem_object { >> * >> */ >> const struct drm_gem_object_funcs *funcs; >> + >> + struct page *dummy_page; >> }; >> /** >
Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: > > On 6/22/20 9:18 AM, Christian König wrote: >> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>> Will be used to reroute CPU mapped BO's page faults once >>> device is removed. >>> >>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>> --- >>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>> include/drm/drm_file.h | 2 ++ >>> include/drm/drm_gem.h | 2 ++ >>> 4 files changed, 22 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >>> index c4c704e..67c0770 100644 >>> --- a/drivers/gpu/drm/drm_file.c >>> +++ b/drivers/gpu/drm/drm_file.c >>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>> drm_minor *minor) >>> goto out_prime_destroy; >>> } >>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>> + if (!file->dummy_page) { >>> + ret = -ENOMEM; >>> + goto out_prime_destroy; >>> + } >>> + >>> return file; >>> out_prime_destroy: >>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>> if (dev->driver->postclose) >>> dev->driver->postclose(dev, file); >>> + __free_page(file->dummy_page); >>> + >>> drm_prime_destroy_file_private(&file->prime); >>> WARN_ON(!list_empty(&file->event_list)); >>> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c >>> index 1de2cde..c482e9c 100644 >>> --- a/drivers/gpu/drm/drm_prime.c >>> +++ b/drivers/gpu/drm/drm_prime.c >>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>> drm_device *dev, >>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>> dma_buf, *handle); >>> + >>> + if (!ret) { >>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>> + if (!obj->dummy_page) >>> + ret = -ENOMEM; >>> + } >>> + >> >> While the per file case still looks acceptable this is a clear NAK >> since it will massively increase the memory needed for a prime >> exported object. 
>> >> I think that this is quite overkill in the first place and for the >> hot unplug case we can just use the global dummy page as well. >> >> Christian. > > > Global dummy page is good for read access, what do you do on write > access ? My first approach was indeed to map at first global dummy > page as read only and mark the vma->vm_flags as !VM_SHARED assuming > that this would trigger Copy On Write flow in core mm > (https://elixir.bootlin.com/linux/v5.7-rc7/source/mm/memory.c#L3977) > on the next page fault to same address triggered by a write access but > then i realized a new COW page will be allocated for each such mapping > and this is much more wasteful then having a dedicated page per GEM > object. Yeah, but this is only for a very, very small corner case. What we need to prevent is increasing the memory usage during normal operation too much. Using memory during the unplug is completely unproblematic because we just released quite a bunch of it by releasing all those system memory buffers. And I'm pretty sure that COWed pages are correctly accounted towards the used memory of a process. So I think if that approach works as intended and the COW pages are released again on unmapping it would be the perfect solution to the problem. Daniel what do you think? Regards, Christian. > We can indeed optimize by allocating this dummy page on the first page > fault after device disconnect instead on GEM object creation. 
> > Andrey > > >> >>> mutex_unlock(&file_priv->prime.lock); >>> if (ret) >>> goto fail; >>> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct >>> drm_gem_object *obj, struct sg_table *sg) >>> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); >>> dma_buf = attach->dmabuf; >>> dma_buf_detach(attach->dmabuf, attach); >>> + >>> + __free_page(obj->dummy_page); >>> + >>> /* remove the reference */ >>> dma_buf_put(dma_buf); >>> } >>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h >>> index 19df802..349a658 100644 >>> --- a/include/drm/drm_file.h >>> +++ b/include/drm/drm_file.h >>> @@ -335,6 +335,8 @@ struct drm_file { >>> */ >>> struct drm_prime_file_private prime; >>> + struct page *dummy_page; >>> + >>> /* private: */ >>> #if IS_ENABLED(CONFIG_DRM_LEGACY) >>> unsigned long lock_count; /* DRI1 legacy lock count */ >>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >>> index 0b37506..47460d1 100644 >>> --- a/include/drm/drm_gem.h >>> +++ b/include/drm/drm_gem.h >>> @@ -310,6 +310,8 @@ struct drm_gem_object { >>> * >>> */ >>> const struct drm_gem_object_funcs *funcs; >>> + >>> + struct page *dummy_page; >>> }; >>> /** >>
On Mon, Jun 22, 2020 at 7:45 PM Christian König <christian.koenig@amd.com> wrote: > > Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: > > > > On 6/22/20 9:18 AM, Christian König wrote: > >> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: > >>> Will be used to reroute CPU mapped BO's page faults once > >>> device is removed. > >>> > >>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > >>> --- > >>> drivers/gpu/drm/drm_file.c | 8 ++++++++ > >>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > >>> include/drm/drm_file.h | 2 ++ > >>> include/drm/drm_gem.h | 2 ++ > >>> 4 files changed, 22 insertions(+) > >>> > >>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c > >>> index c4c704e..67c0770 100644 > >>> --- a/drivers/gpu/drm/drm_file.c > >>> +++ b/drivers/gpu/drm/drm_file.c > >>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct > >>> drm_minor *minor) > >>> goto out_prime_destroy; > >>> } > >>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > >>> + if (!file->dummy_page) { > >>> + ret = -ENOMEM; > >>> + goto out_prime_destroy; > >>> + } > >>> + > >>> return file; > >>> out_prime_destroy: > >>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) > >>> if (dev->driver->postclose) > >>> dev->driver->postclose(dev, file); > >>> + __free_page(file->dummy_page); > >>> + > >>> drm_prime_destroy_file_private(&file->prime); > >>> WARN_ON(!list_empty(&file->event_list)); > >>> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c > >>> index 1de2cde..c482e9c 100644 > >>> --- a/drivers/gpu/drm/drm_prime.c > >>> +++ b/drivers/gpu/drm/drm_prime.c > >>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct > >>> drm_device *dev, > >>> ret = drm_prime_add_buf_handle(&file_priv->prime, > >>> dma_buf, *handle); > >>> + > >>> + if (!ret) { > >>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > >>> + if (!obj->dummy_page) > >>> + ret = -ENOMEM; > >>> + } > >>> + > >> > >> While the per file case 
still looks acceptable this is a clear NAK > >> since it will massively increase the memory needed for a prime > >> exported object. > >> > >> I think that this is quite overkill in the first place and for the > >> hot unplug case we can just use the global dummy page as well. > >> > >> Christian. > > > > > > Global dummy page is good for read access, what do you do on write > > access ? My first approach was indeed to map at first global dummy > > page as read only and mark the vma->vm_flags as !VM_SHARED assuming > > that this would trigger Copy On Write flow in core mm > > (https://elixir.bootlin.com/linux/v5.7-rc7/source/mm/memory.c#L3977) > > on the next page fault to same address triggered by a write access but > > then i realized a new COW page will be allocated for each such mapping > > and this is much more wasteful then having a dedicated page per GEM > > object. > > Yeah, but this is only for a very very small corner cases. What we need > to prevent is increasing the memory usage during normal operation to much. > > Using memory during the unplug is completely unproblematic because we > just released quite a bunch of it by releasing all those system memory > buffers. > > And I'm pretty sure that COWed pages are correctly accounted towards the > used memory of a process. > > So I think if that approach works as intended and the COW pages are > released again on unmapping it would be the perfect solution to the problem. > > Daniel what do you think? If COW works, sure sounds reasonable. And if we can make sure we managed to drop all the system allocations (otherwise suddenly 2x memory usage, worst case). But I have no idea whether we can retroshoehorn that into an established vma, you might have fun stuff like a mkwrite handler there (which I thought is the COW handler thing, but really no idea). 
If we need to massively change stuff then I think rw dummy page, allocated on first fault after hotunplug (maybe just make it one per object, that's simplest) seems like the much safer option. Much less code that can go wrong. -Daniel > Regards, > Christian. > > > We can indeed optimize by allocating this dummy page on the first page > > fault after device disconnect instead on GEM object creation. > > > > Andrey > > > > > >> > >>> mutex_unlock(&file_priv->prime.lock); > >>> if (ret) > >>> goto fail; > >>> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct > >>> drm_gem_object *obj, struct sg_table *sg) > >>> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); > >>> dma_buf = attach->dmabuf; > >>> dma_buf_detach(attach->dmabuf, attach); > >>> + > >>> + __free_page(obj->dummy_page); > >>> + > >>> /* remove the reference */ > >>> dma_buf_put(dma_buf); > >>> } > >>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h > >>> index 19df802..349a658 100644 > >>> --- a/include/drm/drm_file.h > >>> +++ b/include/drm/drm_file.h > >>> @@ -335,6 +335,8 @@ struct drm_file { > >>> */ > >>> struct drm_prime_file_private prime; > >>> + struct page *dummy_page; > >>> + > >>> /* private: */ > >>> #if IS_ENABLED(CONFIG_DRM_LEGACY) > >>> unsigned long lock_count; /* DRI1 legacy lock count */ > >>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > >>> index 0b37506..47460d1 100644 > >>> --- a/include/drm/drm_gem.h > >>> +++ b/include/drm/drm_gem.h > >>> @@ -310,6 +310,8 @@ struct drm_gem_object { > >>> * > >>> */ > >>> const struct drm_gem_object_funcs *funcs; > >>> + > >>> + struct page *dummy_page; > >>> }; > >>> /** > >> >
On 6/22/20 5:35 AM, Daniel Vetter wrote: > On Sun, Jun 21, 2020 at 02:03:01AM -0400, Andrey Grodzovsky wrote: >> Will be used to reroute CPU mapped BO's page faults once >> device is removed. >> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >> --- >> drivers/gpu/drm/drm_file.c | 8 ++++++++ >> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >> include/drm/drm_file.h | 2 ++ >> include/drm/drm_gem.h | 2 ++ >> 4 files changed, 22 insertions(+) >> >> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >> index c4c704e..67c0770 100644 >> --- a/drivers/gpu/drm/drm_file.c >> +++ b/drivers/gpu/drm/drm_file.c >> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) >> goto out_prime_destroy; >> } >> >> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!file->dummy_page) { >> + ret = -ENOMEM; >> + goto out_prime_destroy; >> + } >> + >> return file; >> >> out_prime_destroy: >> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >> if (dev->driver->postclose) >> dev->driver->postclose(dev, file); >> >> + __free_page(file->dummy_page); >> + >> drm_prime_destroy_file_private(&file->prime); >> >> WARN_ON(!list_empty(&file->event_list)); >> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c >> index 1de2cde..c482e9c 100644 >> --- a/drivers/gpu/drm/drm_prime.c >> +++ b/drivers/gpu/drm/drm_prime.c >> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, >> >> ret = drm_prime_add_buf_handle(&file_priv->prime, >> dma_buf, *handle); >> + >> + if (!ret) { >> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!obj->dummy_page) >> + ret = -ENOMEM; >> + } >> + >> mutex_unlock(&file_priv->prime.lock); >> if (ret) >> goto fail; >> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg) >> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); >> dma_buf = attach->dmabuf; >> dma_buf_detach(attach->dmabuf, attach); 
>> + >> + __free_page(obj->dummy_page); >> + >> /* remove the reference */ >> dma_buf_put(dma_buf); >> } >> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h >> index 19df802..349a658 100644 >> --- a/include/drm/drm_file.h >> +++ b/include/drm/drm_file.h >> @@ -335,6 +335,8 @@ struct drm_file { >> */ >> struct drm_prime_file_private prime; >> > Kerneldoc for these please, including why we need them and when. E.g. the > one in gem_bo should say it's only for exported buffers, so that we're not > colliding security spaces. > >> + struct page *dummy_page; >> + >> /* private: */ >> #if IS_ENABLED(CONFIG_DRM_LEGACY) >> unsigned long lock_count; /* DRI1 legacy lock count */ >> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >> index 0b37506..47460d1 100644 >> --- a/include/drm/drm_gem.h >> +++ b/include/drm/drm_gem.h >> @@ -310,6 +310,8 @@ struct drm_gem_object { >> * >> */ >> const struct drm_gem_object_funcs *funcs; >> + >> + struct page *dummy_page; >> }; > I think amdgpu doesn't care, but everyone else still might care somewhat > about flink. That also shares buffers, so also needs to allocate the > per-bo dummy page. Hi, back to this topic after a long context switch for internal project. I don't see why for FLINK we can't use same dummy page from struct drm_gem_object - looking at drm_gem_flink_ioctl I see that the underlying object we look up is still of type drm_gem_object. Why we need per BO (TTM BO I assume?) dummy page for this ? Andrey > > I also wonder whether we shouldn't have a helper to look up the dummy > page, just to encode in core code how it's supposedo to cascade. > -Daniel > >> >> /** >> -- >> 2.7.4 >>
On 6/22/20 1:50 PM, Daniel Vetter wrote: > On Mon, Jun 22, 2020 at 7:45 PM Christian König > <christian.koenig@amd.com> wrote: >> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>> On 6/22/20 9:18 AM, Christian König wrote: >>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>> Will be used to reroute CPU mapped BO's page faults once >>>>> device is removed. >>>>> >>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>> --- >>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>> include/drm/drm_file.h | 2 ++ >>>>> include/drm/drm_gem.h | 2 ++ >>>>> 4 files changed, 22 insertions(+) >>>>> >>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >>>>> index c4c704e..67c0770 100644 >>>>> --- a/drivers/gpu/drm/drm_file.c >>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>> drm_minor *minor) >>>>> goto out_prime_destroy; >>>>> } >>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>> + if (!file->dummy_page) { >>>>> + ret = -ENOMEM; >>>>> + goto out_prime_destroy; >>>>> + } >>>>> + >>>>> return file; >>>>> out_prime_destroy: >>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>> if (dev->driver->postclose) >>>>> dev->driver->postclose(dev, file); >>>>> + __free_page(file->dummy_page); >>>>> + >>>>> drm_prime_destroy_file_private(&file->prime); >>>>> WARN_ON(!list_empty(&file->event_list)); >>>>> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c >>>>> index 1de2cde..c482e9c 100644 >>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>> drm_device *dev, >>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>> dma_buf, *handle); >>>>> + >>>>> + if (!ret) { >>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>> + if (!obj->dummy_page) >>>>> + ret = -ENOMEM; >>>>> + } 
>>>>> + >>>> While the per file case still looks acceptable this is a clear NAK >>>> since it will massively increase the memory needed for a prime >>>> exported object. >>>> >>>> I think that this is quite overkill in the first place and for the >>>> hot unplug case we can just use the global dummy page as well. >>>> >>>> Christian. >>> >>> Global dummy page is good for read access, what do you do on write >>> access ? My first approach was indeed to map at first global dummy >>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming >>> that this would trigger Copy On Write flow in core mm >>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=02%7C01%7CAndrey.Grodzovsky%40amd.com%7C3753451d037544e7495408d816d4c4ee%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637284450384586120&sdata=ZpRaQgqA5K4jRfidOiedey0AleeYQ97WNUkGA29ERA0%3D&reserved=0) >>> on the next page fault to same address triggered by a write access but >>> then i realized a new COW page will be allocated for each such mapping >>> and this is much more wasteful then having a dedicated page per GEM >>> object. >> Yeah, but this is only for a very very small corner cases. What we need >> to prevent is increasing the memory usage during normal operation to much. >> >> Using memory during the unplug is completely unproblematic because we >> just released quite a bunch of it by releasing all those system memory >> buffers. >> >> And I'm pretty sure that COWed pages are correctly accounted towards the >> used memory of a process. >> >> So I think if that approach works as intended and the COW pages are >> released again on unmapping it would be the perfect solution to the problem. >> >> Daniel what do you think? > If COW works, sure sounds reasonable. And if we can make sure we > managed to drop all the system allocations (otherwise suddenly 2x > memory usage, worst case). 
But I have no idea whether we can > retroshoehorn that into an established vma, you might have fun stuff > like a mkwrite handler there (which I thought is the COW handler > thing, but really no idea). Can you clarify your concern here? I see no DRM driver besides vmwgfx that installs a handler for vm_operations_struct.page_mkwrite and in any case, since I will be turning off the VM_SHARED flag for the faulting vm_area_struct making it a COW, page_mkwrite will not be called on any subsequent vm fault. Andrey > > If we need to massively change stuff then I think rw dummy page, > allocated on first fault after hotunplug (maybe just make it one per > object, that's simplest) seems like the much safer option. Much less > code that can go wrong. > -Daniel > >> Regards, >> Christian. >> >>> We can indeed optimize by allocating this dummy page on the first page >>> fault after device disconnect instead on GEM object creation. >>> >>> Andrey >>> >>> >>>>> mutex_unlock(&file_priv->prime.lock); >>>>> if (ret) >>>>> goto fail; >>>>> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct >>>>> drm_gem_object *obj, struct sg_table *sg) >>>>> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); >>>>> dma_buf = attach->dmabuf; >>>>> dma_buf_detach(attach->dmabuf, attach); >>>>> + >>>>> + __free_page(obj->dummy_page); >>>>> + >>>>> /* remove the reference */ >>>>> dma_buf_put(dma_buf); >>>>> } >>>>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h >>>>> index 19df802..349a658 100644 >>>>> --- a/include/drm/drm_file.h >>>>> +++ b/include/drm/drm_file.h >>>>> @@ -335,6 +335,8 @@ struct drm_file { >>>>> */ >>>>> struct drm_prime_file_private prime; >>>>> + struct page *dummy_page; >>>>> + >>>>> /* private: */ >>>>> #if IS_ENABLED(CONFIG_DRM_LEGACY) >>>>> unsigned long lock_count; /* DRI1 legacy lock count */ >>>>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >>>>> index 0b37506..47460d1 100644 >>>>> --- a/include/drm/drm_gem.h >>>>> +++ 
b/include/drm/drm_gem.h >>>>> @@ -310,6 +310,8 @@ struct drm_gem_object { >>>>> * >>>>> */ >>>>> const struct drm_gem_object_funcs *funcs; >>>>> + >>>>> + struct page *dummy_page; >>>>> }; >>>>> /** >
On 6/22/20 1:50 PM, Daniel Vetter wrote: > On Mon, Jun 22, 2020 at 7:45 PM Christian König > <christian.koenig@amd.com> wrote: >> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>> On 6/22/20 9:18 AM, Christian König wrote: >>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>> Will be used to reroute CPU mapped BO's page faults once >>>>> device is removed. >>>>> >>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>> --- >>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>> include/drm/drm_file.h | 2 ++ >>>>> include/drm/drm_gem.h | 2 ++ >>>>> 4 files changed, 22 insertions(+) >>>>> >>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >>>>> index c4c704e..67c0770 100644 >>>>> --- a/drivers/gpu/drm/drm_file.c >>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>> drm_minor *minor) >>>>> goto out_prime_destroy; >>>>> } >>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>> + if (!file->dummy_page) { >>>>> + ret = -ENOMEM; >>>>> + goto out_prime_destroy; >>>>> + } >>>>> + >>>>> return file; >>>>> out_prime_destroy: >>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>> if (dev->driver->postclose) >>>>> dev->driver->postclose(dev, file); >>>>> + __free_page(file->dummy_page); >>>>> + >>>>> drm_prime_destroy_file_private(&file->prime); >>>>> WARN_ON(!list_empty(&file->event_list)); >>>>> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c >>>>> index 1de2cde..c482e9c 100644 >>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>> drm_device *dev, >>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>> dma_buf, *handle); >>>>> + >>>>> + if (!ret) { >>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>> + if (!obj->dummy_page) >>>>> + ret = -ENOMEM; >>>>> + } 
>>>>> + >>>> While the per file case still looks acceptable this is a clear NAK >>>> since it will massively increase the memory needed for a prime >>>> exported object. >>>> >>>> I think that this is quite overkill in the first place and for the >>>> hot unplug case we can just use the global dummy page as well. >>>> >>>> Christian. >>> >>> Global dummy page is good for read access, what do you do on write >>> access ? My first approach was indeed to map at first global dummy >>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming >>> that this would trigger Copy On Write flow in core mm >>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=02%7C01%7CAndrey.Grodzovsky%40amd.com%7C3753451d037544e7495408d816d4c4ee%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637284450384586120&sdata=ZpRaQgqA5K4jRfidOiedey0AleeYQ97WNUkGA29ERA0%3D&reserved=0) >>> on the next page fault to same address triggered by a write access but >>> then i realized a new COW page will be allocated for each such mapping >>> and this is much more wasteful then having a dedicated page per GEM >>> object. >> Yeah, but this is only for a very very small corner cases. What we need >> to prevent is increasing the memory usage during normal operation to much. >> >> Using memory during the unplug is completely unproblematic because we >> just released quite a bunch of it by releasing all those system memory >> buffers. >> >> And I'm pretty sure that COWed pages are correctly accounted towards the >> used memory of a process. >> >> So I think if that approach works as intended and the COW pages are >> released again on unmapping it would be the perfect solution to the problem. >> >> Daniel what do you think? > If COW works, sure sounds reasonable. And if we can make sure we > managed to drop all the system allocations (otherwise suddenly 2x > memory usage, worst case). 
But I have no idea whether we can > retroshoehorn that into an established vma, you might have fun stuff > like a mkwrite handler there (which I thought is the COW handler > thing, but really no idea). > > If we need to massively change stuff then I think rw dummy page, > allocated on first fault after hotunplug (maybe just make it one per > object, that's simplest) seems like the much safer option. Much less > code that can go wrong. > -Daniel Regarding COW, I was looking into how to properly implement it from within the fault handler (i.e. ttm_bo_vm_fault) and the main obstacle I hit is that of exclusive access to the vm_area_struct, I need to be able to modify vma->vm_flags (and vm_page_prot) to remove the VM_SHARED bit so COW can be triggered on subsequent write access fault (here https://elixir.bootlin.com/linux/latest/source/mm/memory.c#L4128) but core mm takes only the read side mm_sem (here for example https://elixir.bootlin.com/linux/latest/source/drivers/iommu/amd/iommu_v2.c#L488) and so I am not supposed to modify vm_area_struct in this case. I am not sure if it's legit to write lock the mm_sem from this point. I found some discussions about this here http://lkml.iu.edu/hypermail/linux/kernel/1909.1/02754.html but it wasn't really clear to me what's the solution. In any case, it seems to me that an easier and more memory-saving solution would be to just switch to a per ttm bo dummy rw page that would be allocated on demand as you suggested here. This should also take care of imported BOs and flink cases. Then I can drop the per device FD and per GEM object FD dummy BO and the ugly loop I am using in patch 2 to match the faulting BO to the right dummy page. Does this make sense? Andrey > >> Regards, >> Christian. >> >>> We can indeed optimize by allocating this dummy page on the first page >>> fault after device disconnect instead on GEM object creation. 
>>> >>> Andrey >>> >>> >>>>> mutex_unlock(&file_priv->prime.lock); >>>>> if (ret) >>>>> goto fail; >>>>> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct >>>>> drm_gem_object *obj, struct sg_table *sg) >>>>> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); >>>>> dma_buf = attach->dmabuf; >>>>> dma_buf_detach(attach->dmabuf, attach); >>>>> + >>>>> + __free_page(obj->dummy_page); >>>>> + >>>>> /* remove the reference */ >>>>> dma_buf_put(dma_buf); >>>>> } >>>>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h >>>>> index 19df802..349a658 100644 >>>>> --- a/include/drm/drm_file.h >>>>> +++ b/include/drm/drm_file.h >>>>> @@ -335,6 +335,8 @@ struct drm_file { >>>>> */ >>>>> struct drm_prime_file_private prime; >>>>> + struct page *dummy_page; >>>>> + >>>>> /* private: */ >>>>> #if IS_ENABLED(CONFIG_DRM_LEGACY) >>>>> unsigned long lock_count; /* DRI1 legacy lock count */ >>>>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >>>>> index 0b37506..47460d1 100644 >>>>> --- a/include/drm/drm_gem.h >>>>> +++ b/include/drm/drm_gem.h >>>>> @@ -310,6 +310,8 @@ struct drm_gem_object { >>>>> * >>>>> */ >>>>> const struct drm_gem_object_funcs *funcs; >>>>> + >>>>> + struct page *dummy_page; >>>>> }; >>>>> /** >
Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: > > On 6/22/20 1:50 PM, Daniel Vetter wrote: >> On Mon, Jun 22, 2020 at 7:45 PM Christian König >> <christian.koenig@amd.com> wrote: >>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>>> On 6/22/20 9:18 AM, Christian König wrote: >>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>>> Will be used to reroute CPU mapped BO's page faults once >>>>>> device is removed. >>>>>> >>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>> --- >>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>>> include/drm/drm_file.h | 2 ++ >>>>>> include/drm/drm_gem.h | 2 ++ >>>>>> 4 files changed, 22 insertions(+) >>>>>> >>>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >>>>>> index c4c704e..67c0770 100644 >>>>>> --- a/drivers/gpu/drm/drm_file.c >>>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>>> drm_minor *minor) >>>>>> goto out_prime_destroy; >>>>>> } >>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>> + if (!file->dummy_page) { >>>>>> + ret = -ENOMEM; >>>>>> + goto out_prime_destroy; >>>>>> + } >>>>>> + >>>>>> return file; >>>>>> out_prime_destroy: >>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>>> if (dev->driver->postclose) >>>>>> dev->driver->postclose(dev, file); >>>>>> + __free_page(file->dummy_page); >>>>>> + >>>>>> drm_prime_destroy_file_private(&file->prime); >>>>>> WARN_ON(!list_empty(&file->event_list)); >>>>>> diff --git a/drivers/gpu/drm/drm_prime.c >>>>>> b/drivers/gpu/drm/drm_prime.c >>>>>> index 1de2cde..c482e9c 100644 >>>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>>> drm_device *dev, >>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>>> dma_buf, *handle); >>>>>> + >>>>>> + if (!ret) { >>>>>> + 
obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>> + if (!obj->dummy_page) >>>>>> + ret = -ENOMEM; >>>>>> + } >>>>>> + >>>>> While the per file case still looks acceptable this is a clear NAK >>>>> since it will massively increase the memory needed for a prime >>>>> exported object. >>>>> >>>>> I think that this is quite overkill in the first place and for the >>>>> hot unplug case we can just use the global dummy page as well. >>>>> >>>>> Christian. >>>> >>>> Global dummy page is good for read access, what do you do on write >>>> access ? My first approach was indeed to map at first global dummy >>>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming >>>> that this would trigger Copy On Write flow in core mm >>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=02%7C01%7CAndrey.Grodzovsky%40amd.com%7C3753451d037544e7495408d816d4c4ee%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637284450384586120&sdata=ZpRaQgqA5K4jRfidOiedey0AleeYQ97WNUkGA29ERA0%3D&reserved=0) >>>> >>>> on the next page fault to same address triggered by a write access but >>>> then i realized a new COW page will be allocated for each such mapping >>>> and this is much more wasteful then having a dedicated page per GEM >>>> object. >>> Yeah, but this is only for a very very small corner cases. What we need >>> to prevent is increasing the memory usage during normal operation to >>> much. >>> >>> Using memory during the unplug is completely unproblematic because we >>> just released quite a bunch of it by releasing all those system memory >>> buffers. >>> >>> And I'm pretty sure that COWed pages are correctly accounted towards >>> the >>> used memory of a process. >>> >>> So I think if that approach works as intended and the COW pages are >>> released again on unmapping it would be the perfect solution to the >>> problem. >>> >>> Daniel what do you think? 
>> If COW works, sure sounds reasonable. And if we can make sure we >> managed to drop all the system allocations (otherwise suddenly 2x >> memory usage, worst case). But I have no idea whether we can >> retroshoehorn that into an established vma, you might have fun stuff >> like a mkwrite handler there (which I thought is the COW handler >> thing, but really no idea). >> >> If we need to massively change stuff then I think rw dummy page, >> allocated on first fault after hotunplug (maybe just make it one per >> object, that's simplest) seems like the much safer option. Much less >> code that can go wrong. >> -Daniel > > > Regarding COW, i was looking into how to properly implement it from > within the fault handler (i.e. ttm_bo_vm_fault) > and the main obstacle I hit is that of exclusive access to the > vm_area_struct, i need to be able to modify > vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can > be triggered on subsequent write access > fault (here > https://elixir.bootlin.com/linux/latest/source/mm/memory.c#L4128) > but core mm takes only read side mm_sem (here for example > https://elixir.bootlin.com/linux/latest/source/drivers/iommu/amd/iommu_v2.c#L488) > and so I am not supposed to modify vm_area_struct in this case. I am > not sure if it's legit to write lock tthe mm_sem from this point. > I found some discussions about this here > http://lkml.iu.edu/hypermail/linux/kernel/1909.1/02754.html but it > wasn't really clear to me > what's the solution. > > In any case, seems to me that easier and more memory saving solution > would be to just switch to per ttm bo dumy rw page that > would be allocated on demand as you suggested here. This should also > take care of imported BOs and flink cases. > Then i can drop the per device FD and per GEM object FD dummy BO and > the ugly loop i am using in patch 2 to match faulting BO to the right > dummy page. > > Does this makes sense ? 
I still don't see the information leak as much of a problem, but if Daniel insists we should probably do this. But could we at least have only one page per client instead of per BO? Thanks, Christian. > > Andrey > > >> >>> Regards, >>> Christian. >>> >>>> We can indeed optimize by allocating this dummy page on the first page >>>> fault after device disconnect instead on GEM object creation. >>>> >>>> Andrey >>>> >>>> >>>>>> mutex_unlock(&file_priv->prime.lock); >>>>>> if (ret) >>>>>> goto fail; >>>>>> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct >>>>>> drm_gem_object *obj, struct sg_table *sg) >>>>>> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); >>>>>> dma_buf = attach->dmabuf; >>>>>> dma_buf_detach(attach->dmabuf, attach); >>>>>> + >>>>>> + __free_page(obj->dummy_page); >>>>>> + >>>>>> /* remove the reference */ >>>>>> dma_buf_put(dma_buf); >>>>>> } >>>>>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h >>>>>> index 19df802..349a658 100644 >>>>>> --- a/include/drm/drm_file.h >>>>>> +++ b/include/drm/drm_file.h >>>>>> @@ -335,6 +335,8 @@ struct drm_file { >>>>>> */ >>>>>> struct drm_prime_file_private prime; >>>>>> + struct page *dummy_page; >>>>>> + >>>>>> /* private: */ >>>>>> #if IS_ENABLED(CONFIG_DRM_LEGACY) >>>>>> unsigned long lock_count; /* DRI1 legacy lock count */ >>>>>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >>>>>> index 0b37506..47460d1 100644 >>>>>> --- a/include/drm/drm_gem.h >>>>>> +++ b/include/drm/drm_gem.h >>>>>> @@ -310,6 +310,8 @@ struct drm_gem_object { >>>>>> * >>>>>> */ >>>>>> const struct drm_gem_object_funcs *funcs; >>>>>> + >>>>>> + struct page *dummy_page; >>>>>> }; >>>>>> /** >> > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
On Sat, Nov 14, 2020 at 9:41 AM Christian König <ckoenig.leichtzumerken@gmail.com> wrote: > > Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: > > > > On 6/22/20 1:50 PM, Daniel Vetter wrote: > >> On Mon, Jun 22, 2020 at 7:45 PM Christian König > >> <christian.koenig@amd.com> wrote: > >>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: > >>>> On 6/22/20 9:18 AM, Christian König wrote: > >>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: > >>>>>> Will be used to reroute CPU mapped BO's page faults once > >>>>>> device is removed. > >>>>>> > >>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > >>>>>> --- > >>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ > >>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > >>>>>> include/drm/drm_file.h | 2 ++ > >>>>>> include/drm/drm_gem.h | 2 ++ > >>>>>> 4 files changed, 22 insertions(+) > >>>>>> > >>>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c > >>>>>> index c4c704e..67c0770 100644 > >>>>>> --- a/drivers/gpu/drm/drm_file.c > >>>>>> +++ b/drivers/gpu/drm/drm_file.c > >>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct > >>>>>> drm_minor *minor) > >>>>>> goto out_prime_destroy; > >>>>>> } > >>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > >>>>>> + if (!file->dummy_page) { > >>>>>> + ret = -ENOMEM; > >>>>>> + goto out_prime_destroy; > >>>>>> + } > >>>>>> + > >>>>>> return file; > >>>>>> out_prime_destroy: > >>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) > >>>>>> if (dev->driver->postclose) > >>>>>> dev->driver->postclose(dev, file); > >>>>>> + __free_page(file->dummy_page); > >>>>>> + > >>>>>> drm_prime_destroy_file_private(&file->prime); > >>>>>> WARN_ON(!list_empty(&file->event_list)); > >>>>>> diff --git a/drivers/gpu/drm/drm_prime.c > >>>>>> b/drivers/gpu/drm/drm_prime.c > >>>>>> index 1de2cde..c482e9c 100644 > >>>>>> --- a/drivers/gpu/drm/drm_prime.c > >>>>>> +++ b/drivers/gpu/drm/drm_prime.c > >>>>>> @@ -335,6 +335,13 @@ int 
drm_gem_prime_fd_to_handle(struct > >>>>>> drm_device *dev, > >>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, > >>>>>> dma_buf, *handle); > >>>>>> + > >>>>>> + if (!ret) { > >>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > >>>>>> + if (!obj->dummy_page) > >>>>>> + ret = -ENOMEM; > >>>>>> + } > >>>>>> + > >>>>> While the per file case still looks acceptable this is a clear NAK > >>>>> since it will massively increase the memory needed for a prime > >>>>> exported object. > >>>>> > >>>>> I think that this is quite overkill in the first place and for the > >>>>> hot unplug case we can just use the global dummy page as well. > >>>>> > >>>>> Christian. > >>>> > >>>> Global dummy page is good for read access, what do you do on write > >>>> access ? My first approach was indeed to map at first global dummy > >>>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming > >>>> that this would trigger Copy On Write flow in core mm > >>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=02%7C01%7CAndrey.Grodzovsky%40amd.com%7C3753451d037544e7495408d816d4c4ee%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637284450384586120&sdata=ZpRaQgqA5K4jRfidOiedey0AleeYQ97WNUkGA29ERA0%3D&reserved=0) > >>>> > >>>> on the next page fault to same address triggered by a write access but > >>>> then i realized a new COW page will be allocated for each such mapping > >>>> and this is much more wasteful then having a dedicated page per GEM > >>>> object. > >>> Yeah, but this is only for a very very small corner cases. What we need > >>> to prevent is increasing the memory usage during normal operation to > >>> much. > >>> > >>> Using memory during the unplug is completely unproblematic because we > >>> just released quite a bunch of it by releasing all those system memory > >>> buffers. 
> >>> > >>> And I'm pretty sure that COWed pages are correctly accounted towards > >>> the > >>> used memory of a process. > >>> > >>> So I think if that approach works as intended and the COW pages are > >>> released again on unmapping it would be the perfect solution to the > >>> problem. > >>> > >>> Daniel what do you think? > >> If COW works, sure sounds reasonable. And if we can make sure we > >> managed to drop all the system allocations (otherwise suddenly 2x > >> memory usage, worst case). But I have no idea whether we can > >> retroshoehorn that into an established vma, you might have fun stuff > >> like a mkwrite handler there (which I thought is the COW handler > >> thing, but really no idea). > >> > >> If we need to massively change stuff then I think rw dummy page, > >> allocated on first fault after hotunplug (maybe just make it one per > >> object, that's simplest) seems like the much safer option. Much less > >> code that can go wrong. > >> -Daniel > > > > > > Regarding COW, i was looking into how to properly implement it from > > within the fault handler (i.e. ttm_bo_vm_fault) > > and the main obstacle I hit is that of exclusive access to the > > vm_area_struct, i need to be able to modify > > vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can > > be triggered on subsequent write access > > fault (here > > https://elixir.bootlin.com/linux/latest/source/mm/memory.c#L4128) > > but core mm takes only read side mm_sem (here for example > > https://elixir.bootlin.com/linux/latest/source/drivers/iommu/amd/iommu_v2.c#L488) > > and so I am not supposed to modify vm_area_struct in this case. I am > > not sure if it's legit to write lock tthe mm_sem from this point. > > I found some discussions about this here > > http://lkml.iu.edu/hypermail/linux/kernel/1909.1/02754.html but it > > wasn't really clear to me > > what's the solution. 
> > > > In any case, seems to me that easier and more memory saving solution > > would be to just switch to per ttm bo dumy rw page that > > would be allocated on demand as you suggested here. This should also > > take care of imported BOs and flink cases. > > Then i can drop the per device FD and per GEM object FD dummy BO and > > the ugly loop i am using in patch 2 to match faulting BO to the right > > dummy page. > > > > Does this makes sense ? > > I still don't see the information leak as much of a problem, but if > Daniel insists we should probably do this. Well amdgpu doesn't clear buffers by default, so indeed you guys are a lot more laissez-faire here. But in general we really don't do that kind of leaking. Iirc there's even radeonsi bugs because else clears, and radeonsi happily displays gunk :-) > But could we at least have only one page per client instead of per BO? I think you can do one page per file descriptor or something like that. But gets annoying with shared bo, especially with dma_buf_mmap forwarding. -Daniel > > Thanks, > Christian. > > > > > Andrey > > > > > >> > >>> Regards, > >>> Christian. > >>> > >>>> We can indeed optimize by allocating this dummy page on the first page > >>>> fault after device disconnect instead on GEM object creation. 
> >>>> > >>>> Andrey > >>>> > >>>> > >>>>>> mutex_unlock(&file_priv->prime.lock); > >>>>>> if (ret) > >>>>>> goto fail; > >>>>>> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct > >>>>>> drm_gem_object *obj, struct sg_table *sg) > >>>>>> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); > >>>>>> dma_buf = attach->dmabuf; > >>>>>> dma_buf_detach(attach->dmabuf, attach); > >>>>>> + > >>>>>> + __free_page(obj->dummy_page); > >>>>>> + > >>>>>> /* remove the reference */ > >>>>>> dma_buf_put(dma_buf); > >>>>>> } > >>>>>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h > >>>>>> index 19df802..349a658 100644 > >>>>>> --- a/include/drm/drm_file.h > >>>>>> +++ b/include/drm/drm_file.h > >>>>>> @@ -335,6 +335,8 @@ struct drm_file { > >>>>>> */ > >>>>>> struct drm_prime_file_private prime; > >>>>>> + struct page *dummy_page; > >>>>>> + > >>>>>> /* private: */ > >>>>>> #if IS_ENABLED(CONFIG_DRM_LEGACY) > >>>>>> unsigned long lock_count; /* DRI1 legacy lock count */ > >>>>>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > >>>>>> index 0b37506..47460d1 100644 > >>>>>> --- a/include/drm/drm_gem.h > >>>>>> +++ b/include/drm/drm_gem.h > >>>>>> @@ -310,6 +310,8 @@ struct drm_gem_object { > >>>>>> * > >>>>>> */ > >>>>>> const struct drm_gem_object_funcs *funcs; > >>>>>> + > >>>>>> + struct page *dummy_page; > >>>>>> }; > >>>>>> /** > >> > > _______________________________________________ > > amd-gfx mailing list > > amd-gfx@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/amd-gfx >
On Sat, Nov 14, 2020 at 10:51 AM Daniel Vetter <daniel.vetter@ffwll.ch> wrote: > > On Sat, Nov 14, 2020 at 9:41 AM Christian König > <ckoenig.leichtzumerken@gmail.com> wrote: > > > > Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: > > > > > > On 6/22/20 1:50 PM, Daniel Vetter wrote: > > >> On Mon, Jun 22, 2020 at 7:45 PM Christian König > > >> <christian.koenig@amd.com> wrote: > > >>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: > > >>>> On 6/22/20 9:18 AM, Christian König wrote: > > >>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: > > >>>>>> Will be used to reroute CPU mapped BO's page faults once > > >>>>>> device is removed. > > >>>>>> > > >>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > > >>>>>> --- > > >>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ > > >>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ > > >>>>>> include/drm/drm_file.h | 2 ++ > > >>>>>> include/drm/drm_gem.h | 2 ++ > > >>>>>> 4 files changed, 22 insertions(+) > > >>>>>> > > >>>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c > > >>>>>> index c4c704e..67c0770 100644 > > >>>>>> --- a/drivers/gpu/drm/drm_file.c > > >>>>>> +++ b/drivers/gpu/drm/drm_file.c > > >>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct > > >>>>>> drm_minor *minor) > > >>>>>> goto out_prime_destroy; > > >>>>>> } > > >>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > > >>>>>> + if (!file->dummy_page) { > > >>>>>> + ret = -ENOMEM; > > >>>>>> + goto out_prime_destroy; > > >>>>>> + } > > >>>>>> + > > >>>>>> return file; > > >>>>>> out_prime_destroy: > > >>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) > > >>>>>> if (dev->driver->postclose) > > >>>>>> dev->driver->postclose(dev, file); > > >>>>>> + __free_page(file->dummy_page); > > >>>>>> + > > >>>>>> drm_prime_destroy_file_private(&file->prime); > > >>>>>> WARN_ON(!list_empty(&file->event_list)); > > >>>>>> diff --git a/drivers/gpu/drm/drm_prime.c > > >>>>>> 
b/drivers/gpu/drm/drm_prime.c > > >>>>>> index 1de2cde..c482e9c 100644 > > >>>>>> --- a/drivers/gpu/drm/drm_prime.c > > >>>>>> +++ b/drivers/gpu/drm/drm_prime.c > > >>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct > > >>>>>> drm_device *dev, > > >>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, > > >>>>>> dma_buf, *handle); > > >>>>>> + > > >>>>>> + if (!ret) { > > >>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > > >>>>>> + if (!obj->dummy_page) > > >>>>>> + ret = -ENOMEM; > > >>>>>> + } > > >>>>>> + > > >>>>> While the per file case still looks acceptable this is a clear NAK > > >>>>> since it will massively increase the memory needed for a prime > > >>>>> exported object. > > >>>>> > > >>>>> I think that this is quite overkill in the first place and for the > > >>>>> hot unplug case we can just use the global dummy page as well. > > >>>>> > > >>>>> Christian. > > >>>> > > >>>> Global dummy page is good for read access, what do you do on write > > >>>> access ? My first approach was indeed to map at first global dummy > > >>>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming > > >>>> that this would trigger Copy On Write flow in core mm > > >>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=02%7C01%7CAndrey.Grodzovsky%40amd.com%7C3753451d037544e7495408d816d4c4ee%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637284450384586120&sdata=ZpRaQgqA5K4jRfidOiedey0AleeYQ97WNUkGA29ERA0%3D&reserved=0) > > >>>> > > >>>> on the next page fault to same address triggered by a write access but > > >>>> then i realized a new COW page will be allocated for each such mapping > > >>>> and this is much more wasteful then having a dedicated page per GEM > > >>>> object. > > >>> Yeah, but this is only for a very very small corner cases. 
What we need > > >>> to prevent is increasing the memory usage during normal operation to > > >>> much. > > >>> > > >>> Using memory during the unplug is completely unproblematic because we > > >>> just released quite a bunch of it by releasing all those system memory > > >>> buffers. > > >>> > > >>> And I'm pretty sure that COWed pages are correctly accounted towards > > >>> the > > >>> used memory of a process. > > >>> > > >>> So I think if that approach works as intended and the COW pages are > > >>> released again on unmapping it would be the perfect solution to the > > >>> problem. > > >>> > > >>> Daniel what do you think? > > >> If COW works, sure sounds reasonable. And if we can make sure we > > >> managed to drop all the system allocations (otherwise suddenly 2x > > >> memory usage, worst case). But I have no idea whether we can > > >> retroshoehorn that into an established vma, you might have fun stuff > > >> like a mkwrite handler there (which I thought is the COW handler > > >> thing, but really no idea). > > >> > > >> If we need to massively change stuff then I think rw dummy page, > > >> allocated on first fault after hotunplug (maybe just make it one per > > >> object, that's simplest) seems like the much safer option. Much less > > >> code that can go wrong. > > >> -Daniel > > > > > > > > > Regarding COW, i was looking into how to properly implement it from > > > within the fault handler (i.e. ttm_bo_vm_fault) > > > and the main obstacle I hit is that of exclusive access to the > > > vm_area_struct, i need to be able to modify > > > vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can > > > be triggered on subsequent write access > > > fault (here > > > https://elixir.bootlin.com/linux/latest/source/mm/memory.c#L4128) > > > but core mm takes only read side mm_sem (here for example > > > https://elixir.bootlin.com/linux/latest/source/drivers/iommu/amd/iommu_v2.c#L488) > > > and so I am not supposed to modify vm_area_struct in this case. 
I am > > > not sure if it's legit to write lock the mm_sem from this point. > > > I found some discussions about this here > > > http://lkml.iu.edu/hypermail/linux/kernel/1909.1/02754.html but it > > > wasn't really clear to me > > > what's the solution. > > > > > > In any case, seems to me that easier and more memory saving solution > > > would be to just switch to per ttm bo dummy rw page that > > > would be allocated on demand as you suggested here. This should also > > > take care of imported BOs and flink cases. > > > Then i can drop the per device FD and per GEM object FD dummy BO and > > > the ugly loop i am using in patch 2 to match faulting BO to the right > > > dummy page. > > > > > > Does this make sense ? > > > > I still don't see the information leak as much of a problem, but if > > Daniel insists we should probably do this. > > Well amdgpu doesn't clear buffers by default, so indeed you guys are a > lot more laissez-faire here. But in general we really don't do that > kind of leaking. Iirc there's even radeonsi bugs because else clears, > and radeonsi happily displays gunk :-) btw I think not clearing at alloc breaks the render node model a bit. Without that this was all fine, since system pages still got cleared by alloc_page(), and we only leaked vram. And for the legacy node model with authentication of clients against the X server, leaking that all around was ok. With render nodes no leaking should happen, with no knob for userspace to opt out of the forced clearing. -Daniel > > But could we at least have only one page per client instead of per BO? > I think you can do one page per file descriptor or something like > that. But gets annoying with shared bo, especially with dma_buf_mmap > forwarding. > -Daniel > > > > > Thanks, > > Christian. > > > > > > > > Andrey > > > > > > > > >> > > >>> Regards, > > >>> Christian. 
> > >>> > > >>>> We can indeed optimize by allocating this dummy page on the first page > > >>>> fault after device disconnect instead on GEM object creation. > > >>>> > > >>>> Andrey > > >>>> > > >>>> > > >>>>>> mutex_unlock(&file_priv->prime.lock); > > >>>>>> if (ret) > > >>>>>> goto fail; > > >>>>>> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct > > >>>>>> drm_gem_object *obj, struct sg_table *sg) > > >>>>>> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); > > >>>>>> dma_buf = attach->dmabuf; > > >>>>>> dma_buf_detach(attach->dmabuf, attach); > > >>>>>> + > > >>>>>> + __free_page(obj->dummy_page); > > >>>>>> + > > >>>>>> /* remove the reference */ > > >>>>>> dma_buf_put(dma_buf); > > >>>>>> } > > >>>>>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h > > >>>>>> index 19df802..349a658 100644 > > >>>>>> --- a/include/drm/drm_file.h > > >>>>>> +++ b/include/drm/drm_file.h > > >>>>>> @@ -335,6 +335,8 @@ struct drm_file { > > >>>>>> */ > > >>>>>> struct drm_prime_file_private prime; > > >>>>>> + struct page *dummy_page; > > >>>>>> + > > >>>>>> /* private: */ > > >>>>>> #if IS_ENABLED(CONFIG_DRM_LEGACY) > > >>>>>> unsigned long lock_count; /* DRI1 legacy lock count */ > > >>>>>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h > > >>>>>> index 0b37506..47460d1 100644 > > >>>>>> --- a/include/drm/drm_gem.h > > >>>>>> +++ b/include/drm/drm_gem.h > > >>>>>> @@ -310,6 +310,8 @@ struct drm_gem_object { > > >>>>>> * > > >>>>>> */ > > >>>>>> const struct drm_gem_object_funcs *funcs; > > >>>>>> + > > >>>>>> + struct page *dummy_page; > > >>>>>> }; > > >>>>>> /** > > >> > > > _______________________________________________ > > > amd-gfx mailing list > > > amd-gfx@lists.freedesktop.org > > > https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > > > > -- > Daniel Vetter > Software Engineer, Intel Corporation > http://blog.ffwll.ch
On 11/14/20 4:51 AM, Daniel Vetter wrote: > On Sat, Nov 14, 2020 at 9:41 AM Christian König > <ckoenig.leichtzumerken@gmail.com> wrote: >> Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: >>> On 6/22/20 1:50 PM, Daniel Vetter wrote: >>>> On Mon, Jun 22, 2020 at 7:45 PM Christian König >>>> <christian.koenig@amd.com> wrote: >>>>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>>>>> On 6/22/20 9:18 AM, Christian König wrote: >>>>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>>>>> Will be used to reroute CPU mapped BO's page faults once >>>>>>>> device is removed. >>>>>>>> >>>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>>>> --- >>>>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>>>>> include/drm/drm_file.h | 2 ++ >>>>>>>> include/drm/drm_gem.h | 2 ++ >>>>>>>> 4 files changed, 22 insertions(+) >>>>>>>> >>>>>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >>>>>>>> index c4c704e..67c0770 100644 >>>>>>>> --- a/drivers/gpu/drm/drm_file.c >>>>>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>>>>> drm_minor *minor) >>>>>>>> goto out_prime_destroy; >>>>>>>> } >>>>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>> + if (!file->dummy_page) { >>>>>>>> + ret = -ENOMEM; >>>>>>>> + goto out_prime_destroy; >>>>>>>> + } >>>>>>>> + >>>>>>>> return file; >>>>>>>> out_prime_destroy: >>>>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>>>>> if (dev->driver->postclose) >>>>>>>> dev->driver->postclose(dev, file); >>>>>>>> + __free_page(file->dummy_page); >>>>>>>> + >>>>>>>> drm_prime_destroy_file_private(&file->prime); >>>>>>>> WARN_ON(!list_empty(&file->event_list)); >>>>>>>> diff --git a/drivers/gpu/drm/drm_prime.c >>>>>>>> b/drivers/gpu/drm/drm_prime.c >>>>>>>> index 1de2cde..c482e9c 100644 >>>>>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>>>>> +++ 
b/drivers/gpu/drm/drm_prime.c >>>>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>>>>> drm_device *dev, >>>>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>>>>> dma_buf, *handle); >>>>>>>> + >>>>>>>> + if (!ret) { >>>>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>> + if (!obj->dummy_page) >>>>>>>> + ret = -ENOMEM; >>>>>>>> + } >>>>>>>> + >>>>>>> While the per file case still looks acceptable this is a clear NAK >>>>>>> since it will massively increase the memory needed for a prime >>>>>>> exported object. >>>>>>> >>>>>>> I think that this is quite overkill in the first place and for the >>>>>>> hot unplug case we can just use the global dummy page as well. >>>>>>> >>>>>>> Christian. >>>>>> Global dummy page is good for read access, what do you do on write >>>>>> access ? My first approach was indeed to map at first global dummy >>>>>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming >>>>>> that this would trigger Copy On Write flow in core mm >>>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=kghiG3VpCJod6YefExoDVPl9X03zNhw3SN5GAxgbnmU%3D&reserved=0) >>>>>> >>>>>> on the next page fault to same address triggered by a write access but >>>>>> then i realized a new COW page will be allocated for each such mapping >>>>>> and this is much more wasteful then having a dedicated page per GEM >>>>>> object. >>>>> Yeah, but this is only for a very very small corner cases. What we need >>>>> to prevent is increasing the memory usage during normal operation to >>>>> much. 
>>>>> >>>>> Using memory during the unplug is completely unproblematic because we >>>>> just released quite a bunch of it by releasing all those system memory >>>>> buffers. >>>>> >>>>> And I'm pretty sure that COWed pages are correctly accounted towards >>>>> the >>>>> used memory of a process. >>>>> >>>>> So I think if that approach works as intended and the COW pages are >>>>> released again on unmapping it would be the perfect solution to the >>>>> problem. >>>>> >>>>> Daniel what do you think? >>>> If COW works, sure sounds reasonable. And if we can make sure we >>>> managed to drop all the system allocations (otherwise suddenly 2x >>>> memory usage, worst case). But I have no idea whether we can >>>> retroshoehorn that into an established vma, you might have fun stuff >>>> like a mkwrite handler there (which I thought is the COW handler >>>> thing, but really no idea). >>>> >>>> If we need to massively change stuff then I think rw dummy page, >>>> allocated on first fault after hotunplug (maybe just make it one per >>>> object, that's simplest) seems like the much safer option. Much less >>>> code that can go wrong. >>>> -Daniel >>> >>> Regarding COW, i was looking into how to properly implement it from >>> within the fault handler (i.e. 
ttm_bo_vm_fault) >>> and the main obstacle I hit is that of exclusive access to the >>> vm_area_struct, i need to be able to modify >>> vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can >>> be triggered on subsequent write access >>> fault (here >>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fmm%2Fmemory.c%23L4128&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=ziHJtqyHuLrlb0uYKhoWCWhUAZnX0JquE%2BkBJ5Fx%2BNo%3D&reserved=0) >>> but core mm takes only read side mm_sem (here for example >>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fdrivers%2Fiommu%2Famd%2Fiommu_v2.c%23L488&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=h360c75Upl3%2FW7im7M1%2BxY%2FXy4gxin%2BkCF1Ui2zFXMs%3D&reserved=0) >>> and so I am not supposed to modify vm_area_struct in this case. I am >>> not sure if it's legit to write lock tthe mm_sem from this point. >>> I found some discussions about this here >>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Flkml.iu.edu%2Fhypermail%2Flinux%2Fkernel%2F1909.1%2F02754.html&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224021379%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=sx6s1lH%2FvxbIZajc4Yr49vFhxvPEnBHZlTt52D8qvZA%3D&reserved=0 but it >>> wasn't really clear to me >>> what's the solution. 
>>> >>> In any case, seems to me that easier and more memory saving solution >>> would be to just switch to per ttm bo dumy rw page that >>> would be allocated on demand as you suggested here. This should also >>> take care of imported BOs and flink cases. >>> Then i can drop the per device FD and per GEM object FD dummy BO and >>> the ugly loop i am using in patch 2 to match faulting BO to the right >>> dummy page. >>> >>> Does this makes sense ? >> I still don't see the information leak as much of a problem, but if >> Daniel insists we should probably do this. > Well amdgpu doesn't clear buffers by default, so indeed you guys are a > lot more laissez-faire here. But in general we really don't do that > kind of leaking. Iirc there's even radeonsi bugs because else clears, > and radeonsi happily displays gunk :-) > >> But could we at least have only one page per client instead of per BO? > I think you can do one page per file descriptor or something like > that. But gets annoying with shared bo, especially with dma_buf_mmap > forwarding. > -Daniel Christian - is your concern more with too much page allocations or with extra pointer member cluttering TTM BO struct ? Because we can allocate the dummy page on demand only when needed. It's just seems to me that keeping it per BO streamlines the code as I don't need to have different handling for local vs imported BOs. Andrey > >> Thanks, >> Christian. >> >>> Andrey >>> >>> >>>>> Regards, >>>>> Christian. >>>>> >>>>>> We can indeed optimize by allocating this dummy page on the first page >>>>>> fault after device disconnect instead on GEM object creation. 
>>>>>> >>>>>> Andrey >>>>>> >>>>>> >>>>>>>> mutex_unlock(&file_priv->prime.lock); >>>>>>>> if (ret) >>>>>>>> goto fail; >>>>>>>> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct >>>>>>>> drm_gem_object *obj, struct sg_table *sg) >>>>>>>> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); >>>>>>>> dma_buf = attach->dmabuf; >>>>>>>> dma_buf_detach(attach->dmabuf, attach); >>>>>>>> + >>>>>>>> + __free_page(obj->dummy_page); >>>>>>>> + >>>>>>>> /* remove the reference */ >>>>>>>> dma_buf_put(dma_buf); >>>>>>>> } >>>>>>>> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h >>>>>>>> index 19df802..349a658 100644 >>>>>>>> --- a/include/drm/drm_file.h >>>>>>>> +++ b/include/drm/drm_file.h >>>>>>>> @@ -335,6 +335,8 @@ struct drm_file { >>>>>>>> */ >>>>>>>> struct drm_prime_file_private prime; >>>>>>>> + struct page *dummy_page; >>>>>>>> + >>>>>>>> /* private: */ >>>>>>>> #if IS_ENABLED(CONFIG_DRM_LEGACY) >>>>>>>> unsigned long lock_count; /* DRI1 legacy lock count */ >>>>>>>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >>>>>>>> index 0b37506..47460d1 100644 >>>>>>>> --- a/include/drm/drm_gem.h >>>>>>>> +++ b/include/drm/drm_gem.h >>>>>>>> @@ -310,6 +310,8 @@ struct drm_gem_object { >>>>>>>> * >>>>>>>> */ >>>>>>>> const struct drm_gem_object_funcs *funcs; >>>>>>>> + >>>>>>>> + struct page *dummy_page; >>>>>>>> }; >>>>>>>> /** >>> _______________________________________________ >>> amd-gfx mailing list >>> amd-gfx@lists.freedesktop.org >>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224021379%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=WrsqkL%2BCnsZeANUqa88g3wyIMHuRjkKGFgf4RYNFAyw%3D&reserved=0 >
On 6/22/20 5:35 AM, Daniel Vetter wrote: > On Sun, Jun 21, 2020 at 02:03:01AM -0400, Andrey Grodzovsky wrote: >> Will be used to reroute CPU mapped BO's page faults once >> device is removed. >> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >> --- >> drivers/gpu/drm/drm_file.c | 8 ++++++++ >> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >> include/drm/drm_file.h | 2 ++ >> include/drm/drm_gem.h | 2 ++ >> 4 files changed, 22 insertions(+) >> >> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >> index c4c704e..67c0770 100644 >> --- a/drivers/gpu/drm/drm_file.c >> +++ b/drivers/gpu/drm/drm_file.c >> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) >> goto out_prime_destroy; >> } >> >> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!file->dummy_page) { >> + ret = -ENOMEM; >> + goto out_prime_destroy; >> + } >> + >> return file; >> >> out_prime_destroy: >> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >> if (dev->driver->postclose) >> dev->driver->postclose(dev, file); >> >> + __free_page(file->dummy_page); >> + >> drm_prime_destroy_file_private(&file->prime); >> >> WARN_ON(!list_empty(&file->event_list)); >> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c >> index 1de2cde..c482e9c 100644 >> --- a/drivers/gpu/drm/drm_prime.c >> +++ b/drivers/gpu/drm/drm_prime.c >> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, >> >> ret = drm_prime_add_buf_handle(&file_priv->prime, >> dma_buf, *handle); >> + >> + if (!ret) { >> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!obj->dummy_page) >> + ret = -ENOMEM; >> + } >> + >> mutex_unlock(&file_priv->prime.lock); >> if (ret) >> goto fail; >> @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg) >> dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); >> dma_buf = attach->dmabuf; >> dma_buf_detach(attach->dmabuf, attach); 
>> + >> + __free_page(obj->dummy_page); >> + >> /* remove the reference */ >> dma_buf_put(dma_buf); >> } >> diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h >> index 19df802..349a658 100644 >> --- a/include/drm/drm_file.h >> +++ b/include/drm/drm_file.h >> @@ -335,6 +335,8 @@ struct drm_file { >> */ >> struct drm_prime_file_private prime; >> > Kerneldoc for these please, including why we need them and when. E.g. the > one in gem_bo should say it's only for exported buffers, so that we're not > colliding security spaces. > >> + struct page *dummy_page; >> + >> /* private: */ >> #if IS_ENABLED(CONFIG_DRM_LEGACY) >> unsigned long lock_count; /* DRI1 legacy lock count */ >> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h >> index 0b37506..47460d1 100644 >> --- a/include/drm/drm_gem.h >> +++ b/include/drm/drm_gem.h >> @@ -310,6 +310,8 @@ struct drm_gem_object { >> * >> */ >> const struct drm_gem_object_funcs *funcs; >> + >> + struct page *dummy_page; >> }; > I think amdgpu doesn't care, but everyone else still might care somewhat > about flink. That also shares buffers, so also needs to allocate the > per-bo dummy page. Not familiar with FLINK so I read a bit here https://lwn.net/Articles/283798/ sections 3 and 4 about FLINK naming and later mapping, I don't see a difference between FLINK and local BO mapping as opening by FLINK name returns handle to the same BO as the original. Why then we need a special handling for FLINK ? Andrey > > I also wonder whether we shouldn't have a helper to look up the dummy > page, just to encode in core code how it's supposedo to cascade. > -Daniel > >> >> /** >> -- >> 2.7.4 >>
On 2020-11-14 10:57 a.m., Daniel Vetter wrote: > On Sat, Nov 14, 2020 at 10:51 AM Daniel Vetter <daniel.vetter@ffwll.ch> wrote: >> >> On Sat, Nov 14, 2020 at 9:41 AM Christian König >> <ckoenig.leichtzumerken@gmail.com> wrote: >>> >>> Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: >>>> >>>> On 6/22/20 1:50 PM, Daniel Vetter wrote: >>>>> On Mon, Jun 22, 2020 at 7:45 PM Christian König >>>>> <christian.koenig@amd.com> wrote: >>>>>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>>>>>> On 6/22/20 9:18 AM, Christian König wrote: >>>>>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>>>>>> Will be used to reroute CPU mapped BO's page faults once >>>>>>>>> device is removed. >>>>>>>>> >>>>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>>>>> --- >>>>>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>>>>>> include/drm/drm_file.h | 2 ++ >>>>>>>>> include/drm/drm_gem.h | 2 ++ >>>>>>>>> 4 files changed, 22 insertions(+) >>>>>>>>> >>>>>>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >>>>>>>>> index c4c704e..67c0770 100644 >>>>>>>>> --- a/drivers/gpu/drm/drm_file.c >>>>>>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>>>>>> drm_minor *minor) >>>>>>>>> goto out_prime_destroy; >>>>>>>>> } >>>>>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>> + if (!file->dummy_page) { >>>>>>>>> + ret = -ENOMEM; >>>>>>>>> + goto out_prime_destroy; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> return file; >>>>>>>>> out_prime_destroy: >>>>>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>>>>>> if (dev->driver->postclose) >>>>>>>>> dev->driver->postclose(dev, file); >>>>>>>>> + __free_page(file->dummy_page); >>>>>>>>> + >>>>>>>>> drm_prime_destroy_file_private(&file->prime); >>>>>>>>> WARN_ON(!list_empty(&file->event_list)); >>>>>>>>> diff --git a/drivers/gpu/drm/drm_prime.c >>>>>>>>> 
b/drivers/gpu/drm/drm_prime.c >>>>>>>>> index 1de2cde..c482e9c 100644 >>>>>>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>>>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>>>>>> drm_device *dev, >>>>>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>>>>>> dma_buf, *handle); >>>>>>>>> + >>>>>>>>> + if (!ret) { >>>>>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>> + if (!obj->dummy_page) >>>>>>>>> + ret = -ENOMEM; >>>>>>>>> + } >>>>>>>>> + >>>>>>>> While the per file case still looks acceptable this is a clear NAK >>>>>>>> since it will massively increase the memory needed for a prime >>>>>>>> exported object. >>>>>>>> >>>>>>>> I think that this is quite overkill in the first place and for the >>>>>>>> hot unplug case we can just use the global dummy page as well. >>>>>>>> >>>>>>>> Christian. >>>>>>> >>>>>>> Global dummy page is good for read access, what do you do on write >>>>>>> access ? My first approach was indeed to map at first global dummy >>>>>>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming >>>>>>> that this would trigger Copy On Write flow in core mm >>>>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=02%7C01%7CAndrey.Grodzovsky%40amd.com%7C3753451d037544e7495408d816d4c4ee%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637284450384586120&sdata=ZpRaQgqA5K4jRfidOiedey0AleeYQ97WNUkGA29ERA0%3D&reserved=0) >>>>>>> >>>>>>> on the next page fault to same address triggered by a write access but >>>>>>> then i realized a new COW page will be allocated for each such mapping >>>>>>> and this is much more wasteful then having a dedicated page per GEM >>>>>>> object. >>>>>> Yeah, but this is only for a very very small corner cases. What we need >>>>>> to prevent is increasing the memory usage during normal operation to >>>>>> much. 
>>>>>> >>>>>> Using memory during the unplug is completely unproblematic because we >>>>>> just released quite a bunch of it by releasing all those system memory >>>>>> buffers. >>>>>> >>>>>> And I'm pretty sure that COWed pages are correctly accounted towards >>>>>> the >>>>>> used memory of a process. >>>>>> >>>>>> So I think if that approach works as intended and the COW pages are >>>>>> released again on unmapping it would be the perfect solution to the >>>>>> problem. >>>>>> >>>>>> Daniel what do you think? >>>>> If COW works, sure sounds reasonable. And if we can make sure we >>>>> managed to drop all the system allocations (otherwise suddenly 2x >>>>> memory usage, worst case). But I have no idea whether we can >>>>> retroshoehorn that into an established vma, you might have fun stuff >>>>> like a mkwrite handler there (which I thought is the COW handler >>>>> thing, but really no idea). >>>>> >>>>> If we need to massively change stuff then I think rw dummy page, >>>>> allocated on first fault after hotunplug (maybe just make it one per >>>>> object, that's simplest) seems like the much safer option. Much less >>>>> code that can go wrong. >>>>> -Daniel >>>> >>>> >>>> Regarding COW, i was looking into how to properly implement it from >>>> within the fault handler (i.e. ttm_bo_vm_fault) >>>> and the main obstacle I hit is that of exclusive access to the >>>> vm_area_struct, i need to be able to modify >>>> vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can >>>> be triggered on subsequent write access >>>> fault (here >>>> https://elixir.bootlin.com/linux/latest/source/mm/memory.c#L4128) >>>> but core mm takes only read side mm_sem (here for example >>>> https://elixir.bootlin.com/linux/latest/source/drivers/iommu/amd/iommu_v2.c#L488) >>>> and so I am not supposed to modify vm_area_struct in this case. I am >>>> not sure if it's legit to write lock tthe mm_sem from this point. 
>>>> I found some discussions about this here >>>> http://lkml.iu.edu/hypermail/linux/kernel/1909.1/02754.html but it >>>> wasn't really clear to me >>>> what's the solution. >>>> >>>> In any case, seems to me that easier and more memory saving solution >>>> would be to just switch to per ttm bo dumy rw page that >>>> would be allocated on demand as you suggested here. This should also >>>> take care of imported BOs and flink cases. >>>> Then i can drop the per device FD and per GEM object FD dummy BO and >>>> the ugly loop i am using in patch 2 to match faulting BO to the right >>>> dummy page. >>>> >>>> Does this makes sense ? >>> >>> I still don't see the information leak as much of a problem, but if >>> Daniel insists we should probably do this. >> >> Well amdgpu doesn't clear buffers by default, so indeed you guys are a >> lot more laissez-faire here. But in general we really don't do that >> kind of leaking. Iirc there's even radeonsi bugs because else clears, >> and radeonsi happily displays gunk :-) > > btw I think not clearing at alloc breaks the render node model a bit. > Without that this was all fine, since system pages still got cleared > by alloc_page(), and we only leaked vram. And for the legacy node > model with authentication of clients against the X server, leaking > that all around was ok. With render nodes no leaking should happen, > with no knob for userspace to opt out of the forced clearing. Seconded.
Am 15.11.20 um 07:34 schrieb Andrey Grodzovsky: > > On 11/14/20 4:51 AM, Daniel Vetter wrote: >> On Sat, Nov 14, 2020 at 9:41 AM Christian König >> <ckoenig.leichtzumerken@gmail.com> wrote: >>> Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: >>>> On 6/22/20 1:50 PM, Daniel Vetter wrote: >>>>> On Mon, Jun 22, 2020 at 7:45 PM Christian König >>>>> <christian.koenig@amd.com> wrote: >>>>>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>>>>>> On 6/22/20 9:18 AM, Christian König wrote: >>>>>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>>>>>> Will be used to reroute CPU mapped BO's page faults once >>>>>>>>> device is removed. >>>>>>>>> >>>>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>>>>> --- >>>>>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>>>>>> include/drm/drm_file.h | 2 ++ >>>>>>>>> include/drm/drm_gem.h | 2 ++ >>>>>>>>> 4 files changed, 22 insertions(+) >>>>>>>>> >>>>>>>>> diff --git a/drivers/gpu/drm/drm_file.c >>>>>>>>> b/drivers/gpu/drm/drm_file.c >>>>>>>>> index c4c704e..67c0770 100644 >>>>>>>>> --- a/drivers/gpu/drm/drm_file.c >>>>>>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>>>>>> drm_minor *minor) >>>>>>>>> goto out_prime_destroy; >>>>>>>>> } >>>>>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>> + if (!file->dummy_page) { >>>>>>>>> + ret = -ENOMEM; >>>>>>>>> + goto out_prime_destroy; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> return file; >>>>>>>>> out_prime_destroy: >>>>>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>>>>>> if (dev->driver->postclose) >>>>>>>>> dev->driver->postclose(dev, file); >>>>>>>>> + __free_page(file->dummy_page); >>>>>>>>> + >>>>>>>>> drm_prime_destroy_file_private(&file->prime); >>>>>>>>> WARN_ON(!list_empty(&file->event_list)); >>>>>>>>> diff --git a/drivers/gpu/drm/drm_prime.c >>>>>>>>> b/drivers/gpu/drm/drm_prime.c >>>>>>>>> 
index 1de2cde..c482e9c 100644 >>>>>>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>>>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>>>>>> drm_device *dev, >>>>>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>>>>>> dma_buf, *handle); >>>>>>>>> + >>>>>>>>> + if (!ret) { >>>>>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>> + if (!obj->dummy_page) >>>>>>>>> + ret = -ENOMEM; >>>>>>>>> + } >>>>>>>>> + >>>>>>>> While the per file case still looks acceptable this is a clear NAK >>>>>>>> since it will massively increase the memory needed for a prime >>>>>>>> exported object. >>>>>>>> >>>>>>>> I think that this is quite overkill in the first place and for the >>>>>>>> hot unplug case we can just use the global dummy page as well. >>>>>>>> >>>>>>>> Christian. >>>>>>> Global dummy page is good for read access, what do you do on write >>>>>>> access ? My first approach was indeed to map at first global dummy >>>>>>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming >>>>>>> that this would trigger Copy On Write flow in core mm >>>>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=kghiG3VpCJod6YefExoDVPl9X03zNhw3SN5GAxgbnmU%3D&reserved=0) >>>>>>> >>>>>>> >>>>>>> on the next page fault to same address triggered by a write >>>>>>> access but >>>>>>> then i realized a new COW page will be allocated for each such >>>>>>> mapping >>>>>>> and this is much more wasteful then having a dedicated page per GEM >>>>>>> object. >>>>>> Yeah, but this is only for a very very small corner cases. 
What >>>>>> we need >>>>>> to prevent is increasing the memory usage during normal operation to >>>>>> much. >>>>>> >>>>>> Using memory during the unplug is completely unproblematic >>>>>> because we >>>>>> just released quite a bunch of it by releasing all those system >>>>>> memory >>>>>> buffers. >>>>>> >>>>>> And I'm pretty sure that COWed pages are correctly accounted towards >>>>>> the >>>>>> used memory of a process. >>>>>> >>>>>> So I think if that approach works as intended and the COW pages are >>>>>> released again on unmapping it would be the perfect solution to the >>>>>> problem. >>>>>> >>>>>> Daniel what do you think? >>>>> If COW works, sure sounds reasonable. And if we can make sure we >>>>> managed to drop all the system allocations (otherwise suddenly 2x >>>>> memory usage, worst case). But I have no idea whether we can >>>>> retroshoehorn that into an established vma, you might have fun stuff >>>>> like a mkwrite handler there (which I thought is the COW handler >>>>> thing, but really no idea). >>>>> >>>>> If we need to massively change stuff then I think rw dummy page, >>>>> allocated on first fault after hotunplug (maybe just make it one per >>>>> object, that's simplest) seems like the much safer option. Much less >>>>> code that can go wrong. >>>>> -Daniel >>>> >>>> Regarding COW, i was looking into how to properly implement it from >>>> within the fault handler (i.e. 
ttm_bo_vm_fault) >>>> and the main obstacle I hit is that of exclusive access to the >>>> vm_area_struct, i need to be able to modify >>>> vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can >>>> be triggered on subsequent write access >>>> fault (here >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fmm%2Fmemory.c%23L4128&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=ziHJtqyHuLrlb0uYKhoWCWhUAZnX0JquE%2BkBJ5Fx%2BNo%3D&reserved=0) >>>> >>>> but core mm takes only read side mm_sem (here for example >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fdrivers%2Fiommu%2Famd%2Fiommu_v2.c%23L488&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=h360c75Upl3%2FW7im7M1%2BxY%2FXy4gxin%2BkCF1Ui2zFXMs%3D&reserved=0) >>>> >>>> and so I am not supposed to modify vm_area_struct in this case. I am >>>> not sure if it's legit to write lock tthe mm_sem from this point. >>>> I found some discussions about this here >>>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Flkml.iu.edu%2Fhypermail%2Flinux%2Fkernel%2F1909.1%2F02754.html&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224021379%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=sx6s1lH%2FvxbIZajc4Yr49vFhxvPEnBHZlTt52D8qvZA%3D&reserved=0 >>>> but it >>>> wasn't really clear to me >>>> what's the solution. 
>>>> >>>> In any case, seems to me that easier and more memory saving solution >>>> would be to just switch to per ttm bo dumy rw page that >>>> would be allocated on demand as you suggested here. This should also >>>> take care of imported BOs and flink cases. >>>> Then i can drop the per device FD and per GEM object FD dummy BO and >>>> the ugly loop i am using in patch 2 to match faulting BO to the right >>>> dummy page. >>>> >>>> Does this makes sense ? >>> I still don't see the information leak as much of a problem, but if >>> Daniel insists we should probably do this. >> Well amdgpu doesn't clear buffers by default, so indeed you guys are a >> lot more laissez-faire here. But in general we really don't do that >> kind of leaking. Iirc there's even radeonsi bugs because else clears, >> and radeonsi happily displays gunk :-) >> >>> But could we at least have only one page per client instead of per BO? >> I think you can do one page per file descriptor or something like >> that. But gets annoying with shared bo, especially with dma_buf_mmap >> forwarding. >> -Daniel > > > Christian - is your concern more with too much page allocations or > with extra pointer member > cluttering TTM BO struct ? Yes, that is one problem. > Because we can allocate the dummy page on demand only when > needed. It's just seems to me that keeping it per BO streamlines the > code as I don't need to > have different handling for local vs imported BOs. Why should you have a difference between local vs imported BOs? Christian. > > Andrey
On 11/16/20 4:48 AM, Christian König wrote: > Am 15.11.20 um 07:34 schrieb Andrey Grodzovsky: >> >> On 11/14/20 4:51 AM, Daniel Vetter wrote: >>> On Sat, Nov 14, 2020 at 9:41 AM Christian König >>> <ckoenig.leichtzumerken@gmail.com> wrote: >>>> Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: >>>>> On 6/22/20 1:50 PM, Daniel Vetter wrote: >>>>>> On Mon, Jun 22, 2020 at 7:45 PM Christian König >>>>>> <christian.koenig@amd.com> wrote: >>>>>>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>>>>>>> On 6/22/20 9:18 AM, Christian König wrote: >>>>>>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once >>>>>>>>>> device is removed. >>>>>>>>>> >>>>>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>>>>>> --- >>>>>>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>>>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>>>>>>> include/drm/drm_file.h | 2 ++ >>>>>>>>>> include/drm/drm_gem.h | 2 ++ >>>>>>>>>> 4 files changed, 22 insertions(+) >>>>>>>>>> >>>>>>>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >>>>>>>>>> index c4c704e..67c0770 100644 >>>>>>>>>> --- a/drivers/gpu/drm/drm_file.c >>>>>>>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>>>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>>>>>>> drm_minor *minor) >>>>>>>>>> goto out_prime_destroy; >>>>>>>>>> } >>>>>>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>>> + if (!file->dummy_page) { >>>>>>>>>> + ret = -ENOMEM; >>>>>>>>>> + goto out_prime_destroy; >>>>>>>>>> + } >>>>>>>>>> + >>>>>>>>>> return file; >>>>>>>>>> out_prime_destroy: >>>>>>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>>>>>>> if (dev->driver->postclose) >>>>>>>>>> dev->driver->postclose(dev, file); >>>>>>>>>> + __free_page(file->dummy_page); >>>>>>>>>> + >>>>>>>>>> drm_prime_destroy_file_private(&file->prime); >>>>>>>>>> WARN_ON(!list_empty(&file->event_list)); >>>>>>>>>> diff --git 
a/drivers/gpu/drm/drm_prime.c >>>>>>>>>> b/drivers/gpu/drm/drm_prime.c >>>>>>>>>> index 1de2cde..c482e9c 100644 >>>>>>>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>>>>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>>>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>>>>>>> drm_device *dev, >>>>>>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>>>>>>> dma_buf, *handle); >>>>>>>>>> + >>>>>>>>>> + if (!ret) { >>>>>>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>>> + if (!obj->dummy_page) >>>>>>>>>> + ret = -ENOMEM; >>>>>>>>>> + } >>>>>>>>>> + >>>>>>>>> While the per file case still looks acceptable this is a clear NAK >>>>>>>>> since it will massively increase the memory needed for a prime >>>>>>>>> exported object. >>>>>>>>> >>>>>>>>> I think that this is quite overkill in the first place and for the >>>>>>>>> hot unplug case we can just use the global dummy page as well. >>>>>>>>> >>>>>>>>> Christian. >>>>>>>> Global dummy page is good for read access, what do you do on write >>>>>>>> access ? My first approach was indeed to map at first global dummy >>>>>>>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming >>>>>>>> that this would trigger Copy On Write flow in core mm >>>>>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=kghiG3VpCJod6YefExoDVPl9X03zNhw3SN5GAxgbnmU%3D&reserved=0) >>>>>>>> >>>>>>>> >>>>>>>> on the next page fault to same address triggered by a write access but >>>>>>>> then i realized a new COW page will be allocated for each such mapping >>>>>>>> and this is much more wasteful then having a dedicated page per GEM >>>>>>>> object. 
>>>>>>> Yeah, but this is only for a very very small corner cases. What we need >>>>>>> to prevent is increasing the memory usage during normal operation to >>>>>>> much. >>>>>>> >>>>>>> Using memory during the unplug is completely unproblematic because we >>>>>>> just released quite a bunch of it by releasing all those system memory >>>>>>> buffers. >>>>>>> >>>>>>> And I'm pretty sure that COWed pages are correctly accounted towards >>>>>>> the >>>>>>> used memory of a process. >>>>>>> >>>>>>> So I think if that approach works as intended and the COW pages are >>>>>>> released again on unmapping it would be the perfect solution to the >>>>>>> problem. >>>>>>> >>>>>>> Daniel what do you think? >>>>>> If COW works, sure sounds reasonable. And if we can make sure we >>>>>> managed to drop all the system allocations (otherwise suddenly 2x >>>>>> memory usage, worst case). But I have no idea whether we can >>>>>> retroshoehorn that into an established vma, you might have fun stuff >>>>>> like a mkwrite handler there (which I thought is the COW handler >>>>>> thing, but really no idea). >>>>>> >>>>>> If we need to massively change stuff then I think rw dummy page, >>>>>> allocated on first fault after hotunplug (maybe just make it one per >>>>>> object, that's simplest) seems like the much safer option. Much less >>>>>> code that can go wrong. >>>>>> -Daniel >>>>> >>>>> Regarding COW, i was looking into how to properly implement it from >>>>> within the fault handler (i.e. 
ttm_bo_vm_fault) >>>>> and the main obstacle I hit is that of exclusive access to the >>>>> vm_area_struct, i need to be able to modify >>>>> vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can >>>>> be triggered on subsequent write access >>>>> fault (here >>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fmm%2Fmemory.c%23L4128&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=ziHJtqyHuLrlb0uYKhoWCWhUAZnX0JquE%2BkBJ5Fx%2BNo%3D&reserved=0) >>>>> >>>>> but core mm takes only read side mm_sem (here for example >>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fdrivers%2Fiommu%2Famd%2Fiommu_v2.c%23L488&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=h360c75Upl3%2FW7im7M1%2BxY%2FXy4gxin%2BkCF1Ui2zFXMs%3D&reserved=0) >>>>> >>>>> and so I am not supposed to modify vm_area_struct in this case. I am >>>>> not sure if it's legit to write lock tthe mm_sem from this point. >>>>> I found some discussions about this here >>>>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Flkml.iu.edu%2Fhypermail%2Flinux%2Fkernel%2F1909.1%2F02754.html&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224021379%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=sx6s1lH%2FvxbIZajc4Yr49vFhxvPEnBHZlTt52D8qvZA%3D&reserved=0 >>>>> but it >>>>> wasn't really clear to me >>>>> what's the solution. 
>>>>> >>>>> In any case, seems to me that easier and more memory saving solution >>>>> would be to just switch to per ttm bo dumy rw page that >>>>> would be allocated on demand as you suggested here. This should also >>>>> take care of imported BOs and flink cases. >>>>> Then i can drop the per device FD and per GEM object FD dummy BO and >>>>> the ugly loop i am using in patch 2 to match faulting BO to the right >>>>> dummy page. >>>>> >>>>> Does this makes sense ? >>>> I still don't see the information leak as much of a problem, but if >>>> Daniel insists we should probably do this. >>> Well amdgpu doesn't clear buffers by default, so indeed you guys are a >>> lot more laissez-faire here. But in general we really don't do that >>> kind of leaking. Iirc there's even radeonsi bugs because else clears, >>> and radeonsi happily displays gunk :-) >>> >>>> But could we at least have only one page per client instead of per BO? >>> I think you can do one page per file descriptor or something like >>> that. But gets annoying with shared bo, especially with dma_buf_mmap >>> forwarding. >>> -Daniel >> >> >> Christian - is your concern more with too much page allocations or with extra >> pointer member >> cluttering TTM BO struct ? > > Yes, that is one problem. > >> Because we can allocate the dummy page on demand only when >> needed. It's just seems to me that keeping it per BO streamlines the code as >> I don't need to >> have different handling for local vs imported BOs. > > Why should you have a difference between local vs imported BOs? For local BO seems like Daniel's suggestion to use vm_area_struct->vm_file->private_data should work as this points to drm_file. For imported BOs private_data will point to dma_buf structure since each imported BO is backed by a pseudo file (created in dma_buf_getfile). If so,where should we store the dummy RW BO in this case ? In current implementation it's stored in drm_gem_object. 
P.S For FLINK case it seems to me the handling should be no different then with local BO as the FD used for mmap in this case is still the same one associated with the DRM file. Andrey > > Christian. > >> >> Andrey >
Am 16.11.20 um 20:00 schrieb Andrey Grodzovsky: > > On 11/16/20 4:48 AM, Christian König wrote: >> Am 15.11.20 um 07:34 schrieb Andrey Grodzovsky: >>> >>> On 11/14/20 4:51 AM, Daniel Vetter wrote: >>>> On Sat, Nov 14, 2020 at 9:41 AM Christian König >>>> <ckoenig.leichtzumerken@gmail.com> wrote: >>>>> Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: >>>>>> On 6/22/20 1:50 PM, Daniel Vetter wrote: >>>>>>> On Mon, Jun 22, 2020 at 7:45 PM Christian König >>>>>>> <christian.koenig@amd.com> wrote: >>>>>>>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>>>>>>>> On 6/22/20 9:18 AM, Christian König wrote: >>>>>>>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once >>>>>>>>>>> device is removed. >>>>>>>>>>> >>>>>>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>>>>>>> --- >>>>>>>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>>>>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>>>>>>>> include/drm/drm_file.h | 2 ++ >>>>>>>>>>> include/drm/drm_gem.h | 2 ++ >>>>>>>>>>> 4 files changed, 22 insertions(+) >>>>>>>>>>> >>>>>>>>>>> diff --git a/drivers/gpu/drm/drm_file.c >>>>>>>>>>> b/drivers/gpu/drm/drm_file.c >>>>>>>>>>> index c4c704e..67c0770 100644 >>>>>>>>>>> --- a/drivers/gpu/drm/drm_file.c >>>>>>>>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>>>>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>>>>>>>> drm_minor *minor) >>>>>>>>>>> goto out_prime_destroy; >>>>>>>>>>> } >>>>>>>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | >>>>>>>>>>> __GFP_ZERO); >>>>>>>>>>> + if (!file->dummy_page) { >>>>>>>>>>> + ret = -ENOMEM; >>>>>>>>>>> + goto out_prime_destroy; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> return file; >>>>>>>>>>> out_prime_destroy: >>>>>>>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>>>>>>>> if (dev->driver->postclose) >>>>>>>>>>> dev->driver->postclose(dev, file); >>>>>>>>>>> + __free_page(file->dummy_page); >>>>>>>>>>> + >>>>>>>>>>> 
drm_prime_destroy_file_private(&file->prime); >>>>>>>>>>> WARN_ON(!list_empty(&file->event_list)); >>>>>>>>>>> diff --git a/drivers/gpu/drm/drm_prime.c >>>>>>>>>>> b/drivers/gpu/drm/drm_prime.c >>>>>>>>>>> index 1de2cde..c482e9c 100644 >>>>>>>>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>>>>>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>>>>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>>>>>>>> drm_device *dev, >>>>>>>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>>>>>>>> dma_buf, *handle); >>>>>>>>>>> + >>>>>>>>>>> + if (!ret) { >>>>>>>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>>>> + if (!obj->dummy_page) >>>>>>>>>>> + ret = -ENOMEM; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>> While the per file case still looks acceptable this is a >>>>>>>>>> clear NAK >>>>>>>>>> since it will massively increase the memory needed for a prime >>>>>>>>>> exported object. >>>>>>>>>> >>>>>>>>>> I think that this is quite overkill in the first place and >>>>>>>>>> for the >>>>>>>>>> hot unplug case we can just use the global dummy page as well. >>>>>>>>>> >>>>>>>>>> Christian. >>>>>>>>> Global dummy page is good for read access, what do you do on >>>>>>>>> write >>>>>>>>> access ? 
My first approach was indeed to map at first global >>>>>>>>> dummy >>>>>>>>> page as read only and mark the vma->vm_flags as !VM_SHARED >>>>>>>>> assuming >>>>>>>>> that this would trigger Copy On Write flow in core mm >>>>>>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=kghiG3VpCJod6YefExoDVPl9X03zNhw3SN5GAxgbnmU%3D&reserved=0) >>>>>>>>> >>>>>>>>> >>>>>>>>> on the next page fault to same address triggered by a write >>>>>>>>> access but >>>>>>>>> then i realized a new COW page will be allocated for each such >>>>>>>>> mapping >>>>>>>>> and this is much more wasteful then having a dedicated page >>>>>>>>> per GEM >>>>>>>>> object. >>>>>>>> Yeah, but this is only for a very very small corner cases. What >>>>>>>> we need >>>>>>>> to prevent is increasing the memory usage during normal >>>>>>>> operation to >>>>>>>> much. >>>>>>>> >>>>>>>> Using memory during the unplug is completely unproblematic >>>>>>>> because we >>>>>>>> just released quite a bunch of it by releasing all those system >>>>>>>> memory >>>>>>>> buffers. >>>>>>>> >>>>>>>> And I'm pretty sure that COWed pages are correctly accounted >>>>>>>> towards >>>>>>>> the >>>>>>>> used memory of a process. >>>>>>>> >>>>>>>> So I think if that approach works as intended and the COW pages >>>>>>>> are >>>>>>>> released again on unmapping it would be the perfect solution to >>>>>>>> the >>>>>>>> problem. >>>>>>>> >>>>>>>> Daniel what do you think? >>>>>>> If COW works, sure sounds reasonable. And if we can make sure we >>>>>>> managed to drop all the system allocations (otherwise suddenly 2x >>>>>>> memory usage, worst case). 
But I have no idea whether we can >>>>>>> retroshoehorn that into an established vma, you might have fun >>>>>>> stuff >>>>>>> like a mkwrite handler there (which I thought is the COW handler >>>>>>> thing, but really no idea). >>>>>>> >>>>>>> If we need to massively change stuff then I think rw dummy page, >>>>>>> allocated on first fault after hotunplug (maybe just make it one >>>>>>> per >>>>>>> object, that's simplest) seems like the much safer option. Much >>>>>>> less >>>>>>> code that can go wrong. >>>>>>> -Daniel >>>>>> >>>>>> Regarding COW, i was looking into how to properly implement it from >>>>>> within the fault handler (i.e. ttm_bo_vm_fault) >>>>>> and the main obstacle I hit is that of exclusive access to the >>>>>> vm_area_struct, i need to be able to modify >>>>>> vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can >>>>>> be triggered on subsequent write access >>>>>> fault (here >>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fmm%2Fmemory.c%23L4128&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=ziHJtqyHuLrlb0uYKhoWCWhUAZnX0JquE%2BkBJ5Fx%2BNo%3D&reserved=0) >>>>>> >>>>>> but core mm takes only read side mm_sem (here for example >>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fdrivers%2Fiommu%2Famd%2Fiommu_v2.c%23L488&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=h360c75Upl3%2FW7im7M1%2BxY%2FXy4gxin%2BkCF1Ui2zFXMs%3D&reserved=0) >>>>>> >>>>>> and so I am not supposed to modify vm_area_struct in this 
case. I am >>>>>> not sure if it's legit to write lock tthe mm_sem from this point. >>>>>> I found some discussions about this here >>>>>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Flkml.iu.edu%2Fhypermail%2Flinux%2Fkernel%2F1909.1%2F02754.html&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224021379%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=sx6s1lH%2FvxbIZajc4Yr49vFhxvPEnBHZlTt52D8qvZA%3D&reserved=0 >>>>>> but it >>>>>> wasn't really clear to me >>>>>> what's the solution. >>>>>> >>>>>> In any case, seems to me that easier and more memory saving solution >>>>>> would be to just switch to per ttm bo dumy rw page that >>>>>> would be allocated on demand as you suggested here. This should also >>>>>> take care of imported BOs and flink cases. >>>>>> Then i can drop the per device FD and per GEM object FD dummy BO and >>>>>> the ugly loop i am using in patch 2 to match faulting BO to the >>>>>> right >>>>>> dummy page. >>>>>> >>>>>> Does this makes sense ? >>>>> I still don't see the information leak as much of a problem, but if >>>>> Daniel insists we should probably do this. >>>> Well amdgpu doesn't clear buffers by default, so indeed you guys are a >>>> lot more laissez-faire here. But in general we really don't do that >>>> kind of leaking. Iirc there's even radeonsi bugs because else clears, >>>> and radeonsi happily displays gunk :-) >>>> >>>>> But could we at least have only one page per client instead of per >>>>> BO? >>>> I think you can do one page per file descriptor or something like >>>> that. But gets annoying with shared bo, especially with dma_buf_mmap >>>> forwarding. >>>> -Daniel >>> >>> >>> Christian - is your concern more with too much page allocations or >>> with extra pointer member >>> cluttering TTM BO struct ? >> >> Yes, that is one problem. 
>> >>> Because we can allocate the dummy page on demand only when >>> needed. It's just seems to me that keeping it per BO streamlines the >>> code as I don't need to >>> have different handling for local vs imported BOs. >> >> Why should you have a difference between local vs imported BOs? > > > For local BO seems like Daniel's suggestion to use > vm_area_struct->vm_file->private_data > should work as this points to drm_file. For imported BOs private_data > will point to dma_buf structure > since each imported BO is backed by a pseudo file (created in > dma_buf_getfile). Oh, good point. But we could easily fix that now. That should make the mapping code less complex as well. Regards, Christian. > If so,where should we store the dummy RW BO in this case ? In current > implementation it's stored in drm_gem_object. > > P.S For FLINK case it seems to me the handling should be no different > then with local BO as the > FD used for mmap in this case is still the same one associated with > the DRM file. > > Andrey > > >> >> Christian. >> >>> >>> Andrey >>
On 11/16/20 3:36 PM, Christian König wrote: > Am 16.11.20 um 20:00 schrieb Andrey Grodzovsky: >> >> On 11/16/20 4:48 AM, Christian König wrote: >>> Am 15.11.20 um 07:34 schrieb Andrey Grodzovsky: >>>> >>>> On 11/14/20 4:51 AM, Daniel Vetter wrote: >>>>> On Sat, Nov 14, 2020 at 9:41 AM Christian König >>>>> <ckoenig.leichtzumerken@gmail.com> wrote: >>>>>> Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: >>>>>>> On 6/22/20 1:50 PM, Daniel Vetter wrote: >>>>>>>> On Mon, Jun 22, 2020 at 7:45 PM Christian König >>>>>>>> <christian.koenig@amd.com> wrote: >>>>>>>>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>>>>>>>>> On 6/22/20 9:18 AM, Christian König wrote: >>>>>>>>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once >>>>>>>>>>>> device is removed. >>>>>>>>>>>> >>>>>>>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>>>>>>>> --- >>>>>>>>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>>>>>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>>>>>>>>> include/drm/drm_file.h | 2 ++ >>>>>>>>>>>> include/drm/drm_gem.h | 2 ++ >>>>>>>>>>>> 4 files changed, 22 insertions(+) >>>>>>>>>>>> >>>>>>>>>>>> diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c >>>>>>>>>>>> index c4c704e..67c0770 100644 >>>>>>>>>>>> --- a/drivers/gpu/drm/drm_file.c >>>>>>>>>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>>>>>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>>>>>>>>> drm_minor *minor) >>>>>>>>>>>> goto out_prime_destroy; >>>>>>>>>>>> } >>>>>>>>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>>>>> + if (!file->dummy_page) { >>>>>>>>>>>> + ret = -ENOMEM; >>>>>>>>>>>> + goto out_prime_destroy; >>>>>>>>>>>> + } >>>>>>>>>>>> + >>>>>>>>>>>> return file; >>>>>>>>>>>> out_prime_destroy: >>>>>>>>>>>> @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) >>>>>>>>>>>> if (dev->driver->postclose) >>>>>>>>>>>> dev->driver->postclose(dev, file); 
>>>>>>>>>>>> + __free_page(file->dummy_page); >>>>>>>>>>>> + >>>>>>>>>>>> drm_prime_destroy_file_private(&file->prime); >>>>>>>>>>>> WARN_ON(!list_empty(&file->event_list)); >>>>>>>>>>>> diff --git a/drivers/gpu/drm/drm_prime.c >>>>>>>>>>>> b/drivers/gpu/drm/drm_prime.c >>>>>>>>>>>> index 1de2cde..c482e9c 100644 >>>>>>>>>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>>>>>>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>>>>>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>>>>>>>>> drm_device *dev, >>>>>>>>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>>>>>>>>> dma_buf, *handle); >>>>>>>>>>>> + >>>>>>>>>>>> + if (!ret) { >>>>>>>>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>>>>>>>>>>> + if (!obj->dummy_page) >>>>>>>>>>>> + ret = -ENOMEM; >>>>>>>>>>>> + } >>>>>>>>>>>> + >>>>>>>>>>> While the per file case still looks acceptable this is a clear NAK >>>>>>>>>>> since it will massively increase the memory needed for a prime >>>>>>>>>>> exported object. >>>>>>>>>>> >>>>>>>>>>> I think that this is quite overkill in the first place and for the >>>>>>>>>>> hot unplug case we can just use the global dummy page as well. >>>>>>>>>>> >>>>>>>>>>> Christian. >>>>>>>>>> Global dummy page is good for read access, what do you do on write >>>>>>>>>> access ? 
My first approach was indeed to map at first global dummy >>>>>>>>>> page as read only and mark the vma->vm_flags as !VM_SHARED assuming >>>>>>>>>> that this would trigger Copy On Write flow in core mm >>>>>>>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=kghiG3VpCJod6YefExoDVPl9X03zNhw3SN5GAxgbnmU%3D&reserved=0) >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> on the next page fault to same address triggered by a write access but >>>>>>>>>> then i realized a new COW page will be allocated for each such mapping >>>>>>>>>> and this is much more wasteful then having a dedicated page per GEM >>>>>>>>>> object. >>>>>>>>> Yeah, but this is only for a very very small corner cases. What we need >>>>>>>>> to prevent is increasing the memory usage during normal operation to >>>>>>>>> much. >>>>>>>>> >>>>>>>>> Using memory during the unplug is completely unproblematic because we >>>>>>>>> just released quite a bunch of it by releasing all those system memory >>>>>>>>> buffers. >>>>>>>>> >>>>>>>>> And I'm pretty sure that COWed pages are correctly accounted towards >>>>>>>>> the >>>>>>>>> used memory of a process. >>>>>>>>> >>>>>>>>> So I think if that approach works as intended and the COW pages are >>>>>>>>> released again on unmapping it would be the perfect solution to the >>>>>>>>> problem. >>>>>>>>> >>>>>>>>> Daniel what do you think? >>>>>>>> If COW works, sure sounds reasonable. And if we can make sure we >>>>>>>> managed to drop all the system allocations (otherwise suddenly 2x >>>>>>>> memory usage, worst case). 
But I have no idea whether we can >>>>>>>> retroshoehorn that into an established vma, you might have fun stuff >>>>>>>> like a mkwrite handler there (which I thought is the COW handler >>>>>>>> thing, but really no idea). >>>>>>>> >>>>>>>> If we need to massively change stuff then I think rw dummy page, >>>>>>>> allocated on first fault after hotunplug (maybe just make it one per >>>>>>>> object, that's simplest) seems like the much safer option. Much less >>>>>>>> code that can go wrong. >>>>>>>> -Daniel >>>>>>> >>>>>>> Regarding COW, i was looking into how to properly implement it from >>>>>>> within the fault handler (i.e. ttm_bo_vm_fault) >>>>>>> and the main obstacle I hit is that of exclusive access to the >>>>>>> vm_area_struct, i need to be able to modify >>>>>>> vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so COW can >>>>>>> be triggered on subsequent write access >>>>>>> fault (here >>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fmm%2Fmemory.c%23L4128&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=ziHJtqyHuLrlb0uYKhoWCWhUAZnX0JquE%2BkBJ5Fx%2BNo%3D&reserved=0) >>>>>>> >>>>>>> but core mm takes only read side mm_sem (here for example >>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fdrivers%2Fiommu%2Famd%2Fiommu_v2.c%23L488&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=h360c75Upl3%2FW7im7M1%2BxY%2FXy4gxin%2BkCF1Ui2zFXMs%3D&reserved=0) >>>>>>> >>>>>>> and so I am not supposed to modify vm_area_struct in this 
case. I am >>>>>>> not sure if it's legit to write lock tthe mm_sem from this point. >>>>>>> I found some discussions about this here >>>>>>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Flkml.iu.edu%2Fhypermail%2Flinux%2Fkernel%2F1909.1%2F02754.html&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224021379%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=sx6s1lH%2FvxbIZajc4Yr49vFhxvPEnBHZlTt52D8qvZA%3D&reserved=0 >>>>>>> but it >>>>>>> wasn't really clear to me >>>>>>> what's the solution. >>>>>>> >>>>>>> In any case, seems to me that easier and more memory saving solution >>>>>>> would be to just switch to per ttm bo dumy rw page that >>>>>>> would be allocated on demand as you suggested here. This should also >>>>>>> take care of imported BOs and flink cases. >>>>>>> Then i can drop the per device FD and per GEM object FD dummy BO and >>>>>>> the ugly loop i am using in patch 2 to match faulting BO to the right >>>>>>> dummy page. >>>>>>> >>>>>>> Does this makes sense ? >>>>>> I still don't see the information leak as much of a problem, but if >>>>>> Daniel insists we should probably do this. >>>>> Well amdgpu doesn't clear buffers by default, so indeed you guys are a >>>>> lot more laissez-faire here. But in general we really don't do that >>>>> kind of leaking. Iirc there's even radeonsi bugs because else clears, >>>>> and radeonsi happily displays gunk :-) >>>>> >>>>>> But could we at least have only one page per client instead of per BO? >>>>> I think you can do one page per file descriptor or something like >>>>> that. But gets annoying with shared bo, especially with dma_buf_mmap >>>>> forwarding. >>>>> -Daniel >>>> >>>> >>>> Christian - is your concern more with too much page allocations or with >>>> extra pointer member >>>> cluttering TTM BO struct ? >>> >>> Yes, that is one problem. 
>>> >>>> Because we can allocate the dummy page on demand only when >>>> needed. It just seems to me that keeping it per BO streamlines the code >>>> as I don't need to >>>> have different handling for local vs imported BOs. >>> >>> Why should you have a difference between local vs imported BOs? >> >> >> For local BO seems like Daniel's suggestion to use >> vm_area_struct->vm_file->private_data >> should work as this points to drm_file. For imported BOs private_data will >> point to dma_buf structure >> since each imported BO is backed by a pseudo file (created in dma_buf_getfile). > > Oh, good point. But we could easily fix that now. That should make the mapping > code less complex as well. Can you clarify what fix you have in mind? I assume it's not by altering file->private_data to point to something else as we need to retrieve dmabuf (e.g. dma_buf_mmap_internal) Andrey > > Regards, > Christian. > >> If so, where should we store the dummy RW BO in this case? In current >> implementation it's stored in drm_gem_object. >> >> P.S. For FLINK case it seems to me the handling should be no different than >> with local BO as the >> FD used for mmap in this case is still the same one associated with the DRM >> file. >> >> Andrey >> >> >>> >>> Christian. >>> >>>> >>>> Andrey >>> >
Am 16.11.20 um 21:42 schrieb Andrey Grodzovsky: > > On 11/16/20 3:36 PM, Christian König wrote: >> Am 16.11.20 um 20:00 schrieb Andrey Grodzovsky: >>> >>> On 11/16/20 4:48 AM, Christian König wrote: >>>> Am 15.11.20 um 07:34 schrieb Andrey Grodzovsky: >>>>> >>>>> On 11/14/20 4:51 AM, Daniel Vetter wrote: >>>>>> On Sat, Nov 14, 2020 at 9:41 AM Christian König >>>>>> <ckoenig.leichtzumerken@gmail.com> wrote: >>>>>>> Am 13.11.20 um 21:52 schrieb Andrey Grodzovsky: >>>>>>>> On 6/22/20 1:50 PM, Daniel Vetter wrote: >>>>>>>>> On Mon, Jun 22, 2020 at 7:45 PM Christian König >>>>>>>>> <christian.koenig@amd.com> wrote: >>>>>>>>>> Am 22.06.20 um 16:32 schrieb Andrey Grodzovsky: >>>>>>>>>>> On 6/22/20 9:18 AM, Christian König wrote: >>>>>>>>>>>> Am 21.06.20 um 08:03 schrieb Andrey Grodzovsky: >>>>>>>>>>>>> Will be used to reroute CPU mapped BO's page faults once >>>>>>>>>>>>> device is removed. >>>>>>>>>>>>> >>>>>>>>>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>>>>>>>>>>>> --- >>>>>>>>>>>>> drivers/gpu/drm/drm_file.c | 8 ++++++++ >>>>>>>>>>>>> drivers/gpu/drm/drm_prime.c | 10 ++++++++++ >>>>>>>>>>>>> include/drm/drm_file.h | 2 ++ >>>>>>>>>>>>> include/drm/drm_gem.h | 2 ++ >>>>>>>>>>>>> 4 files changed, 22 insertions(+) >>>>>>>>>>>>> >>>>>>>>>>>>> diff --git a/drivers/gpu/drm/drm_file.c >>>>>>>>>>>>> b/drivers/gpu/drm/drm_file.c >>>>>>>>>>>>> index c4c704e..67c0770 100644 >>>>>>>>>>>>> --- a/drivers/gpu/drm/drm_file.c >>>>>>>>>>>>> +++ b/drivers/gpu/drm/drm_file.c >>>>>>>>>>>>> @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct >>>>>>>>>>>>> drm_minor *minor) >>>>>>>>>>>>> goto out_prime_destroy; >>>>>>>>>>>>> } >>>>>>>>>>>>> + file->dummy_page = alloc_page(GFP_KERNEL | >>>>>>>>>>>>> __GFP_ZERO); >>>>>>>>>>>>> + if (!file->dummy_page) { >>>>>>>>>>>>> + ret = -ENOMEM; >>>>>>>>>>>>> + goto out_prime_destroy; >>>>>>>>>>>>> + } >>>>>>>>>>>>> + >>>>>>>>>>>>> return file; >>>>>>>>>>>>> out_prime_destroy: >>>>>>>>>>>>> @@ -284,6 +290,8 @@ void 
drm_file_free(struct drm_file *file) >>>>>>>>>>>>> if (dev->driver->postclose) >>>>>>>>>>>>> dev->driver->postclose(dev, file); >>>>>>>>>>>>> + __free_page(file->dummy_page); >>>>>>>>>>>>> + >>>>>>>>>>>>> drm_prime_destroy_file_private(&file->prime); >>>>>>>>>>>>> WARN_ON(!list_empty(&file->event_list)); >>>>>>>>>>>>> diff --git a/drivers/gpu/drm/drm_prime.c >>>>>>>>>>>>> b/drivers/gpu/drm/drm_prime.c >>>>>>>>>>>>> index 1de2cde..c482e9c 100644 >>>>>>>>>>>>> --- a/drivers/gpu/drm/drm_prime.c >>>>>>>>>>>>> +++ b/drivers/gpu/drm/drm_prime.c >>>>>>>>>>>>> @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct >>>>>>>>>>>>> drm_device *dev, >>>>>>>>>>>>> ret = drm_prime_add_buf_handle(&file_priv->prime, >>>>>>>>>>>>> dma_buf, *handle); >>>>>>>>>>>>> + >>>>>>>>>>>>> + if (!ret) { >>>>>>>>>>>>> + obj->dummy_page = alloc_page(GFP_KERNEL | >>>>>>>>>>>>> __GFP_ZERO); >>>>>>>>>>>>> + if (!obj->dummy_page) >>>>>>>>>>>>> + ret = -ENOMEM; >>>>>>>>>>>>> + } >>>>>>>>>>>>> + >>>>>>>>>>>> While the per file case still looks acceptable this is a >>>>>>>>>>>> clear NAK >>>>>>>>>>>> since it will massively increase the memory needed for a prime >>>>>>>>>>>> exported object. >>>>>>>>>>>> >>>>>>>>>>>> I think that this is quite overkill in the first place and >>>>>>>>>>>> for the >>>>>>>>>>>> hot unplug case we can just use the global dummy page as well. >>>>>>>>>>>> >>>>>>>>>>>> Christian. >>>>>>>>>>> Global dummy page is good for read access, what do you do on >>>>>>>>>>> write >>>>>>>>>>> access ? 
My first approach was indeed to map at first global >>>>>>>>>>> dummy >>>>>>>>>>> page as read only and mark the vma->vm_flags as !VM_SHARED >>>>>>>>>>> assuming >>>>>>>>>>> that this would trigger Copy On Write flow in core mm >>>>>>>>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv5.7-rc7%2Fsource%2Fmm%2Fmemory.c%23L3977&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=kghiG3VpCJod6YefExoDVPl9X03zNhw3SN5GAxgbnmU%3D&reserved=0) >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> on the next page fault to same address triggered by a write >>>>>>>>>>> access but >>>>>>>>>>> then i realized a new COW page will be allocated for each >>>>>>>>>>> such mapping >>>>>>>>>>> and this is much more wasteful then having a dedicated page >>>>>>>>>>> per GEM >>>>>>>>>>> object. >>>>>>>>>> Yeah, but this is only for a very very small corner cases. >>>>>>>>>> What we need >>>>>>>>>> to prevent is increasing the memory usage during normal >>>>>>>>>> operation to >>>>>>>>>> much. >>>>>>>>>> >>>>>>>>>> Using memory during the unplug is completely unproblematic >>>>>>>>>> because we >>>>>>>>>> just released quite a bunch of it by releasing all those >>>>>>>>>> system memory >>>>>>>>>> buffers. >>>>>>>>>> >>>>>>>>>> And I'm pretty sure that COWed pages are correctly accounted >>>>>>>>>> towards >>>>>>>>>> the >>>>>>>>>> used memory of a process. >>>>>>>>>> >>>>>>>>>> So I think if that approach works as intended and the COW >>>>>>>>>> pages are >>>>>>>>>> released again on unmapping it would be the perfect solution >>>>>>>>>> to the >>>>>>>>>> problem. >>>>>>>>>> >>>>>>>>>> Daniel what do you think? >>>>>>>>> If COW works, sure sounds reasonable. 
And if we can make sure we >>>>>>>>> managed to drop all the system allocations (otherwise suddenly 2x >>>>>>>>> memory usage, worst case). But I have no idea whether we can >>>>>>>>> retroshoehorn that into an established vma, you might have fun >>>>>>>>> stuff >>>>>>>>> like a mkwrite handler there (which I thought is the COW handler >>>>>>>>> thing, but really no idea). >>>>>>>>> >>>>>>>>> If we need to massively change stuff then I think rw dummy page, >>>>>>>>> allocated on first fault after hotunplug (maybe just make it >>>>>>>>> one per >>>>>>>>> object, that's simplest) seems like the much safer option. >>>>>>>>> Much less >>>>>>>>> code that can go wrong. >>>>>>>>> -Daniel >>>>>>>> >>>>>>>> Regarding COW, i was looking into how to properly implement it >>>>>>>> from >>>>>>>> within the fault handler (i.e. ttm_bo_vm_fault) >>>>>>>> and the main obstacle I hit is that of exclusive access to the >>>>>>>> vm_area_struct, i need to be able to modify >>>>>>>> vma->vm_flags (and vm_page_prot) to remove VM_SHARED bit so >>>>>>>> COW can >>>>>>>> be triggered on subsequent write access >>>>>>>> fault (here >>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fmm%2Fmemory.c%23L4128&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=ziHJtqyHuLrlb0uYKhoWCWhUAZnX0JquE%2BkBJ5Fx%2BNo%3D&reserved=0) >>>>>>>> >>>>>>>> but core mm takes only read side mm_sem (here for example >>>>>>>> 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Flatest%2Fsource%2Fdrivers%2Fiommu%2Famd%2Fiommu_v2.c%23L488&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224016377%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=h360c75Upl3%2FW7im7M1%2BxY%2FXy4gxin%2BkCF1Ui2zFXMs%3D&reserved=0) >>>>>>>> >>>>>>>> and so I am not supposed to modify vm_area_struct in this case. >>>>>>>> I am >>>>>>>> not sure if it's legit to write lock tthe mm_sem from this point. >>>>>>>> I found some discussions about this here >>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Flkml.iu.edu%2Fhypermail%2Flinux%2Fkernel%2F1909.1%2F02754.html&data=04%7C01%7CAndrey.Grodzovsky%40amd.com%7C00053e9d983041ed63ae08d88882ed87%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637409443224021379%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=sx6s1lH%2FvxbIZajc4Yr49vFhxvPEnBHZlTt52D8qvZA%3D&reserved=0 >>>>>>>> but it >>>>>>>> wasn't really clear to me >>>>>>>> what's the solution. >>>>>>>> >>>>>>>> In any case, seems to me that easier and more memory saving >>>>>>>> solution >>>>>>>> would be to just switch to per ttm bo dumy rw page that >>>>>>>> would be allocated on demand as you suggested here. This should >>>>>>>> also >>>>>>>> take care of imported BOs and flink cases. >>>>>>>> Then i can drop the per device FD and per GEM object FD dummy >>>>>>>> BO and >>>>>>>> the ugly loop i am using in patch 2 to match faulting BO to the >>>>>>>> right >>>>>>>> dummy page. >>>>>>>> >>>>>>>> Does this makes sense ? >>>>>>> I still don't see the information leak as much of a problem, but if >>>>>>> Daniel insists we should probably do this. >>>>>> Well amdgpu doesn't clear buffers by default, so indeed you guys >>>>>> are a >>>>>> lot more laissez-faire here. 
But in general we really don't do that >>>>>> kind of leaking. Iirc there's even radeonsi bugs because else >>>>>> clears, >>>>>> and radeonsi happily displays gunk :-) >>>>>> >>>>>>> But could we at least have only one page per client instead of >>>>>>> per BO? >>>>>> I think you can do one page per file descriptor or something like >>>>>> that. But gets annoying with shared bo, especially with dma_buf_mmap >>>>>> forwarding. >>>>>> -Daniel >>>>> >>>>> >>>>> Christian - is your concern more with too many page allocations or >>>>> with extra pointer member >>>>> cluttering TTM BO struct? >>>> >>>> Yes, that is one problem. >>>> >>>>> Because we can allocate the dummy page on demand only when >>>>> needed. It just seems to me that keeping it per BO streamlines >>>>> the code as I don't need to >>>>> have different handling for local vs imported BOs. >>>> >>>> Why should you have a difference between local vs imported BOs? >>> >>> >>> For local BO seems like Daniel's suggestion to use >>> vm_area_struct->vm_file->private_data >>> should work as this points to drm_file. For imported BOs >>> private_data will point to dma_buf structure >>> since each imported BO is backed by a pseudo file (created in >>> dma_buf_getfile). >> >> Oh, good point. But we could easily fix that now. That should make >> the mapping code less complex as well. > > > Can you clarify what fix you have in mind? I assume it's not by > altering file->private_data to point > to something else as we need to retrieve dmabuf (e.g. > dma_buf_mmap_internal) Ah, crap. You are right that is really tricky because vma->vm_file doesn't point to something useful in this situation. I was talking about the new vma_set_file() function I've just pushed to drm-misc-next, but that stuff can't be used here. I still don't see the need to use more than the global dummy page even if that means information leak between processes on unplug. Christian. > > Andrey > > >> >> Regards, >> Christian. 
>> >>> If so, where should we store the dummy RW BO in this case? In >>> current implementation it's stored in drm_gem_object. >>> >>> P.S. For FLINK case it seems to me the handling should be no >>> different than with local BO as the >>> FD used for mmap in this case is still the same one associated with >>> the DRM file. >>> >>> Andrey >>> >>> >>>> >>>> Christian. >>>> >>>>> >>>>> Andrey >>>> >>
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index c4c704e..67c0770 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -188,6 +188,12 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) goto out_prime_destroy; } + file->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!file->dummy_page) { + ret = -ENOMEM; + goto out_prime_destroy; + } + return file; out_prime_destroy: @@ -284,6 +290,8 @@ void drm_file_free(struct drm_file *file) if (dev->driver->postclose) dev->driver->postclose(dev, file); + __free_page(file->dummy_page); + drm_prime_destroy_file_private(&file->prime); WARN_ON(!list_empty(&file->event_list)); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 1de2cde..c482e9c 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -335,6 +335,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, ret = drm_prime_add_buf_handle(&file_priv->prime, dma_buf, *handle); + + if (!ret) { + obj->dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!obj->dummy_page) + ret = -ENOMEM; + } + mutex_unlock(&file_priv->prime.lock); if (ret) goto fail; @@ -1006,6 +1013,9 @@ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg) dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL); dma_buf = attach->dmabuf; dma_buf_detach(attach->dmabuf, attach); + + __free_page(obj->dummy_page); + /* remove the reference */ dma_buf_put(dma_buf); } diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 19df802..349a658 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -335,6 +335,8 @@ struct drm_file { */ struct drm_prime_file_private prime; + struct page *dummy_page; + /* private: */ #if IS_ENABLED(CONFIG_DRM_LEGACY) unsigned long lock_count; /* DRI1 legacy lock count */ diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 0b37506..47460d1 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -310,6 
+310,8 @@ struct drm_gem_object { * */ const struct drm_gem_object_funcs *funcs; + + struct page *dummy_page; }; /**
Will be used to reroute CPU-mapped BO's page faults once the device is removed. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> --- drivers/gpu/drm/drm_file.c | 8 ++++++++ drivers/gpu/drm/drm_prime.c | 10 ++++++++++ include/drm/drm_file.h | 2 ++ include/drm/drm_gem.h | 2 ++ 4 files changed, 22 insertions(+)