Message ID | 20210615115746.27424-1-xinhui.pan@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [RFC] drm/ttm: Do page counting after populate callback succeed | expand |
Am 15.06.21 um 13:57 schrieb xinhui pan: > Amdgpu set SG flag in populate callback. So TTM still count pages in SG > BO. It's probably better to fix this instead. E.g. why does amdgpu modify the SG flag during populate and not during initial creation? That doesn't seem to make sense. Christian. > One easy way to fix this is lets count pages after populate callback. > > We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap > again and again even if swapout does not swap SG BOs at all. > > Signed-off-by: xinhui pan <xinhui.pan@amd.com> > --- > drivers/gpu/drm/ttm/ttm_tt.c | 32 +++++++++++++------------------- > 1 file changed, 13 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c > index a1a25410ec74..4fa0a8cd71c0 100644 > --- a/drivers/gpu/drm/ttm/ttm_tt.c > +++ b/drivers/gpu/drm/ttm/ttm_tt.c > @@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev, > if (ttm_tt_is_populated(ttm)) > return 0; > > - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { > - atomic_long_add(ttm->num_pages, &ttm_pages_allocated); > - if (bdev->pool.use_dma32) > - atomic_long_add(ttm->num_pages, > - &ttm_dma32_pages_allocated); > - } > - > while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit || > atomic_long_read(&ttm_dma32_pages_allocated) > > ttm_dma32_pages_limit) { > @@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev, > if (ret) > goto error; > > + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { > + atomic_long_add(ttm->num_pages, &ttm_pages_allocated); > + if (bdev->pool.use_dma32) > + atomic_long_add(ttm->num_pages, > + &ttm_dma32_pages_allocated); > + } > + > ttm_tt_add_mapping(bdev, ttm); > ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; > if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { > @@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev, > return 0; > > error: > - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { > - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); > - if (bdev->pool.use_dma32) > - atomic_long_sub(ttm->num_pages, > - &ttm_dma32_pages_allocated); > - } > return ret; > } > EXPORT_SYMBOL(ttm_tt_populate); > @@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) > if (!ttm_tt_is_populated(ttm)) > return; > > - ttm_tt_clear_mapping(ttm); > - if (bdev->funcs->ttm_tt_unpopulate) > - bdev->funcs->ttm_tt_unpopulate(bdev, ttm); > - else > - ttm_pool_free(&bdev->pool, ttm); > - > if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { > atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); > if (bdev->pool.use_dma32) > @@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) > &ttm_dma32_pages_allocated); > } > > + ttm_tt_clear_mapping(ttm); > + if (bdev->funcs->ttm_tt_unpopulate) > + bdev->funcs->ttm_tt_unpopulate(bdev, ttm); > + else > + ttm_pool_free(&bdev->pool, ttm); > + > ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; > } >
> 2021年6月15日 20:01,Christian König <ckoenig.leichtzumerken@gmail.com> 写道: > > Am 15.06.21 um 13:57 schrieb xinhui pan: >> Amdgpu set SG flag in populate callback. So TTM still count pages in SG >> BO. > > It's probably better to fix this instead. E.g. why does amdgpu modify the SG flag during populate and not during initial creation? That doesn't seem to make sense. fair enough. Let me have a try. No idea why we set SG flag in populate years ago. > > Christian. > >> One easy way to fix this is lets count pages after populate callback. >> >> We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap >> again and again even if swapout does not swap SG BOs at all. >> >> Signed-off-by: xinhui pan <xinhui.pan@amd.com> >> --- >> drivers/gpu/drm/ttm/ttm_tt.c | 32 +++++++++++++------------------- >> 1 file changed, 13 insertions(+), 19 deletions(-) >> >> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c >> index a1a25410ec74..4fa0a8cd71c0 100644 >> --- a/drivers/gpu/drm/ttm/ttm_tt.c >> +++ b/drivers/gpu/drm/ttm/ttm_tt.c >> @@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev, >> if (ttm_tt_is_populated(ttm)) >> return 0; >> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >> - atomic_long_add(ttm->num_pages, &ttm_pages_allocated); >> - if (bdev->pool.use_dma32) >> - atomic_long_add(ttm->num_pages, >> - &ttm_dma32_pages_allocated); >> - } >> - >> while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit || >> atomic_long_read(&ttm_dma32_pages_allocated) > >> ttm_dma32_pages_limit) { >> @@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev, >> if (ret) >> goto error; >> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >> + atomic_long_add(ttm->num_pages, &ttm_pages_allocated); >> + if (bdev->pool.use_dma32) >> + atomic_long_add(ttm->num_pages, >> + &ttm_dma32_pages_allocated); >> + } >> + >> ttm_tt_add_mapping(bdev, ttm); >> ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; >> if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { >> @@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev, >> return 0; >> error: >> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); >> - if (bdev->pool.use_dma32) >> - atomic_long_sub(ttm->num_pages, >> - &ttm_dma32_pages_allocated); >> - } >> return ret; >> } >> EXPORT_SYMBOL(ttm_tt_populate); >> @@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) >> if (!ttm_tt_is_populated(ttm)) >> return; >> - ttm_tt_clear_mapping(ttm); >> - if (bdev->funcs->ttm_tt_unpopulate) >> - bdev->funcs->ttm_tt_unpopulate(bdev, ttm); >> - else >> - ttm_pool_free(&bdev->pool, ttm); >> - >> if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >> atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); >> if (bdev->pool.use_dma32) >> @@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) >> &ttm_dma32_pages_allocated); >> } >> + ttm_tt_clear_mapping(ttm); >> + if (bdev->funcs->ttm_tt_unpopulate) >> + bdev->funcs->ttm_tt_unpopulate(bdev, ttm); >> + else >> + ttm_pool_free(&bdev->pool, ttm); >> + >> ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; >> } >> >
Am 15.06.21 um 14:11 schrieb Pan, Xinhui: >> 2021年6月15日 20:01,Christian König <ckoenig.leichtzumerken@gmail.com> 写道: >> >> Am 15.06.21 um 13:57 schrieb xinhui pan: >>> Amdgpu set SG flag in populate callback. So TTM still count pages in SG >>> BO. >> It's probably better to fix this instead. E.g. why does amdgpu modify the SG flag during populate and not during initial creation? That doesn't seem to make sense. > fair enough. Let me have a try. > No idea why we set SG flag in populate years ago. That's pretty recent IIRC. Felix moved the code around a bit to fix another problem. Christian. > >> Christian. >> >>> One easy way to fix this is lets count pages after populate callback. >>> >>> We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap >>> again and again even if swapout does not swap SG BOs at all. >>> >>> Signed-off-by: xinhui pan <xinhui.pan@amd.com> >>> --- >>> drivers/gpu/drm/ttm/ttm_tt.c | 32 +++++++++++++------------------- >>> 1 file changed, 13 insertions(+), 19 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c >>> index a1a25410ec74..4fa0a8cd71c0 100644 >>> --- a/drivers/gpu/drm/ttm/ttm_tt.c >>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c >>> @@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev, >>> if (ttm_tt_is_populated(ttm)) >>> return 0; >>> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>> - atomic_long_add(ttm->num_pages, &ttm_pages_allocated); >>> - if (bdev->pool.use_dma32) >>> - atomic_long_add(ttm->num_pages, >>> - &ttm_dma32_pages_allocated); >>> - } >>> - >>> while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit || >>> atomic_long_read(&ttm_dma32_pages_allocated) > >>> ttm_dma32_pages_limit) { >>> @@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev, >>> if (ret) >>> goto error; >>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>> + atomic_long_add(ttm->num_pages, &ttm_pages_allocated); >>> + if (bdev->pool.use_dma32) >>> + atomic_long_add(ttm->num_pages, >>> + &ttm_dma32_pages_allocated); >>> + } >>> + >>> ttm_tt_add_mapping(bdev, ttm); >>> ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; >>> if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { >>> @@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev, >>> return 0; >>> error: >>> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); >>> - if (bdev->pool.use_dma32) >>> - atomic_long_sub(ttm->num_pages, >>> - &ttm_dma32_pages_allocated); >>> - } >>> return ret; >>> } >>> EXPORT_SYMBOL(ttm_tt_populate); >>> @@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) >>> if (!ttm_tt_is_populated(ttm)) >>> return; >>> - ttm_tt_clear_mapping(ttm); >>> - if (bdev->funcs->ttm_tt_unpopulate) >>> - bdev->funcs->ttm_tt_unpopulate(bdev, ttm); >>> - else >>> - ttm_pool_free(&bdev->pool, ttm); >>> - >>> if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>> atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); >>> if (bdev->pool.use_dma32) >>> @@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) >>> &ttm_dma32_pages_allocated); >>> } >>> + ttm_tt_clear_mapping(ttm); >>> + if (bdev->funcs->ttm_tt_unpopulate) >>> + bdev->funcs->ttm_tt_unpopulate(bdev, ttm); >>> + else >>> + ttm_pool_free(&bdev->pool, ttm); >>> + >>> ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; >>> } >>>
Am 2021-06-15 um 8:18 a.m. schrieb Christian König: > Am 15.06.21 um 14:11 schrieb Pan, Xinhui: >>> 2021年6月15日 20:01,Christian König <ckoenig.leichtzumerken@gmail.com> >>> 写道: >>> >>> Am 15.06.21 um 13:57 schrieb xinhui pan: >>>> Amdgpu set SG flag in populate callback. So TTM still count pages >>>> in SG >>>> BO. >>> It's probably better to fix this instead. E.g. why does amdgpu >>> modify the SG flag during populate and not during initial creation? >>> That doesn't seem to make sense. >> fair enough. Let me have a try. >> No idea why we set SG flag in populate years ago. > > That's pretty recent IIRC. Felix moved the code around a bit to fix > another problem. I moved some code from populate/unpopulate to backend_bind/unbind for attaching and detaching dmabufs. I didn't change the setting/unsetting of SG flags for userptr BOs. That goes back all the way to 2015. As far as I can tell, this is needed because userptr BOs are not created as SG BOs. That's something I've also pointed out before. Regards, Felix > > Christian. > >> >>> Christian. >>> >>>> One easy way to fix this is lets count pages after populate callback. >>>> >>>> We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap >>>> again and again even if swapout does not swap SG BOs at all. >>>> >>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com> >>>> --- >>>> drivers/gpu/drm/ttm/ttm_tt.c | 32 +++++++++++++------------------- >>>> 1 file changed, 13 insertions(+), 19 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c >>>> b/drivers/gpu/drm/ttm/ttm_tt.c >>>> index a1a25410ec74..4fa0a8cd71c0 100644 >>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c >>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c >>>> @@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev, >>>> if (ttm_tt_is_populated(ttm)) >>>> return 0; >>>> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>>> - atomic_long_add(ttm->num_pages, &ttm_pages_allocated); >>>> - if (bdev->pool.use_dma32) >>>> - atomic_long_add(ttm->num_pages, >>>> - &ttm_dma32_pages_allocated); >>>> - } >>>> - >>>> while (atomic_long_read(&ttm_pages_allocated) > >>>> ttm_pages_limit || >>>> atomic_long_read(&ttm_dma32_pages_allocated) > >>>> ttm_dma32_pages_limit) { >>>> @@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev, >>>> if (ret) >>>> goto error; >>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>>> + atomic_long_add(ttm->num_pages, &ttm_pages_allocated); >>>> + if (bdev->pool.use_dma32) >>>> + atomic_long_add(ttm->num_pages, >>>> + &ttm_dma32_pages_allocated); >>>> + } >>>> + >>>> ttm_tt_add_mapping(bdev, ttm); >>>> ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; >>>> if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { >>>> @@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev, >>>> return 0; >>>> error: >>>> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); >>>> - if (bdev->pool.use_dma32) >>>> - atomic_long_sub(ttm->num_pages, >>>> - &ttm_dma32_pages_allocated); >>>> - } >>>> return ret; >>>> } >>>> EXPORT_SYMBOL(ttm_tt_populate); >>>> @@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device >>>> *bdev, struct ttm_tt *ttm) >>>> if (!ttm_tt_is_populated(ttm)) >>>> return; >>>> - ttm_tt_clear_mapping(ttm); >>>> - if (bdev->funcs->ttm_tt_unpopulate) >>>> - bdev->funcs->ttm_tt_unpopulate(bdev, ttm); >>>> - else >>>> - ttm_pool_free(&bdev->pool, ttm); >>>> - >>>> if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>>> atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); >>>> if (bdev->pool.use_dma32) >>>> @@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device >>>> *bdev, struct ttm_tt *ttm) >>>> &ttm_dma32_pages_allocated); >>>> } >>>> + ttm_tt_clear_mapping(ttm); >>>> + if (bdev->funcs->ttm_tt_unpopulate) >>>> + bdev->funcs->ttm_tt_unpopulate(bdev, ttm); >>>> + else >>>> + ttm_pool_free(&bdev->pool, ttm); >>>> + >>>> ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; >>>> } >>>> >
Am 15.06.21 um 17:06 schrieb Felix Kuehling: > Am 2021-06-15 um 8:18 a.m. schrieb Christian König: >> Am 15.06.21 um 14:11 schrieb Pan, Xinhui: >>>> 2021年6月15日 20:01,Christian König <ckoenig.leichtzumerken@gmail.com> >>>> 写道: >>>> >>>> Am 15.06.21 um 13:57 schrieb xinhui pan: >>>>> Amdgpu set SG flag in populate callback. So TTM still count pages >>>>> in SG >>>>> BO. >>>> It's probably better to fix this instead. E.g. why does amdgpu >>>> modify the SG flag during populate and not during initial creation? >>>> That doesn't seem to make sense. >>> fair enough. Let me have a try. >>> No idea why we set SG flag in populate years ago. >> That's pretty recent IIRC. Felix moved the code around a bit to fix >> another problem. > I moved some code from populate/unpopulate to backend_bind/unbind for > attaching and detaching dmabufs. I didn't change the setting/unsetting > of SG flags for userptr BOs. That goes back all the way to 2015. > > As far as I can tell, this is needed because userptr BOs are not created > as SG BOs. That's something I've also pointed out before. Ah, yes. That's because we need the pages array for userptr BOs. Then we should probably move that to amdgpu_ttm_tt_set_userptr(). Thanks, Christian. > > Regards, > Felix > > >> Christian. >> >>>> Christian. >>>> >>>>> One easy way to fix this is lets count pages after populate callback. >>>>> >>>>> We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap >>>>> again and again even if swapout does not swap SG BOs at all. >>>>> >>>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com> >>>>> --- >>>>> drivers/gpu/drm/ttm/ttm_tt.c | 32 +++++++++++++------------------- >>>>> 1 file changed, 13 insertions(+), 19 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c >>>>> b/drivers/gpu/drm/ttm/ttm_tt.c >>>>> index a1a25410ec74..4fa0a8cd71c0 100644 >>>>> --- a/drivers/gpu/drm/ttm/ttm_tt.c >>>>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c >>>>> @@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev, >>>>> if (ttm_tt_is_populated(ttm)) >>>>> return 0; >>>>> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>>>> - atomic_long_add(ttm->num_pages, &ttm_pages_allocated); >>>>> - if (bdev->pool.use_dma32) >>>>> - atomic_long_add(ttm->num_pages, >>>>> - &ttm_dma32_pages_allocated); >>>>> - } >>>>> - >>>>> while (atomic_long_read(&ttm_pages_allocated) > >>>>> ttm_pages_limit || >>>>> atomic_long_read(&ttm_dma32_pages_allocated) > >>>>> ttm_dma32_pages_limit) { >>>>> @@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev, >>>>> if (ret) >>>>> goto error; >>>>> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>>>> + atomic_long_add(ttm->num_pages, &ttm_pages_allocated); >>>>> + if (bdev->pool.use_dma32) >>>>> + atomic_long_add(ttm->num_pages, >>>>> + &ttm_dma32_pages_allocated); >>>>> + } >>>>> + >>>>> ttm_tt_add_mapping(bdev, ttm); >>>>> ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; >>>>> if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { >>>>> @@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev, >>>>> return 0; >>>>> error: >>>>> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>>>> - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); >>>>> - if (bdev->pool.use_dma32) >>>>> - atomic_long_sub(ttm->num_pages, >>>>> - &ttm_dma32_pages_allocated); >>>>> - } >>>>> return ret; >>>>> } >>>>> EXPORT_SYMBOL(ttm_tt_populate); >>>>> @@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device >>>>> *bdev, struct ttm_tt *ttm) >>>>> if (!ttm_tt_is_populated(ttm)) >>>>> return; >>>>> - ttm_tt_clear_mapping(ttm); >>>>> - if (bdev->funcs->ttm_tt_unpopulate) >>>>> - bdev->funcs->ttm_tt_unpopulate(bdev, ttm); >>>>> - else >>>>> - ttm_pool_free(&bdev->pool, ttm); >>>>> - >>>>> if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { >>>>> atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); >>>>> if (bdev->pool.use_dma32) >>>>> @@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device >>>>> *bdev, struct ttm_tt *ttm) >>>>> &ttm_dma32_pages_allocated); >>>>> } >>>>> + ttm_tt_clear_mapping(ttm); >>>>> + if (bdev->funcs->ttm_tt_unpopulate) >>>>> + bdev->funcs->ttm_tt_unpopulate(bdev, ttm); >>>>> + else >>>>> + ttm_pool_free(&bdev->pool, ttm); >>>>> + >>>>> ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; >>>>> } >>>>>
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index a1a25410ec74..4fa0a8cd71c0 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -317,13 +317,6 @@ int ttm_tt_populate(struct ttm_device *bdev, if (ttm_tt_is_populated(ttm)) return 0; - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { - atomic_long_add(ttm->num_pages, &ttm_pages_allocated); - if (bdev->pool.use_dma32) - atomic_long_add(ttm->num_pages, - &ttm_dma32_pages_allocated); - } - while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit || atomic_long_read(&ttm_dma32_pages_allocated) > ttm_dma32_pages_limit) { @@ -342,6 +335,13 @@ int ttm_tt_populate(struct ttm_device *bdev, if (ret) goto error; + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { + atomic_long_add(ttm->num_pages, &ttm_pages_allocated); + if (bdev->pool.use_dma32) + atomic_long_add(ttm->num_pages, + &ttm_dma32_pages_allocated); + } + ttm_tt_add_mapping(bdev, ttm); ttm->page_flags |= TTM_PAGE_FLAG_PRIV_POPULATED; if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { @@ -355,12 +355,6 @@ int ttm_tt_populate(struct ttm_device *bdev, return 0; error: - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { - atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); - if (bdev->pool.use_dma32) - atomic_long_sub(ttm->num_pages, - &ttm_dma32_pages_allocated); - } return ret; } EXPORT_SYMBOL(ttm_tt_populate); @@ -384,12 +378,6 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) if (!ttm_tt_is_populated(ttm)) return; - ttm_tt_clear_mapping(ttm); - if (bdev->funcs->ttm_tt_unpopulate) - bdev->funcs->ttm_tt_unpopulate(bdev, ttm); - else - ttm_pool_free(&bdev->pool, ttm); - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) { atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); if (bdev->pool.use_dma32) @@ -397,6 +385,12 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) &ttm_dma32_pages_allocated); } + ttm_tt_clear_mapping(ttm); + if (bdev->funcs->ttm_tt_unpopulate) + bdev->funcs->ttm_tt_unpopulate(bdev, ttm); + else + ttm_pool_free(&bdev->pool, ttm); + ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED; }
Amdgpu set SG flag in populate callback. So TTM still count pages in SG BO. One easy way to fix this is lets count pages after populate callback. We hit one issue that amdgpu alloc many SG BOs, but TTM try to do swap again and again even if swapout does not swap SG BOs at all. Signed-off-by: xinhui pan <xinhui.pan@amd.com> --- drivers/gpu/drm/ttm/ttm_tt.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-)