Message ID | 20200822164233.71583-1-paul@crapouillou.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4] gpu/drm: ingenic: Add option to mmap GEM buffers cached | expand |
Any love for my patch? I have more pending :) Thanks, -Paul Le sam. 22 août 2020 à 18:42, Paul Cercueil <paul@crapouillou.net> a écrit : > Ingenic SoCs are most notably used in cheap chinese handheld gaming > consoles. There, the games and applications generally render in > software > directly into GEM buffers. > > Traditionally, GEM buffers are mapped write-combine. Writes to the > buffer are accelerated, and reads are slow. Application doing lots of > alpha-blending paint inside shadow buffers, which is then memcpy'd > into > the final GEM buffer. > > On recent Ingenic SoCs however, it is much faster to have a fully > cached > GEM buffer, in which applications paint directly, and whose data is > invalidated before scanout, than having a write-combine GEM buffer, > even > when alpha blending is not used. > > Add an optional 'cached_gem_buffers' parameter to the ingenic-drm > driver > to allow GEM buffers to be mapped fully-cached, in order to speed up > software rendering. > > v2: Use standard noncoherent DMA APIs > > v3: Use damage clips instead of invalidating full frames > > v4: Avoid dma_pgprot() which is not exported. Using vm_get_page_prot() > is enough in this case. > > Signed-off-by: Paul Cercueil <paul@crapouillou.net> > --- > drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 107 > +++++++++++++++++++++- > drivers/gpu/drm/ingenic/ingenic-drm.h | 4 + > drivers/gpu/drm/ingenic/ingenic-ipu.c | 12 ++- > 3 files changed, 119 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c > b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c > index 5dab9c3d0a52..bf571411b73f 100644 > --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c > +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c > @@ -9,6 +9,8 @@ > #include <linux/component.h> > #include <linux/clk.h> > #include <linux/dma-mapping.h> > +#include <linux/dma-noncoherent.h> > +#include <linux/io.h> > #include <linux/module.h> > #include <linux/of_device.h> > #include <linux/platform_device.h> > @@ -19,6 +21,7 @@ > #include <drm/drm_bridge.h> > #include <drm/drm_crtc.h> > #include <drm/drm_crtc_helper.h> > +#include <drm/drm_damage_helper.h> > #include <drm/drm_drv.h> > #include <drm/drm_gem_cma_helper.h> > #include <drm/drm_fb_cma_helper.h> > @@ -76,6 +79,11 @@ static const u32 ingenic_drm_primary_formats[] = { > DRM_FORMAT_XRGB8888, > }; > > +static bool ingenic_drm_cached_gem_buf; > +module_param_named(cached_gem_buffers, ingenic_drm_cached_gem_buf, > bool, 0400); > +MODULE_PARM_DESC(cached_gem_buffers, > + "Enable fully cached GEM buffers [default=false]"); > + > static bool ingenic_drm_writeable_reg(struct device *dev, unsigned > int reg) > { > switch (reg) { > @@ -338,6 +346,8 @@ static int ingenic_drm_plane_atomic_check(struct > drm_plane *plane, > plane->state->fb->format->format != state->fb->format->format)) > crtc_state->mode_changed = true; > > + drm_atomic_helper_check_plane_damage(state->state, state); > + > return 0; > } > > @@ -440,6 +450,38 @@ void ingenic_drm_plane_config(struct device *dev, > } > } > > +void ingenic_drm_sync_data(struct device *dev, > + struct drm_plane_state *old_state, > + struct drm_plane_state *state) > +{ > + const struct drm_format_info *finfo = state->fb->format; > + struct ingenic_drm *priv = dev_get_drvdata(dev); > + struct drm_atomic_helper_damage_iter iter; > + unsigned int offset, i; > + struct drm_rect clip; > + dma_addr_t paddr; > + void *addr; > + > + if (!ingenic_drm_cached_gem_buf) > + return; > + > + drm_atomic_helper_damage_iter_init(&iter, old_state, state); > + > + drm_atomic_for_each_plane_damage(&iter, &clip) { > + for (i = 0; i < finfo->num_planes; i++) { > + paddr = drm_fb_cma_get_gem_addr(state->fb, state, i); > + addr = phys_to_virt(paddr); > + > + /* Ignore x1/x2 values, invalidate complete lines */ > + offset = clip.y1 * state->fb->pitches[i]; > + > + dma_cache_sync(priv->dev, addr + offset, > + (clip.y2 - clip.y1) * state->fb->pitches[i], > + DMA_TO_DEVICE); > + } > + } > +} > + > static void ingenic_drm_plane_atomic_update(struct drm_plane *plane, > struct drm_plane_state *oldstate) > { > @@ -450,6 +492,8 @@ static void > ingenic_drm_plane_atomic_update(struct drm_plane *plane, > dma_addr_t addr; > > if (state && state->fb) { > + ingenic_drm_sync_data(priv->dev, oldstate, state); > + > addr = drm_fb_cma_get_gem_addr(state->fb, state, 0); > width = state->src_w >> 16; > height = state->src_h >> 16; > @@ -605,7 +649,62 @@ static void ingenic_drm_disable_vblank(struct > drm_crtc *crtc) > regmap_update_bits(priv->map, JZ_REG_LCD_CTRL, JZ_LCD_CTRL_EOF_IRQ, > 0); > } > > -DEFINE_DRM_GEM_CMA_FOPS(ingenic_drm_fops); > +static struct drm_framebuffer * > +ingenic_drm_gem_fb_create(struct drm_device *dev, struct drm_file > *file, > + const struct drm_mode_fb_cmd2 *mode_cmd) > +{ > + if (ingenic_drm_cached_gem_buf) > + return drm_gem_fb_create_with_dirty(dev, file, mode_cmd); > + > + return drm_gem_fb_create(dev, file, mode_cmd); > +} > + > +static int ingenic_drm_gem_mmap(struct drm_gem_object *obj, > + struct vm_area_struct *vma) > +{ > + struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); > + struct device *dev = cma_obj->base.dev->dev; > + > + if (!ingenic_drm_cached_gem_buf) > + return drm_gem_cma_prime_mmap(obj, vma); > + > + /* > + * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set > the > + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want > to map > + * the whole buffer. > + */ > + vma->vm_flags &= ~VM_PFNMAP; > + vma->vm_pgoff = 0; > + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); > + > + return dma_mmap_attrs(dev, vma, cma_obj->vaddr, cma_obj->paddr, > + vma->vm_end - vma->vm_start, > + DMA_ATTR_NON_CONSISTENT); > +} > + > +static int ingenic_drm_gem_cma_mmap(struct file *filp, > + struct vm_area_struct *vma) > +{ > + int ret; > + > + ret = drm_gem_mmap(filp, vma); > + if (ret) > + return ret; > + > + return ingenic_drm_gem_mmap(vma->vm_private_data, vma); > +} > + > +static const struct file_operations ingenic_drm_fops = { > + .owner = THIS_MODULE, > + .open = drm_open, > + .release = drm_release, > + .unlocked_ioctl = drm_ioctl, > + .compat_ioctl = drm_compat_ioctl, > + .poll = drm_poll, > + .read = drm_read, > + .llseek = noop_llseek, > + .mmap = ingenic_drm_gem_cma_mmap, > +}; > > static struct drm_driver ingenic_drm_driver_data = { > .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, > @@ -669,7 +768,7 @@ static const struct drm_encoder_helper_funcs > ingenic_drm_encoder_helper_funcs = > }; > > static const struct drm_mode_config_funcs > ingenic_drm_mode_config_funcs = { > - .fb_create = drm_gem_fb_create, > + .fb_create = ingenic_drm_gem_fb_create, > .output_poll_changed = drm_fb_helper_output_poll_changed, > .atomic_check = drm_atomic_helper_check, > .atomic_commit = drm_atomic_helper_commit, > @@ -796,6 +895,8 @@ static int ingenic_drm_bind(struct device *dev) > return ret; > } > > + drm_plane_enable_fb_damage_clips(&priv->f1); > + > drm_crtc_helper_add(&priv->crtc, &ingenic_drm_crtc_helper_funcs); > > ret = drm_crtc_init_with_planes(drm, &priv->crtc, &priv->f1, > @@ -821,6 +922,8 @@ static int ingenic_drm_bind(struct device *dev) > return ret; > } > > + drm_plane_enable_fb_damage_clips(&priv->f0); > + > if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU)) { > ret = component_bind_all(dev, drm); > if (ret) { > diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.h > b/drivers/gpu/drm/ingenic/ingenic-drm.h > index 43f7d959cff7..df99f0f75d39 100644 > --- a/drivers/gpu/drm/ingenic/ingenic-drm.h > +++ b/drivers/gpu/drm/ingenic/ingenic-drm.h > @@ -168,6 +168,10 @@ void ingenic_drm_plane_config(struct device *dev, > struct drm_plane *plane, u32 fourcc); > void ingenic_drm_plane_disable(struct device *dev, struct drm_plane > *plane); > > +void ingenic_drm_sync_data(struct device *dev, > + struct drm_plane_state *old_state, > + struct drm_plane_state *state); > + > extern struct platform_driver *ingenic_ipu_driver_ptr; > > #endif /* DRIVERS_GPU_DRM_INGENIC_INGENIC_DRM_H */ > diff --git a/drivers/gpu/drm/ingenic/ingenic-ipu.c > b/drivers/gpu/drm/ingenic/ingenic-ipu.c > index fc8c6e970ee3..38c83e8cc6a5 100644 > --- a/drivers/gpu/drm/ingenic/ingenic-ipu.c > +++ b/drivers/gpu/drm/ingenic/ingenic-ipu.c > @@ -20,6 +20,7 @@ > > #include <drm/drm_atomic.h> > #include <drm/drm_atomic_helper.h> > +#include <drm/drm_damage_helper.h> > #include <drm/drm_drv.h> > #include <drm/drm_fb_cma_helper.h> > #include <drm/drm_fourcc.h> > @@ -316,6 +317,8 @@ static void > ingenic_ipu_plane_atomic_update(struct drm_plane *plane, > JZ_IPU_CTRL_CHIP_EN | JZ_IPU_CTRL_LCDC_SEL); > } > > + ingenic_drm_sync_data(ipu->master, oldstate, state); > + > /* New addresses will be committed in vblank handler... */ > ipu->addr_y = drm_fb_cma_get_gem_addr(state->fb, state, 0); > if (finfo->num_planes > 1) > @@ -534,7 +537,7 @@ static int ingenic_ipu_plane_atomic_check(struct > drm_plane *plane, > > if (!state->crtc || > !crtc_state->mode.hdisplay || !crtc_state->mode.vdisplay) > - return 0; > + goto out_check_damage; > > /* Plane must be fully visible */ > if (state->crtc_x < 0 || state->crtc_y < 0 || > @@ -551,7 +554,7 @@ static int ingenic_ipu_plane_atomic_check(struct > drm_plane *plane, > return -EINVAL; > > if (!osd_changed(state, plane->state)) > - return 0; > + goto out_check_damage; > > crtc_state->mode_changed = true; > > @@ -578,6 +581,9 @@ static int ingenic_ipu_plane_atomic_check(struct > drm_plane *plane, > ipu->denom_w = denom_w; > ipu->denom_h = denom_h; > > +out_check_damage: > + drm_atomic_helper_check_plane_damage(state->state, state); > + > return 0; > } > > @@ -759,6 +765,8 @@ static int ingenic_ipu_bind(struct device *dev, > struct device *master, void *d) > return err; > } > > + drm_plane_enable_fb_damage_clips(plane); > + > /* > * Sharpness settings range is [0,32] > * 0 : nearest-neighbor > -- > 2.28.0 >
Hi Paul.
On Wed, Sep 09, 2020 at 03:26:52PM +0200, Paul Cercueil wrote:
> Any love for my patch? I have more pending :)
I have looked through the patch a few times. And I did not find any
spelling errors. But the memory magic was beyond me so I hope someone
more knowledgeable can chime in here.
Sam
diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index 5dab9c3d0a52..bf571411b73f 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -9,6 +9,8 @@ #include <linux/component.h> #include <linux/clk.h> #include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> +#include <linux/io.h> #include <linux/module.h> #include <linux/of_device.h> #include <linux/platform_device.h> @@ -19,6 +21,7 @@ #include <drm/drm_bridge.h> #include <drm/drm_crtc.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_fb_cma_helper.h> @@ -76,6 +79,11 @@ static const u32 ingenic_drm_primary_formats[] = { DRM_FORMAT_XRGB8888, }; +static bool ingenic_drm_cached_gem_buf; +module_param_named(cached_gem_buffers, ingenic_drm_cached_gem_buf, bool, 0400); +MODULE_PARM_DESC(cached_gem_buffers, + "Enable fully cached GEM buffers [default=false]"); + static bool ingenic_drm_writeable_reg(struct device *dev, unsigned int reg) { switch (reg) { @@ -338,6 +346,8 @@ static int ingenic_drm_plane_atomic_check(struct drm_plane *plane, plane->state->fb->format->format != state->fb->format->format)) crtc_state->mode_changed = true; + drm_atomic_helper_check_plane_damage(state->state, state); + return 0; } @@ -440,6 +450,38 @@ void ingenic_drm_plane_config(struct device *dev, } } +void ingenic_drm_sync_data(struct device *dev, + struct drm_plane_state *old_state, + struct drm_plane_state *state) +{ + const struct drm_format_info *finfo = state->fb->format; + struct ingenic_drm *priv = dev_get_drvdata(dev); + struct drm_atomic_helper_damage_iter iter; + unsigned int offset, i; + struct drm_rect clip; + dma_addr_t paddr; + void *addr; + + if (!ingenic_drm_cached_gem_buf) + return; + + drm_atomic_helper_damage_iter_init(&iter, old_state, state); + + drm_atomic_for_each_plane_damage(&iter, &clip) { + for (i = 0; i < finfo->num_planes; i++) { + paddr = drm_fb_cma_get_gem_addr(state->fb, state, i); + addr = phys_to_virt(paddr); + + /* Ignore x1/x2 values, invalidate complete lines */ + offset = clip.y1 * state->fb->pitches[i]; + + dma_cache_sync(priv->dev, addr + offset, + (clip.y2 - clip.y1) * state->fb->pitches[i], + DMA_TO_DEVICE); + } + } +} + static void ingenic_drm_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *oldstate) { @@ -450,6 +492,8 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane, dma_addr_t addr; if (state && state->fb) { + ingenic_drm_sync_data(priv->dev, oldstate, state); + addr = drm_fb_cma_get_gem_addr(state->fb, state, 0); width = state->src_w >> 16; height = state->src_h >> 16; @@ -605,7 +649,62 @@ static void ingenic_drm_disable_vblank(struct drm_crtc *crtc) regmap_update_bits(priv->map, JZ_REG_LCD_CTRL, JZ_LCD_CTRL_EOF_IRQ, 0); } -DEFINE_DRM_GEM_CMA_FOPS(ingenic_drm_fops); +static struct drm_framebuffer * +ingenic_drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + if (ingenic_drm_cached_gem_buf) + return drm_gem_fb_create_with_dirty(dev, file, mode_cmd); + + return drm_gem_fb_create(dev, file, mode_cmd); +} + +static int ingenic_drm_gem_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) +{ + struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(obj); + struct device *dev = cma_obj->base.dev->dev; + + if (!ingenic_drm_cached_gem_buf) + return drm_gem_cma_prime_mmap(obj, vma); + + /* + * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map + * the whole buffer. + */ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_pgoff = 0; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + + return dma_mmap_attrs(dev, vma, cma_obj->vaddr, cma_obj->paddr, + vma->vm_end - vma->vm_start, + DMA_ATTR_NON_CONSISTENT); +} + +static int ingenic_drm_gem_cma_mmap(struct file *filp, + struct vm_area_struct *vma) +{ + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; + + return ingenic_drm_gem_mmap(vma->vm_private_data, vma); +} + +static const struct file_operations ingenic_drm_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .compat_ioctl = drm_compat_ioctl, + .poll = drm_poll, + .read = drm_read, + .llseek = noop_llseek, + .mmap = ingenic_drm_gem_cma_mmap, +}; static struct drm_driver ingenic_drm_driver_data = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, @@ -669,7 +768,7 @@ static const struct drm_encoder_helper_funcs ingenic_drm_encoder_helper_funcs = }; static const struct drm_mode_config_funcs ingenic_drm_mode_config_funcs = { - .fb_create = drm_gem_fb_create, + .fb_create = ingenic_drm_gem_fb_create, .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, @@ -796,6 +895,8 @@ static int ingenic_drm_bind(struct device *dev) return ret; } + drm_plane_enable_fb_damage_clips(&priv->f1); + drm_crtc_helper_add(&priv->crtc, &ingenic_drm_crtc_helper_funcs); ret = drm_crtc_init_with_planes(drm, &priv->crtc, &priv->f1, @@ -821,6 +922,8 @@ static int ingenic_drm_bind(struct device *dev) return ret; } + drm_plane_enable_fb_damage_clips(&priv->f0); + if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU)) { ret = component_bind_all(dev, drm); if (ret) { diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.h b/drivers/gpu/drm/ingenic/ingenic-drm.h index 43f7d959cff7..df99f0f75d39 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm.h +++ b/drivers/gpu/drm/ingenic/ingenic-drm.h @@ -168,6 +168,10 @@ void ingenic_drm_plane_config(struct device *dev, struct drm_plane *plane, u32 fourcc); void ingenic_drm_plane_disable(struct device *dev, struct drm_plane *plane); +void ingenic_drm_sync_data(struct device *dev, + struct drm_plane_state *old_state, + struct drm_plane_state *state); + extern struct platform_driver *ingenic_ipu_driver_ptr; #endif /* DRIVERS_GPU_DRM_INGENIC_INGENIC_DRM_H */ diff --git a/drivers/gpu/drm/ingenic/ingenic-ipu.c b/drivers/gpu/drm/ingenic/ingenic-ipu.c index fc8c6e970ee3..38c83e8cc6a5 100644 --- a/drivers/gpu/drm/ingenic/ingenic-ipu.c +++ b/drivers/gpu/drm/ingenic/ingenic-ipu.c @@ -20,6 +20,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_fourcc.h> @@ -316,6 +317,8 @@ static void ingenic_ipu_plane_atomic_update(struct drm_plane *plane, JZ_IPU_CTRL_CHIP_EN | JZ_IPU_CTRL_LCDC_SEL); } + ingenic_drm_sync_data(ipu->master, oldstate, state); + /* New addresses will be committed in vblank handler... */ ipu->addr_y = drm_fb_cma_get_gem_addr(state->fb, state, 0); if (finfo->num_planes > 1) @@ -534,7 +537,7 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane, if (!state->crtc || !crtc_state->mode.hdisplay || !crtc_state->mode.vdisplay) - return 0; + goto out_check_damage; /* Plane must be fully visible */ if (state->crtc_x < 0 || state->crtc_y < 0 || @@ -551,7 +554,7 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane, return -EINVAL; if (!osd_changed(state, plane->state)) - return 0; + goto out_check_damage; crtc_state->mode_changed = true; @@ -578,6 +581,9 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane, ipu->denom_w = denom_w; ipu->denom_h = denom_h; +out_check_damage: + drm_atomic_helper_check_plane_damage(state->state, state); + return 0; } @@ -759,6 +765,8 @@ static int ingenic_ipu_bind(struct device *dev, struct device *master, void *d) return err; } + drm_plane_enable_fb_damage_clips(plane); + /* * Sharpness settings range is [0,32] * 0 : nearest-neighbor
Ingenic SoCs are most notably used in cheap chinese handheld gaming consoles. There, the games and applications generally render in software directly into GEM buffers. Traditionally, GEM buffers are mapped write-combine. Writes to the buffer are accelerated, and reads are slow. Application doing lots of alpha-blending paint inside shadow buffers, which is then memcpy'd into the final GEM buffer. On recent Ingenic SoCs however, it is much faster to have a fully cached GEM buffer, in which applications paint directly, and whose data is invalidated before scanout, than having a write-combine GEM buffer, even when alpha blending is not used. Add an optional 'cached_gem_buffers' parameter to the ingenic-drm driver to allow GEM buffers to be mapped fully-cached, in order to speed up software rendering. v2: Use standard noncoherent DMA APIs v3: Use damage clips instead of invalidating full frames v4: Avoid dma_pgprot() which is not exported. Using vm_get_page_prot() is enough in this case. Signed-off-by: Paul Cercueil <paul@crapouillou.net> --- drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 107 +++++++++++++++++++++- drivers/gpu/drm/ingenic/ingenic-drm.h | 4 + drivers/gpu/drm/ingenic/ingenic-ipu.c | 12 ++- 3 files changed, 119 insertions(+), 4 deletions(-)