@@ -313,6 +313,7 @@ static void vc4_drm_unbind(struct device *dev)
drm_mode_config_cleanup(drm);
+ drm_atomic_private_obj_fini(&vc4->load_tracker);
drm_atomic_private_obj_fini(&vc4->ctm_manager);
drm_dev_put(drm);
@@ -207,6 +207,7 @@ struct vc4_dev {
struct drm_modeset_lock ctm_state_lock;
struct drm_private_obj ctm_manager;
+ struct drm_private_obj load_tracker;
};
static inline struct vc4_dev *
@@ -382,6 +383,16 @@ struct vc4_plane_state {
* when async update is not possible.
*/
bool dlist_initialized;
+
+ /* Load of this plane on the HVS block. The load is expressed in HVS
+ * cycles/sec.
+ */
+ u64 hvs_load;
+
+ /* Memory bandwidth needed for this plane. This is expressed in
+ * bytes/sec.
+ */
+ u64 membus_load;
};
static inline struct vc4_plane_state *
@@ -34,6 +34,18 @@ static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv)
return container_of(priv, struct vc4_ctm_state, base);
}
+struct vc4_load_tracker_state {
+ struct drm_private_state base;
+ u64 hvs_load;
+ u64 membus_load;
+};
+
+static struct vc4_load_tracker_state *
+to_vc4_load_tracker_state(struct drm_private_state *priv)
+{
+ return container_of(priv, struct vc4_load_tracker_state, base);
+}
+
static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state,
struct drm_private_obj *manager)
{
@@ -391,6 +403,81 @@ vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
return 0;
}
+static int vc4_load_tracker_atomic_check(struct drm_atomic_state *state)
+{
+ struct drm_plane_state *old_plane_state, *new_plane_state;
+ struct vc4_dev *vc4 = to_vc4_dev(state->dev);
+ struct vc4_load_tracker_state *load_state;
+ struct drm_private_state *priv_state;
+ struct drm_plane *plane;
+ int i;
+
+ priv_state = drm_atomic_get_private_obj_state(state,
+ &vc4->load_tracker);
+ if (IS_ERR(priv_state))
+ return PTR_ERR(priv_state);
+
+ load_state = to_vc4_load_tracker_state(priv_state);
+ for_each_oldnew_plane_in_state(state, plane, old_plane_state,
+ new_plane_state, i) {
+ struct vc4_plane_state *vc4_plane_state;
+
+ if (old_plane_state->fb && old_plane_state->crtc) {
+ vc4_plane_state = to_vc4_plane_state(old_plane_state);
+ load_state->membus_load -= vc4_plane_state->membus_load;
+ load_state->hvs_load -= vc4_plane_state->hvs_load;
+ }
+
+ if (new_plane_state->fb && new_plane_state->crtc) {
+ vc4_plane_state = to_vc4_plane_state(new_plane_state);
+ load_state->membus_load += vc4_plane_state->membus_load;
+ load_state->hvs_load += vc4_plane_state->hvs_load;
+ }
+ }
+
+ /* The absolute limit is 2Gbyte/sec, but let's take a margin to let
+ * the system work when other blocks are accessing the memory.
+ */
+ if (load_state->membus_load > SZ_1G + SZ_512M)
+ return -ENOSPC;
+
+ /* HVS clock is supposed to run @ 250Mhz, let's take a margin and
+ * consider the maximum number of cycles is 240M.
+ */
+ if (load_state->hvs_load > 240000000ULL)
+ return -ENOSPC;
+
+ return 0;
+}
+
+static struct drm_private_state *
+vc4_load_tracker_duplicate_state(struct drm_private_obj *obj)
+{
+ struct vc4_load_tracker_state *state;
+
+ state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return NULL;
+
+ __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+ return &state->base;
+}
+
+static void vc4_load_tracker_destroy_state(struct drm_private_obj *obj,
+ struct drm_private_state *state)
+{
+ struct vc4_load_tracker_state *load_state;
+
+ load_state = to_vc4_load_tracker_state(state);
+ kfree(load_state);
+}
+
+static const struct drm_private_state_funcs vc4_load_tracker_state_funcs = {
+ .atomic_duplicate_state = vc4_load_tracker_duplicate_state,
+ .atomic_destroy_state = vc4_load_tracker_destroy_state,
+};
+
static int
vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
{
@@ -400,7 +487,11 @@ vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
if (ret < 0)
return ret;
- return drm_atomic_helper_check(dev, state);
+ ret = drm_atomic_helper_check(dev, state);
+ if (ret)
+ return ret;
+
+ return vc4_load_tracker_atomic_check(state);
}
static const struct drm_mode_config_funcs vc4_mode_funcs = {
@@ -413,6 +504,7 @@ int vc4_kms_load(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_ctm_state *ctm_state;
+ struct vc4_load_tracker_state *load_state;
int ret;
sema_init(&vc4->async_modeset, 1);
@@ -442,6 +534,15 @@ int vc4_kms_load(struct drm_device *dev)
drm_atomic_private_obj_init(dev, &vc4->ctm_manager, &ctm_state->base,
&vc4_ctm_state_funcs);
+ load_state = kzalloc(sizeof(*load_state), GFP_KERNEL);
+ if (!load_state) {
+ drm_atomic_private_obj_fini(&vc4->ctm_manager);
+ return -ENOMEM;
+ }
+
+ drm_atomic_private_obj_init(dev, &vc4->load_tracker, &load_state->base,
+ &vc4_load_tracker_state_funcs);
+
drm_mode_config_reset(dev);
drm_kms_helper_poll_init(dev);
@@ -488,6 +488,61 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state,
}
}
+static void vc4_plane_calc_load(struct drm_plane_state *state)
+{
+ unsigned int hvs_load_shift, vrefresh, i;
+ struct drm_framebuffer *fb = state->fb;
+ struct vc4_plane_state *vc4_state;
+ struct drm_crtc_state *crtc_state;
+ unsigned int vscale_factor;
+
+ vc4_state = to_vc4_plane_state(state);
+ crtc_state = drm_atomic_get_existing_crtc_state(state->state,
+ state->crtc);
+ vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
+
+ /* The HVS is able to process 2 pixels/cycle when scaling the source,
+ * 4 pixels/cycle otherwise.
+ * Alpha blending step seems to be pipelined and it's always operating
+ * at 4 pixels/cycle, so the limiting aspect here seems to be the
+ * scaler block.
+ * HVS load is expressed in clk-cycles/sec (AKA Hz).
+ */
+ if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[1] != VC4_SCALING_NONE)
+ hvs_load_shift = 1;
+ else
+ hvs_load_shift = 2;
+
+ vc4_state->membus_load = 0;
+ vc4_state->hvs_load = 0;
+ for (i = 0; i < fb->format->num_planes; i++) {
+ /* Even if the bandwidth/plane required for a single frame is
+ *
+ * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
+ *
+ * when downscaling, we have to read more pixels per line in
+ * the time frame reserved for a single line, so the bandwidth
+ * demand can be punctually higher. To account for that, we
+ * calculate the down-scaling factor and multiply the plane
+ * load by this number. We're likely over-estimating the read
+ * demand, but that's better than under-estimating it.
+ */
+ vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
+ vc4_state->crtc_h);
+ vc4_state->membus_load += vc4_state->src_w[i] *
+ vc4_state->src_h[i] * vscale_factor *
+ fb->format->cpp[i];
+ vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
+ }
+
+ vc4_state->hvs_load *= vrefresh;
+ vc4_state->hvs_load >>= hvs_load_shift;
+ vc4_state->membus_load *= vrefresh;
+}
+
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
@@ -888,6 +943,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
*/
vc4_state->dlist_initialized = 1;
+ vc4_plane_calc_load(state);
+
return 0;
}