diff mbox series

drm/msm/dp: move add fail safe mode to dp_connector_get_mode()

Message ID 1650671124-14030-1-git-send-email-quic_khsieh@quicinc.com (mailing list archive)
State Not Applicable
Headers show
Series drm/msm/dp: move add fail safe mode to dp_connector_get_mode() | expand

Commit Message

Kuogee Hsieh April 22, 2022, 11:45 p.m. UTC
Current DP driver implementation has adding safe mode done at
dp_hpd_plug_handle() which is expected to be executed under event
thread context.

However there is possible circular locking happen (see blow stack trace)
after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which
is executed under drm_thread context.

To break this circular locking, this patch have safe mode added at
dp_connector_get_mode() which is executed under drm thread context.
Therefore no lock acquired required for &dev->mode_config.mutex while
adding fail safe mode since it has been hold by drm thread already.

======================================================
 WARNING: possible circular locking dependency detected
 5.15.35-lockdep #6 Tainted: G        W
 ------------------------------------------------------
 frecon/429 is trying to acquire lock:
 ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at:
dp_panel_add_fail_safe_mode+0x4c/0xa0

 but task is already holding lock:
 ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at: lock_crtcs+0xb4/0x124

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}:
        __mutex_lock_common+0x174/0x1a64
        mutex_lock_nested+0x98/0xac
        lock_crtcs+0xb4/0x124
        msm_atomic_commit_tail+0x330/0x748
        commit_tail+0x19c/0x278
        drm_atomic_helper_commit+0x1dc/0x1f0
        drm_atomic_commit+0xc0/0xd8
        drm_atomic_helper_set_config+0xb4/0x134
        drm_mode_setcrtc+0x688/0x1248
        drm_ioctl_kernel+0x1e4/0x338
        drm_ioctl+0x3a4/0x684
        __arm64_sys_ioctl+0x118/0x154
        invoke_syscall+0x78/0x224
        el0_svc_common+0x178/0x200
        do_el0_svc+0x94/0x13c
        el0_svc+0x5c/0xec
        el0t_64_sync_handler+0x78/0x108
        el0t_64_sync+0x1a4/0x1a8

 -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}:
        __mutex_lock_common+0x174/0x1a64
        ww_mutex_lock+0xb8/0x278
        modeset_lock+0x304/0x4ac
        drm_modeset_lock+0x4c/0x7c
        drmm_mode_config_init+0x4a8/0xc50
        msm_drm_init+0x274/0xac0
        msm_drm_bind+0x20/0x2c
        try_to_bring_up_master+0x3dc/0x470
        __component_add+0x18c/0x3c0
        component_add+0x1c/0x28
        dp_display_probe+0x954/0xa98
        platform_probe+0x124/0x15c
        really_probe+0x1b0/0x5f8
        __driver_probe_device+0x174/0x20c
        driver_probe_device+0x70/0x134
        __device_attach_driver+0x130/0x1d0
        bus_for_each_drv+0xfc/0x14c
        __device_attach+0x1bc/0x2bc
        device_initial_probe+0x1c/0x28
        bus_probe_device+0x94/0x178
        deferred_probe_work_func+0x1a4/0x1f0
        process_one_work+0x5d4/0x9dc
        worker_thread+0x898/0xccc
        kthread+0x2d4/0x3d4
        ret_from_fork+0x10/0x20

 -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}:
        ww_acquire_init+0x1c4/0x2c8
        drm_modeset_acquire_init+0x44/0xc8
        drm_helper_probe_single_connector_modes+0xb0/0x12dc
        drm_mode_getconnector+0x5dc/0xfe8
        drm_ioctl_kernel+0x1e4/0x338
        drm_ioctl+0x3a4/0x684
        __arm64_sys_ioctl+0x118/0x154
        invoke_syscall+0x78/0x224
        el0_svc_common+0x178/0x200
        do_el0_svc+0x94/0x13c
        el0_svc+0x5c/0xec
        el0t_64_sync_handler+0x78/0x108
        el0t_64_sync+0x1a4/0x1a8

 -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}:
        __lock_acquire+0x2650/0x672c
        lock_acquire+0x1b4/0x4ac
        __mutex_lock_common+0x174/0x1a64
        mutex_lock_nested+0x98/0xac
        dp_panel_add_fail_safe_mode+0x4c/0xa0
        dp_hpd_plug_handle+0x1f0/0x280
        dp_bridge_enable+0x94/0x2b8
        drm_atomic_bridge_chain_enable+0x11c/0x168
        drm_atomic_helper_commit_modeset_enables+0x500/0x740
        msm_atomic_commit_tail+0x3e4/0x748
        commit_tail+0x19c/0x278
        drm_atomic_helper_commit+0x1dc/0x1f0
        drm_atomic_commit+0xc0/0xd8
        drm_atomic_helper_set_config+0xb4/0x134
        drm_mode_setcrtc+0x688/0x1248
        drm_ioctl_kernel+0x1e4/0x338
        drm_ioctl+0x3a4/0x684
        __arm64_sys_ioctl+0x118/0x154
        invoke_syscall+0x78/0x224
        el0_svc_common+0x178/0x200
        do_el0_svc+0x94/0x13c
        el0_svc+0x5c/0xec
        el0t_64_sync_handler+0x78/0x108
        el0t_64_sync+0x1a4/0x1a8

Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
---
 drivers/gpu/drm/msm/dp/dp_display.c |  6 ------
 drivers/gpu/drm/msm/dp/dp_panel.c   | 23 +++++++++++++----------
 2 files changed, 13 insertions(+), 16 deletions(-)

Comments

Stephen Boyd April 23, 2022, 12:02 a.m. UTC | #1
Quoting Kuogee Hsieh (2022-04-22 16:45:23)
> Current DP driver implementation has adding safe mode done at
> dp_hpd_plug_handle() which is expected to be executed under event
> thread context.
>
> However there is possible circular locking happen (see blow stack trace)
> after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which
> is executed under drm_thread context.
>
> To break this circular locking, this patch have safe mode added at
> dp_connector_get_mode() which is executed under drm thread context.
> Therefore no lock acquired required for &dev->mode_config.mutex while
> adding fail safe mode since it has been hold by drm thread already.

Reported-by: Douglas Anderson <dianders@chromium.org>
Fixes: 8b2c181e3dcf ("drm/msm/dp: add fail safe mode outside of
event_mutex context")
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Dmitry Baryshkov April 23, 2022, 12:07 a.m. UTC | #2
On 23/04/2022 02:45, Kuogee Hsieh wrote:
> Current DP driver implementation has adding safe mode done at
> dp_hpd_plug_handle() which is expected to be executed under event
> thread context.
> 
> However there is possible circular locking happen (see blow stack trace)
> after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which
> is executed under drm_thread context.
> 
> To break this circular locking, this patch have safe mode added at
> dp_connector_get_mode() which is executed under drm thread context.
> Therefore no lock acquired required for &dev->mode_config.mutex while
> adding fail safe mode since it has been hold by drm thread already.
> 
> ======================================================
>   WARNING: possible circular locking dependency detected
>   5.15.35-lockdep #6 Tainted: G        W
>   ------------------------------------------------------
>   frecon/429 is trying to acquire lock:
>   ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at:
> dp_panel_add_fail_safe_mode+0x4c/0xa0
> 
>   but task is already holding lock:
>   ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at: lock_crtcs+0xb4/0x124
> 
>   which lock already depends on the new lock.
> 
>   the existing dependency chain (in reverse order) is:
> 
>   -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}:
>          __mutex_lock_common+0x174/0x1a64
>          mutex_lock_nested+0x98/0xac
>          lock_crtcs+0xb4/0x124
>          msm_atomic_commit_tail+0x330/0x748
>          commit_tail+0x19c/0x278
>          drm_atomic_helper_commit+0x1dc/0x1f0
>          drm_atomic_commit+0xc0/0xd8
>          drm_atomic_helper_set_config+0xb4/0x134
>          drm_mode_setcrtc+0x688/0x1248
>          drm_ioctl_kernel+0x1e4/0x338
>          drm_ioctl+0x3a4/0x684
>          __arm64_sys_ioctl+0x118/0x154
>          invoke_syscall+0x78/0x224
>          el0_svc_common+0x178/0x200
>          do_el0_svc+0x94/0x13c
>          el0_svc+0x5c/0xec
>          el0t_64_sync_handler+0x78/0x108
>          el0t_64_sync+0x1a4/0x1a8
> 
>   -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}:
>          __mutex_lock_common+0x174/0x1a64
>          ww_mutex_lock+0xb8/0x278
>          modeset_lock+0x304/0x4ac
>          drm_modeset_lock+0x4c/0x7c
>          drmm_mode_config_init+0x4a8/0xc50
>          msm_drm_init+0x274/0xac0
>          msm_drm_bind+0x20/0x2c
>          try_to_bring_up_master+0x3dc/0x470
>          __component_add+0x18c/0x3c0
>          component_add+0x1c/0x28
>          dp_display_probe+0x954/0xa98
>          platform_probe+0x124/0x15c
>          really_probe+0x1b0/0x5f8
>          __driver_probe_device+0x174/0x20c
>          driver_probe_device+0x70/0x134
>          __device_attach_driver+0x130/0x1d0
>          bus_for_each_drv+0xfc/0x14c
>          __device_attach+0x1bc/0x2bc
>          device_initial_probe+0x1c/0x28
>          bus_probe_device+0x94/0x178
>          deferred_probe_work_func+0x1a4/0x1f0
>          process_one_work+0x5d4/0x9dc
>          worker_thread+0x898/0xccc
>          kthread+0x2d4/0x3d4
>          ret_from_fork+0x10/0x20
> 
>   -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}:
>          ww_acquire_init+0x1c4/0x2c8
>          drm_modeset_acquire_init+0x44/0xc8
>          drm_helper_probe_single_connector_modes+0xb0/0x12dc
>          drm_mode_getconnector+0x5dc/0xfe8
>          drm_ioctl_kernel+0x1e4/0x338
>          drm_ioctl+0x3a4/0x684
>          __arm64_sys_ioctl+0x118/0x154
>          invoke_syscall+0x78/0x224
>          el0_svc_common+0x178/0x200
>          do_el0_svc+0x94/0x13c
>          el0_svc+0x5c/0xec
>          el0t_64_sync_handler+0x78/0x108
>          el0t_64_sync+0x1a4/0x1a8
> 
>   -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}:
>          __lock_acquire+0x2650/0x672c
>          lock_acquire+0x1b4/0x4ac
>          __mutex_lock_common+0x174/0x1a64
>          mutex_lock_nested+0x98/0xac
>          dp_panel_add_fail_safe_mode+0x4c/0xa0
>          dp_hpd_plug_handle+0x1f0/0x280
>          dp_bridge_enable+0x94/0x2b8
>          drm_atomic_bridge_chain_enable+0x11c/0x168
>          drm_atomic_helper_commit_modeset_enables+0x500/0x740
>          msm_atomic_commit_tail+0x3e4/0x748
>          commit_tail+0x19c/0x278
>          drm_atomic_helper_commit+0x1dc/0x1f0
>          drm_atomic_commit+0xc0/0xd8
>          drm_atomic_helper_set_config+0xb4/0x134
>          drm_mode_setcrtc+0x688/0x1248
>          drm_ioctl_kernel+0x1e4/0x338
>          drm_ioctl+0x3a4/0x684
>          __arm64_sys_ioctl+0x118/0x154
>          invoke_syscall+0x78/0x224
>          el0_svc_common+0x178/0x200
>          do_el0_svc+0x94/0x13c
>          el0_svc+0x5c/0xec
>          el0t_64_sync_handler+0x78/0x108
>          el0t_64_sync+0x1a4/0x1a8
> 
> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
> ---
>   drivers/gpu/drm/msm/dp/dp_display.c |  6 ------
>   drivers/gpu/drm/msm/dp/dp_panel.c   | 23 +++++++++++++----------
>   2 files changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
> index 92cd50f..01453db 100644
> --- a/drivers/gpu/drm/msm/dp/dp_display.c
> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
> @@ -555,12 +555,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
>   
>   	mutex_unlock(&dp->event_mutex);
>   
> -	/*
> -	 * add fail safe mode outside event_mutex scope
> -	 * to avoid potiential circular lock with drm thread
> -	 */
> -	dp_panel_add_fail_safe_mode(dp->dp_display.connector);
> -
>   	/* uevent will complete connection part */
>   	return 0;
>   };
> diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c
> index 1aa9aa8c..23fee42 100644
> --- a/drivers/gpu/drm/msm/dp/dp_panel.c
> +++ b/drivers/gpu/drm/msm/dp/dp_panel.c
> @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct drm_connector *connector,
>   	return rc;
>   }
>   
> -void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
> -{
> -	/* fail safe edid */
> -	mutex_lock(&connector->dev->mode_config.mutex);
> -	if (drm_add_modes_noedid(connector, 640, 480))
> -		drm_set_preferred_mode(connector, 640, 480);
> -	mutex_unlock(&connector->dev->mode_config.mutex);
> -}
> -
>   int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
>   	struct drm_connector *connector)
>   {
> @@ -216,7 +207,11 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
>   			goto end;
>   		}
>   
> -		dp_panel_add_fail_safe_mode(connector);
> +		/* fail safe edid */
> +		mutex_lock(&connector->dev->mode_config.mutex);
> +		if (drm_add_modes_noedid(connector, 640, 480))
> +			drm_set_preferred_mode(connector, 640, 480);
> +		mutex_unlock(&connector->dev->mode_config.mutex);
>   	}
>   
>   	if (panel->aux_cfg_update_done) {
> @@ -266,6 +261,14 @@ int dp_panel_get_modes(struct dp_panel *dp_panel,
>   		return -EINVAL;
>   	}
>   
> +	/*
> +	 * add fail safe mode (640x480) here
> +	 * since we are executed in drm_thread context,
> +	 * no mode_config.mutex acquired required
> +	 */
> +	if (drm_add_modes_noedid(connector, 640, 480))
> +		drm_set_preferred_mode(connector, 640, 480);
> +
>   	if (dp_panel->edid)
>   		return dp_panel_update_modes(connector, dp_panel->edid);
>   
Also, wouldn't calling get_modes() several times make cause adding more 
and more 640x480 modes to the modes list?
Abhinav Kumar April 23, 2022, 12:12 a.m. UTC | #3
On 4/22/2022 5:07 PM, Dmitry Baryshkov wrote:
> On 23/04/2022 02:45, Kuogee Hsieh wrote:
>> Current DP driver implementation has adding safe mode done at
>> dp_hpd_plug_handle() which is expected to be executed under event
>> thread context.
>>
>> However there is possible circular locking happen (see blow stack trace)
>> after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which
>> is executed under drm_thread context.
>>
>> To break this circular locking, this patch have safe mode added at
>> dp_connector_get_mode() which is executed under drm thread context.
>> Therefore no lock acquired required for &dev->mode_config.mutex while
>> adding fail safe mode since it has been hold by drm thread already.
>>
>> ======================================================
>>   WARNING: possible circular locking dependency detected
>>   5.15.35-lockdep #6 Tainted: G        W
>>   ------------------------------------------------------
>>   frecon/429 is trying to acquire lock:
>>   ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at:
>> dp_panel_add_fail_safe_mode+0x4c/0xa0
>>
>>   but task is already holding lock:
>>   ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at: 
>> lock_crtcs+0xb4/0x124
>>
>>   which lock already depends on the new lock.
>>
>>   the existing dependency chain (in reverse order) is:
>>
>>   -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}:
>>          __mutex_lock_common+0x174/0x1a64
>>          mutex_lock_nested+0x98/0xac
>>          lock_crtcs+0xb4/0x124
>>          msm_atomic_commit_tail+0x330/0x748
>>          commit_tail+0x19c/0x278
>>          drm_atomic_helper_commit+0x1dc/0x1f0
>>          drm_atomic_commit+0xc0/0xd8
>>          drm_atomic_helper_set_config+0xb4/0x134
>>          drm_mode_setcrtc+0x688/0x1248
>>          drm_ioctl_kernel+0x1e4/0x338
>>          drm_ioctl+0x3a4/0x684
>>          __arm64_sys_ioctl+0x118/0x154
>>          invoke_syscall+0x78/0x224
>>          el0_svc_common+0x178/0x200
>>          do_el0_svc+0x94/0x13c
>>          el0_svc+0x5c/0xec
>>          el0t_64_sync_handler+0x78/0x108
>>          el0t_64_sync+0x1a4/0x1a8
>>
>>   -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}:
>>          __mutex_lock_common+0x174/0x1a64
>>          ww_mutex_lock+0xb8/0x278
>>          modeset_lock+0x304/0x4ac
>>          drm_modeset_lock+0x4c/0x7c
>>          drmm_mode_config_init+0x4a8/0xc50
>>          msm_drm_init+0x274/0xac0
>>          msm_drm_bind+0x20/0x2c
>>          try_to_bring_up_master+0x3dc/0x470
>>          __component_add+0x18c/0x3c0
>>          component_add+0x1c/0x28
>>          dp_display_probe+0x954/0xa98
>>          platform_probe+0x124/0x15c
>>          really_probe+0x1b0/0x5f8
>>          __driver_probe_device+0x174/0x20c
>>          driver_probe_device+0x70/0x134
>>          __device_attach_driver+0x130/0x1d0
>>          bus_for_each_drv+0xfc/0x14c
>>          __device_attach+0x1bc/0x2bc
>>          device_initial_probe+0x1c/0x28
>>          bus_probe_device+0x94/0x178
>>          deferred_probe_work_func+0x1a4/0x1f0
>>          process_one_work+0x5d4/0x9dc
>>          worker_thread+0x898/0xccc
>>          kthread+0x2d4/0x3d4
>>          ret_from_fork+0x10/0x20
>>
>>   -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}:
>>          ww_acquire_init+0x1c4/0x2c8
>>          drm_modeset_acquire_init+0x44/0xc8
>>          drm_helper_probe_single_connector_modes+0xb0/0x12dc
>>          drm_mode_getconnector+0x5dc/0xfe8
>>          drm_ioctl_kernel+0x1e4/0x338
>>          drm_ioctl+0x3a4/0x684
>>          __arm64_sys_ioctl+0x118/0x154
>>          invoke_syscall+0x78/0x224
>>          el0_svc_common+0x178/0x200
>>          do_el0_svc+0x94/0x13c
>>          el0_svc+0x5c/0xec
>>          el0t_64_sync_handler+0x78/0x108
>>          el0t_64_sync+0x1a4/0x1a8
>>
>>   -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}:
>>          __lock_acquire+0x2650/0x672c
>>          lock_acquire+0x1b4/0x4ac
>>          __mutex_lock_common+0x174/0x1a64
>>          mutex_lock_nested+0x98/0xac
>>          dp_panel_add_fail_safe_mode+0x4c/0xa0
>>          dp_hpd_plug_handle+0x1f0/0x280
>>          dp_bridge_enable+0x94/0x2b8
>>          drm_atomic_bridge_chain_enable+0x11c/0x168
>>          drm_atomic_helper_commit_modeset_enables+0x500/0x740
>>          msm_atomic_commit_tail+0x3e4/0x748
>>          commit_tail+0x19c/0x278
>>          drm_atomic_helper_commit+0x1dc/0x1f0
>>          drm_atomic_commit+0xc0/0xd8
>>          drm_atomic_helper_set_config+0xb4/0x134
>>          drm_mode_setcrtc+0x688/0x1248
>>          drm_ioctl_kernel+0x1e4/0x338
>>          drm_ioctl+0x3a4/0x684
>>          __arm64_sys_ioctl+0x118/0x154
>>          invoke_syscall+0x78/0x224
>>          el0_svc_common+0x178/0x200
>>          do_el0_svc+0x94/0x13c
>>          el0_svc+0x5c/0xec
>>          el0t_64_sync_handler+0x78/0x108
>>          el0t_64_sync+0x1a4/0x1a8
>>
>> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
>> ---
>>   drivers/gpu/drm/msm/dp/dp_display.c |  6 ------
>>   drivers/gpu/drm/msm/dp/dp_panel.c   | 23 +++++++++++++----------
>>   2 files changed, 13 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c 
>> b/drivers/gpu/drm/msm/dp/dp_display.c
>> index 92cd50f..01453db 100644
>> --- a/drivers/gpu/drm/msm/dp/dp_display.c
>> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
>> @@ -555,12 +555,6 @@ static int dp_hpd_plug_handle(struct 
>> dp_display_private *dp, u32 data)
>>       mutex_unlock(&dp->event_mutex);
>> -    /*
>> -     * add fail safe mode outside event_mutex scope
>> -     * to avoid potiential circular lock with drm thread
>> -     */
>> -    dp_panel_add_fail_safe_mode(dp->dp_display.connector);
>> -
>>       /* uevent will complete connection part */
>>       return 0;
>>   };
>> diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c 
>> b/drivers/gpu/drm/msm/dp/dp_panel.c
>> index 1aa9aa8c..23fee42 100644
>> --- a/drivers/gpu/drm/msm/dp/dp_panel.c
>> +++ b/drivers/gpu/drm/msm/dp/dp_panel.c
>> @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct 
>> drm_connector *connector,
>>       return rc;
>>   }
>> -void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
>> -{
>> -    /* fail safe edid */
>> -    mutex_lock(&connector->dev->mode_config.mutex);
>> -    if (drm_add_modes_noedid(connector, 640, 480))
>> -        drm_set_preferred_mode(connector, 640, 480);
>> -    mutex_unlock(&connector->dev->mode_config.mutex);
>> -}
>> -
>>   int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
>>       struct drm_connector *connector)
>>   {
>> @@ -216,7 +207,11 @@ int dp_panel_read_sink_caps(struct dp_panel 
>> *dp_panel,
>>               goto end;
>>           }
>> -        dp_panel_add_fail_safe_mode(connector);
>> +        /* fail safe edid */
>> +        mutex_lock(&connector->dev->mode_config.mutex);
>> +        if (drm_add_modes_noedid(connector, 640, 480))
>> +            drm_set_preferred_mode(connector, 640, 480);
>> +        mutex_unlock(&connector->dev->mode_config.mutex);
>>       }
>>       if (panel->aux_cfg_update_done) {
>> @@ -266,6 +261,14 @@ int dp_panel_get_modes(struct dp_panel *dp_panel,
>>           return -EINVAL;
>>       }
>> +    /*
>> +     * add fail safe mode (640x480) here
>> +     * since we are executed in drm_thread context,
>> +     * no mode_config.mutex acquired required
>> +     */
>> +    if (drm_add_modes_noedid(connector, 640, 480))
>> +        drm_set_preferred_mode(connector, 640, 480);
>> +
>>       if (dp_panel->edid)
>>           return dp_panel_update_modes(connector, dp_panel->edid);
> Also, wouldn't calling get_modes() several times make cause adding more 
> and more 640x480 modes to the modes list?
> 

Shouldnt DRM be blocking that here? Call should trickle down here only 
if count_modes was 0

    if (out_resp->count_modes == 0) {
         if (is_current_master)
             connector->funcs->fill_modes(connector,
                              dev->mode_config.max_width,
                              dev->mode_config.max_height);
         else
             drm_dbg_kms(dev, "User-space requested a forced probe on 
[CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe",
                     connector->base.id, connector->name);
     }

>
Dmitry Baryshkov April 23, 2022, 6:25 a.m. UTC | #4
On Sat, 23 Apr 2022 at 03:12, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
>
>
> On 4/22/2022 5:07 PM, Dmitry Baryshkov wrote:
> > On 23/04/2022 02:45, Kuogee Hsieh wrote:
> >> Current DP driver implementation has adding safe mode done at
> >> dp_hpd_plug_handle() which is expected to be executed under event
> >> thread context.
> >>
> >> However there is possible circular locking happen (see blow stack trace)
> >> after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which
> >> is executed under drm_thread context.
> >>
> >> To break this circular locking, this patch have safe mode added at
> >> dp_connector_get_mode() which is executed under drm thread context.
> >> Therefore no lock acquired required for &dev->mode_config.mutex while
> >> adding fail safe mode since it has been hold by drm thread already.
> >>
> >> ======================================================
> >>   WARNING: possible circular locking dependency detected
> >>   5.15.35-lockdep #6 Tainted: G        W
> >>   ------------------------------------------------------
> >>   frecon/429 is trying to acquire lock:
> >>   ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at:
> >> dp_panel_add_fail_safe_mode+0x4c/0xa0
> >>
> >>   but task is already holding lock:
> >>   ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at:
> >> lock_crtcs+0xb4/0x124
> >>
> >>   which lock already depends on the new lock.
> >>
> >>   the existing dependency chain (in reverse order) is:
> >>
> >>   -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}:
> >>          __mutex_lock_common+0x174/0x1a64
> >>          mutex_lock_nested+0x98/0xac
> >>          lock_crtcs+0xb4/0x124
> >>          msm_atomic_commit_tail+0x330/0x748
> >>          commit_tail+0x19c/0x278
> >>          drm_atomic_helper_commit+0x1dc/0x1f0
> >>          drm_atomic_commit+0xc0/0xd8
> >>          drm_atomic_helper_set_config+0xb4/0x134
> >>          drm_mode_setcrtc+0x688/0x1248
> >>          drm_ioctl_kernel+0x1e4/0x338
> >>          drm_ioctl+0x3a4/0x684
> >>          __arm64_sys_ioctl+0x118/0x154
> >>          invoke_syscall+0x78/0x224
> >>          el0_svc_common+0x178/0x200
> >>          do_el0_svc+0x94/0x13c
> >>          el0_svc+0x5c/0xec
> >>          el0t_64_sync_handler+0x78/0x108
> >>          el0t_64_sync+0x1a4/0x1a8
> >>
> >>   -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}:
> >>          __mutex_lock_common+0x174/0x1a64
> >>          ww_mutex_lock+0xb8/0x278
> >>          modeset_lock+0x304/0x4ac
> >>          drm_modeset_lock+0x4c/0x7c
> >>          drmm_mode_config_init+0x4a8/0xc50
> >>          msm_drm_init+0x274/0xac0
> >>          msm_drm_bind+0x20/0x2c
> >>          try_to_bring_up_master+0x3dc/0x470
> >>          __component_add+0x18c/0x3c0
> >>          component_add+0x1c/0x28
> >>          dp_display_probe+0x954/0xa98
> >>          platform_probe+0x124/0x15c
> >>          really_probe+0x1b0/0x5f8
> >>          __driver_probe_device+0x174/0x20c
> >>          driver_probe_device+0x70/0x134
> >>          __device_attach_driver+0x130/0x1d0
> >>          bus_for_each_drv+0xfc/0x14c
> >>          __device_attach+0x1bc/0x2bc
> >>          device_initial_probe+0x1c/0x28
> >>          bus_probe_device+0x94/0x178
> >>          deferred_probe_work_func+0x1a4/0x1f0
> >>          process_one_work+0x5d4/0x9dc
> >>          worker_thread+0x898/0xccc
> >>          kthread+0x2d4/0x3d4
> >>          ret_from_fork+0x10/0x20
> >>
> >>   -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}:
> >>          ww_acquire_init+0x1c4/0x2c8
> >>          drm_modeset_acquire_init+0x44/0xc8
> >>          drm_helper_probe_single_connector_modes+0xb0/0x12dc
> >>          drm_mode_getconnector+0x5dc/0xfe8
> >>          drm_ioctl_kernel+0x1e4/0x338
> >>          drm_ioctl+0x3a4/0x684
> >>          __arm64_sys_ioctl+0x118/0x154
> >>          invoke_syscall+0x78/0x224
> >>          el0_svc_common+0x178/0x200
> >>          do_el0_svc+0x94/0x13c
> >>          el0_svc+0x5c/0xec
> >>          el0t_64_sync_handler+0x78/0x108
> >>          el0t_64_sync+0x1a4/0x1a8
> >>
> >>   -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}:
> >>          __lock_acquire+0x2650/0x672c
> >>          lock_acquire+0x1b4/0x4ac
> >>          __mutex_lock_common+0x174/0x1a64
> >>          mutex_lock_nested+0x98/0xac
> >>          dp_panel_add_fail_safe_mode+0x4c/0xa0
> >>          dp_hpd_plug_handle+0x1f0/0x280
> >>          dp_bridge_enable+0x94/0x2b8
> >>          drm_atomic_bridge_chain_enable+0x11c/0x168
> >>          drm_atomic_helper_commit_modeset_enables+0x500/0x740
> >>          msm_atomic_commit_tail+0x3e4/0x748
> >>          commit_tail+0x19c/0x278
> >>          drm_atomic_helper_commit+0x1dc/0x1f0
> >>          drm_atomic_commit+0xc0/0xd8
> >>          drm_atomic_helper_set_config+0xb4/0x134
> >>          drm_mode_setcrtc+0x688/0x1248
> >>          drm_ioctl_kernel+0x1e4/0x338
> >>          drm_ioctl+0x3a4/0x684
> >>          __arm64_sys_ioctl+0x118/0x154
> >>          invoke_syscall+0x78/0x224
> >>          el0_svc_common+0x178/0x200
> >>          do_el0_svc+0x94/0x13c
> >>          el0_svc+0x5c/0xec
> >>          el0t_64_sync_handler+0x78/0x108
> >>          el0t_64_sync+0x1a4/0x1a8
> >>
> >> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
> >> ---
> >>   drivers/gpu/drm/msm/dp/dp_display.c |  6 ------
> >>   drivers/gpu/drm/msm/dp/dp_panel.c   | 23 +++++++++++++----------
> >>   2 files changed, 13 insertions(+), 16 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c
> >> b/drivers/gpu/drm/msm/dp/dp_display.c
> >> index 92cd50f..01453db 100644
> >> --- a/drivers/gpu/drm/msm/dp/dp_display.c
> >> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
> >> @@ -555,12 +555,6 @@ static int dp_hpd_plug_handle(struct
> >> dp_display_private *dp, u32 data)
> >>       mutex_unlock(&dp->event_mutex);
> >> -    /*
> >> -     * add fail safe mode outside event_mutex scope
> >> -     * to avoid potiential circular lock with drm thread
> >> -     */
> >> -    dp_panel_add_fail_safe_mode(dp->dp_display.connector);
> >> -
> >>       /* uevent will complete connection part */
> >>       return 0;
> >>   };
> >> diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c
> >> b/drivers/gpu/drm/msm/dp/dp_panel.c
> >> index 1aa9aa8c..23fee42 100644
> >> --- a/drivers/gpu/drm/msm/dp/dp_panel.c
> >> +++ b/drivers/gpu/drm/msm/dp/dp_panel.c
> >> @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct
> >> drm_connector *connector,
> >>       return rc;
> >>   }
> >> -void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
> >> -{
> >> -    /* fail safe edid */
> >> -    mutex_lock(&connector->dev->mode_config.mutex);
> >> -    if (drm_add_modes_noedid(connector, 640, 480))
> >> -        drm_set_preferred_mode(connector, 640, 480);
> >> -    mutex_unlock(&connector->dev->mode_config.mutex);
> >> -}
> >> -
> >>   int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
> >>       struct drm_connector *connector)
> >>   {
> >> @@ -216,7 +207,11 @@ int dp_panel_read_sink_caps(struct dp_panel
> >> *dp_panel,
> >>               goto end;
> >>           }
> >> -        dp_panel_add_fail_safe_mode(connector);
> >> +        /* fail safe edid */
> >> +        mutex_lock(&connector->dev->mode_config.mutex);
> >> +        if (drm_add_modes_noedid(connector, 640, 480))
> >> +            drm_set_preferred_mode(connector, 640, 480);
> >> +        mutex_unlock(&connector->dev->mode_config.mutex);
> >>       }
> >>       if (panel->aux_cfg_update_done) {
> >> @@ -266,6 +261,14 @@ int dp_panel_get_modes(struct dp_panel *dp_panel,
> >>           return -EINVAL;
> >>       }
> >> +    /*
> >> +     * add fail safe mode (640x480) here
> >> +     * since we are executed in drm_thread context,
> >> +     * no mode_config.mutex acquired required
> >> +     */
> >> +    if (drm_add_modes_noedid(connector, 640, 480))
> >> +        drm_set_preferred_mode(connector, 640, 480);
> >> +
> >>       if (dp_panel->edid)
> >>           return dp_panel_update_modes(connector, dp_panel->edid);
> > Also, wouldn't calling get_modes() several times make cause adding more
> > and more 640x480 modes to the modes list?
> >
>
> Shouldnt DRM be blocking that here? Call should trickle down here only
> if count_modes was 0
>
>     if (out_resp->count_modes == 0) {
>          if (is_current_master)
>              connector->funcs->fill_modes(connector,
>                               dev->mode_config.max_width,
>                               dev->mode_config.max_height);
>          else
>              drm_dbg_kms(dev, "User-space requested a forced probe on
> [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe",
>                      connector->base.id, connector->name);
>      }
>

count_modes is set by userspace:
        /*
         * This ioctl is called twice, once to determine how much space is
         * needed, and the 2nd time to fill it.
         */

So, nothing prevents userspace from passing zero count_mode more than once.

However drm_helper_probe_single_connector_modes() will set old modes
to MODE_STALE and then will call get_modes().
Then drm_mode_prune_invalid() will prune stale modes. So, this should be fine.

A more generic question is why do we need to add the mode in two places?
Abhinav Kumar April 23, 2022, 3:33 p.m. UTC | #5
On 4/22/2022 11:25 PM, Dmitry Baryshkov wrote:
> On Sat, 23 Apr 2022 at 03:12, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>
>>
>>
>> On 4/22/2022 5:07 PM, Dmitry Baryshkov wrote:
>>> On 23/04/2022 02:45, Kuogee Hsieh wrote:
>>>> Current DP driver implementation has adding safe mode done at
>>>> dp_hpd_plug_handle() which is expected to be executed under event
>>>> thread context.
>>>>
>>>> However there is possible circular locking happen (see blow stack trace)
>>>> after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which
>>>> is executed under drm_thread context.
>>>>
>>>> To break this circular locking, this patch have safe mode added at
>>>> dp_connector_get_mode() which is executed under drm thread context.
>>>> Therefore no lock acquired required for &dev->mode_config.mutex while
>>>> adding fail safe mode since it has been hold by drm thread already.
>>>>
>>>> ======================================================
>>>>    WARNING: possible circular locking dependency detected
>>>>    5.15.35-lockdep #6 Tainted: G        W
>>>>    ------------------------------------------------------
>>>>    frecon/429 is trying to acquire lock:
>>>>    ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at:
>>>> dp_panel_add_fail_safe_mode+0x4c/0xa0
>>>>
>>>>    but task is already holding lock:
>>>>    ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at:
>>>> lock_crtcs+0xb4/0x124
>>>>
>>>>    which lock already depends on the new lock.
>>>>
>>>>    the existing dependency chain (in reverse order) is:
>>>>
>>>>    -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}:
>>>>           __mutex_lock_common+0x174/0x1a64
>>>>           mutex_lock_nested+0x98/0xac
>>>>           lock_crtcs+0xb4/0x124
>>>>           msm_atomic_commit_tail+0x330/0x748
>>>>           commit_tail+0x19c/0x278
>>>>           drm_atomic_helper_commit+0x1dc/0x1f0
>>>>           drm_atomic_commit+0xc0/0xd8
>>>>           drm_atomic_helper_set_config+0xb4/0x134
>>>>           drm_mode_setcrtc+0x688/0x1248
>>>>           drm_ioctl_kernel+0x1e4/0x338
>>>>           drm_ioctl+0x3a4/0x684
>>>>           __arm64_sys_ioctl+0x118/0x154
>>>>           invoke_syscall+0x78/0x224
>>>>           el0_svc_common+0x178/0x200
>>>>           do_el0_svc+0x94/0x13c
>>>>           el0_svc+0x5c/0xec
>>>>           el0t_64_sync_handler+0x78/0x108
>>>>           el0t_64_sync+0x1a4/0x1a8
>>>>
>>>>    -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}:
>>>>           __mutex_lock_common+0x174/0x1a64
>>>>           ww_mutex_lock+0xb8/0x278
>>>>           modeset_lock+0x304/0x4ac
>>>>           drm_modeset_lock+0x4c/0x7c
>>>>           drmm_mode_config_init+0x4a8/0xc50
>>>>           msm_drm_init+0x274/0xac0
>>>>           msm_drm_bind+0x20/0x2c
>>>>           try_to_bring_up_master+0x3dc/0x470
>>>>           __component_add+0x18c/0x3c0
>>>>           component_add+0x1c/0x28
>>>>           dp_display_probe+0x954/0xa98
>>>>           platform_probe+0x124/0x15c
>>>>           really_probe+0x1b0/0x5f8
>>>>           __driver_probe_device+0x174/0x20c
>>>>           driver_probe_device+0x70/0x134
>>>>           __device_attach_driver+0x130/0x1d0
>>>>           bus_for_each_drv+0xfc/0x14c
>>>>           __device_attach+0x1bc/0x2bc
>>>>           device_initial_probe+0x1c/0x28
>>>>           bus_probe_device+0x94/0x178
>>>>           deferred_probe_work_func+0x1a4/0x1f0
>>>>           process_one_work+0x5d4/0x9dc
>>>>           worker_thread+0x898/0xccc
>>>>           kthread+0x2d4/0x3d4
>>>>           ret_from_fork+0x10/0x20
>>>>
>>>>    -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}:
>>>>           ww_acquire_init+0x1c4/0x2c8
>>>>           drm_modeset_acquire_init+0x44/0xc8
>>>>           drm_helper_probe_single_connector_modes+0xb0/0x12dc
>>>>           drm_mode_getconnector+0x5dc/0xfe8
>>>>           drm_ioctl_kernel+0x1e4/0x338
>>>>           drm_ioctl+0x3a4/0x684
>>>>           __arm64_sys_ioctl+0x118/0x154
>>>>           invoke_syscall+0x78/0x224
>>>>           el0_svc_common+0x178/0x200
>>>>           do_el0_svc+0x94/0x13c
>>>>           el0_svc+0x5c/0xec
>>>>           el0t_64_sync_handler+0x78/0x108
>>>>           el0t_64_sync+0x1a4/0x1a8
>>>>
>>>>    -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}:
>>>>           __lock_acquire+0x2650/0x672c
>>>>           lock_acquire+0x1b4/0x4ac
>>>>           __mutex_lock_common+0x174/0x1a64
>>>>           mutex_lock_nested+0x98/0xac
>>>>           dp_panel_add_fail_safe_mode+0x4c/0xa0
>>>>           dp_hpd_plug_handle+0x1f0/0x280
>>>>           dp_bridge_enable+0x94/0x2b8
>>>>           drm_atomic_bridge_chain_enable+0x11c/0x168
>>>>           drm_atomic_helper_commit_modeset_enables+0x500/0x740
>>>>           msm_atomic_commit_tail+0x3e4/0x748
>>>>           commit_tail+0x19c/0x278
>>>>           drm_atomic_helper_commit+0x1dc/0x1f0
>>>>           drm_atomic_commit+0xc0/0xd8
>>>>           drm_atomic_helper_set_config+0xb4/0x134
>>>>           drm_mode_setcrtc+0x688/0x1248
>>>>           drm_ioctl_kernel+0x1e4/0x338
>>>>           drm_ioctl+0x3a4/0x684
>>>>           __arm64_sys_ioctl+0x118/0x154
>>>>           invoke_syscall+0x78/0x224
>>>>           el0_svc_common+0x178/0x200
>>>>           do_el0_svc+0x94/0x13c
>>>>           el0_svc+0x5c/0xec
>>>>           el0t_64_sync_handler+0x78/0x108
>>>>           el0t_64_sync+0x1a4/0x1a8
>>>>
>>>> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
>>>> ---
>>>>    drivers/gpu/drm/msm/dp/dp_display.c |  6 ------
>>>>    drivers/gpu/drm/msm/dp/dp_panel.c   | 23 +++++++++++++----------
>>>>    2 files changed, 13 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c
>>>> b/drivers/gpu/drm/msm/dp/dp_display.c
>>>> index 92cd50f..01453db 100644
>>>> --- a/drivers/gpu/drm/msm/dp/dp_display.c
>>>> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
>>>> @@ -555,12 +555,6 @@ static int dp_hpd_plug_handle(struct
>>>> dp_display_private *dp, u32 data)
>>>>        mutex_unlock(&dp->event_mutex);
>>>> -    /*
>>>> -     * add fail safe mode outside event_mutex scope
>>>> -     * to avoid potiential circular lock with drm thread
>>>> -     */
>>>> -    dp_panel_add_fail_safe_mode(dp->dp_display.connector);
>>>> -
>>>>        /* uevent will complete connection part */
>>>>        return 0;
>>>>    };
>>>> diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c
>>>> b/drivers/gpu/drm/msm/dp/dp_panel.c
>>>> index 1aa9aa8c..23fee42 100644
>>>> --- a/drivers/gpu/drm/msm/dp/dp_panel.c
>>>> +++ b/drivers/gpu/drm/msm/dp/dp_panel.c
>>>> @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct
>>>> drm_connector *connector,
>>>>        return rc;
>>>>    }
>>>> -void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
>>>> -{
>>>> -    /* fail safe edid */
>>>> -    mutex_lock(&connector->dev->mode_config.mutex);
>>>> -    if (drm_add_modes_noedid(connector, 640, 480))
>>>> -        drm_set_preferred_mode(connector, 640, 480);
>>>> -    mutex_unlock(&connector->dev->mode_config.mutex);
>>>> -}
>>>> -
>>>>    int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
>>>>        struct drm_connector *connector)
>>>>    {
>>>> @@ -216,7 +207,11 @@ int dp_panel_read_sink_caps(struct dp_panel
>>>> *dp_panel,
>>>>                goto end;
>>>>            }
>>>> -        dp_panel_add_fail_safe_mode(connector);
>>>> +        /* fail safe edid */
>>>> +        mutex_lock(&connector->dev->mode_config.mutex);
>>>> +        if (drm_add_modes_noedid(connector, 640, 480))
>>>> +            drm_set_preferred_mode(connector, 640, 480);
>>>> +        mutex_unlock(&connector->dev->mode_config.mutex);
>>>>        }
>>>>        if (panel->aux_cfg_update_done) {
>>>> @@ -266,6 +261,14 @@ int dp_panel_get_modes(struct dp_panel *dp_panel,
>>>>            return -EINVAL;
>>>>        }
>>>> +    /*
>>>> +     * add fail safe mode (640x480) here
>>>> +     * since we are executed in drm_thread context,
>>>> +     * no mode_config.mutex acquired required
>>>> +     */
>>>> +    if (drm_add_modes_noedid(connector, 640, 480))
>>>> +        drm_set_preferred_mode(connector, 640, 480);
>>>> +
>>>>        if (dp_panel->edid)
>>>>            return dp_panel_update_modes(connector, dp_panel->edid);
>>> Also, wouldn't calling get_modes() several times make cause adding more
>>> and more 640x480 modes to the modes list?
>>>
>>
>> Shouldnt DRM be blocking that here? Call should trickle down here only
>> if count_modes was 0
>>
>>      if (out_resp->count_modes == 0) {
>>           if (is_current_master)
>>               connector->funcs->fill_modes(connector,
>>                                dev->mode_config.max_width,
>>                                dev->mode_config.max_height);
>>           else
>>               drm_dbg_kms(dev, "User-space requested a forced probe on
>> [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe",
>>                       connector->base.id, connector->name);
>>       }
>>
> 
> count_modes is set by userspace:
>          /*
>           * This ioctl is called twice, once to determine how much space is
>           * needed, and the 2nd time to fill it.
>           */
> 
> So, nothing prevents userspace from passing zero count_mode more than once.
Ack, some non-optimized usermodes can do this.
> 
> However drm_helper_probe_single_connector_modes() will set old modes
> to MODE_STALE and then will call get_modes().
> Then drm_mode_prune_invalid() will prune stale modes. So, this should be fine.
> 
Got it.
> A more generic question is why do we need to add the mode in two places?
> 
Answering behalf of kuogee but the two places are for different purposes:

1) When there is no EDID read

if (!dp_panel->edid) {

That case we should add the fail-safe mode as otherwise display will be 
blank for cases where there was nothing wrong with the monitor as such 
but the EDID read from aux failed for some reason. Even DRM does this 
but just not 640x480 here:

518 	if (count == 0 && (connector->status == connector_status_connected ||
519 			   connector->status == connector_status_unknown))
520 		count = drm_add_modes_noedid(connector, 1024, 768);


2) When there was a valid EDID but no 640x480 mode

This is the equipment specific case and the one even I was a bit 
surprised. There is a DP compliance equipment we have in-house and while 
validation, it was found that in its list of modes , it did not have any 
modes which chromebook supported ( due to 2 lanes ). But my 
understanding was that, all sinks should have atleast 640x480 but 
apparently this one did not have that. So to handle this DP compliance 
equipment behavior, we had to do this.

Thanks

Abhinav
Dmitry Baryshkov April 25, 2022, 11:14 p.m. UTC | #6
On 23/04/2022 18:33, Abhinav Kumar wrote:
> 
> 
> On 4/22/2022 11:25 PM, Dmitry Baryshkov wrote:
>> On Sat, 23 Apr 2022 at 03:12, Abhinav Kumar 
>> <quic_abhinavk@quicinc.com> wrote:
>>>
>>>
>>>
>>> On 4/22/2022 5:07 PM, Dmitry Baryshkov wrote:
>>>> On 23/04/2022 02:45, Kuogee Hsieh wrote:
>>>>> Current DP driver implementation has adding safe mode done at
>>>>> dp_hpd_plug_handle() which is expected to be executed under event
>>>>> thread context.
>>>>>
>>>>> However there is possible circular locking happen (see blow stack 
>>>>> trace)
>>>>> after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() 
>>>>> which
>>>>> is executed under drm_thread context.
>>>>>
>>>>> To break this circular locking, this patch have safe mode added at
>>>>> dp_connector_get_mode() which is executed under drm thread context.
>>>>> Therefore no lock acquired required for &dev->mode_config.mutex while
>>>>> adding fail safe mode since it has been hold by drm thread already.
>>>>>
>>>>> ======================================================
>>>>>    WARNING: possible circular locking dependency detected
>>>>>    5.15.35-lockdep #6 Tainted: G        W
>>>>>    ------------------------------------------------------
>>>>>    frecon/429 is trying to acquire lock:
>>>>>    ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at:
>>>>> dp_panel_add_fail_safe_mode+0x4c/0xa0
>>>>>
>>>>>    but task is already holding lock:
>>>>>    ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at:
>>>>> lock_crtcs+0xb4/0x124
>>>>>
>>>>>    which lock already depends on the new lock.
>>>>>
>>>>>    the existing dependency chain (in reverse order) is:
>>>>>
>>>>>    -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}:
>>>>>           __mutex_lock_common+0x174/0x1a64
>>>>>           mutex_lock_nested+0x98/0xac
>>>>>           lock_crtcs+0xb4/0x124
>>>>>           msm_atomic_commit_tail+0x330/0x748
>>>>>           commit_tail+0x19c/0x278
>>>>>           drm_atomic_helper_commit+0x1dc/0x1f0
>>>>>           drm_atomic_commit+0xc0/0xd8
>>>>>           drm_atomic_helper_set_config+0xb4/0x134
>>>>>           drm_mode_setcrtc+0x688/0x1248
>>>>>           drm_ioctl_kernel+0x1e4/0x338
>>>>>           drm_ioctl+0x3a4/0x684
>>>>>           __arm64_sys_ioctl+0x118/0x154
>>>>>           invoke_syscall+0x78/0x224
>>>>>           el0_svc_common+0x178/0x200
>>>>>           do_el0_svc+0x94/0x13c
>>>>>           el0_svc+0x5c/0xec
>>>>>           el0t_64_sync_handler+0x78/0x108
>>>>>           el0t_64_sync+0x1a4/0x1a8
>>>>>
>>>>>    -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}:
>>>>>           __mutex_lock_common+0x174/0x1a64
>>>>>           ww_mutex_lock+0xb8/0x278
>>>>>           modeset_lock+0x304/0x4ac
>>>>>           drm_modeset_lock+0x4c/0x7c
>>>>>           drmm_mode_config_init+0x4a8/0xc50
>>>>>           msm_drm_init+0x274/0xac0
>>>>>           msm_drm_bind+0x20/0x2c
>>>>>           try_to_bring_up_master+0x3dc/0x470
>>>>>           __component_add+0x18c/0x3c0
>>>>>           component_add+0x1c/0x28
>>>>>           dp_display_probe+0x954/0xa98
>>>>>           platform_probe+0x124/0x15c
>>>>>           really_probe+0x1b0/0x5f8
>>>>>           __driver_probe_device+0x174/0x20c
>>>>>           driver_probe_device+0x70/0x134
>>>>>           __device_attach_driver+0x130/0x1d0
>>>>>           bus_for_each_drv+0xfc/0x14c
>>>>>           __device_attach+0x1bc/0x2bc
>>>>>           device_initial_probe+0x1c/0x28
>>>>>           bus_probe_device+0x94/0x178
>>>>>           deferred_probe_work_func+0x1a4/0x1f0
>>>>>           process_one_work+0x5d4/0x9dc
>>>>>           worker_thread+0x898/0xccc
>>>>>           kthread+0x2d4/0x3d4
>>>>>           ret_from_fork+0x10/0x20
>>>>>
>>>>>    -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}:
>>>>>           ww_acquire_init+0x1c4/0x2c8
>>>>>           drm_modeset_acquire_init+0x44/0xc8
>>>>>           drm_helper_probe_single_connector_modes+0xb0/0x12dc
>>>>>           drm_mode_getconnector+0x5dc/0xfe8
>>>>>           drm_ioctl_kernel+0x1e4/0x338
>>>>>           drm_ioctl+0x3a4/0x684
>>>>>           __arm64_sys_ioctl+0x118/0x154
>>>>>           invoke_syscall+0x78/0x224
>>>>>           el0_svc_common+0x178/0x200
>>>>>           do_el0_svc+0x94/0x13c
>>>>>           el0_svc+0x5c/0xec
>>>>>           el0t_64_sync_handler+0x78/0x108
>>>>>           el0t_64_sync+0x1a4/0x1a8
>>>>>
>>>>>    -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}:
>>>>>           __lock_acquire+0x2650/0x672c
>>>>>           lock_acquire+0x1b4/0x4ac
>>>>>           __mutex_lock_common+0x174/0x1a64
>>>>>           mutex_lock_nested+0x98/0xac
>>>>>           dp_panel_add_fail_safe_mode+0x4c/0xa0
>>>>>           dp_hpd_plug_handle+0x1f0/0x280
>>>>>           dp_bridge_enable+0x94/0x2b8
>>>>>           drm_atomic_bridge_chain_enable+0x11c/0x168
>>>>>           drm_atomic_helper_commit_modeset_enables+0x500/0x740
>>>>>           msm_atomic_commit_tail+0x3e4/0x748
>>>>>           commit_tail+0x19c/0x278
>>>>>           drm_atomic_helper_commit+0x1dc/0x1f0
>>>>>           drm_atomic_commit+0xc0/0xd8
>>>>>           drm_atomic_helper_set_config+0xb4/0x134
>>>>>           drm_mode_setcrtc+0x688/0x1248
>>>>>           drm_ioctl_kernel+0x1e4/0x338
>>>>>           drm_ioctl+0x3a4/0x684
>>>>>           __arm64_sys_ioctl+0x118/0x154
>>>>>           invoke_syscall+0x78/0x224
>>>>>           el0_svc_common+0x178/0x200
>>>>>           do_el0_svc+0x94/0x13c
>>>>>           el0_svc+0x5c/0xec
>>>>>           el0t_64_sync_handler+0x78/0x108
>>>>>           el0t_64_sync+0x1a4/0x1a8
>>>>>
>>>>> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
>>>>> ---
>>>>>    drivers/gpu/drm/msm/dp/dp_display.c |  6 ------
>>>>>    drivers/gpu/drm/msm/dp/dp_panel.c   | 23 +++++++++++++----------
>>>>>    2 files changed, 13 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c
>>>>> b/drivers/gpu/drm/msm/dp/dp_display.c
>>>>> index 92cd50f..01453db 100644
>>>>> --- a/drivers/gpu/drm/msm/dp/dp_display.c
>>>>> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
>>>>> @@ -555,12 +555,6 @@ static int dp_hpd_plug_handle(struct
>>>>> dp_display_private *dp, u32 data)
>>>>>        mutex_unlock(&dp->event_mutex);
>>>>> -    /*
>>>>> -     * add fail safe mode outside event_mutex scope
>>>>> -     * to avoid potiential circular lock with drm thread
>>>>> -     */
>>>>> -    dp_panel_add_fail_safe_mode(dp->dp_display.connector);
>>>>> -
>>>>>        /* uevent will complete connection part */
>>>>>        return 0;
>>>>>    };
>>>>> diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c
>>>>> b/drivers/gpu/drm/msm/dp/dp_panel.c
>>>>> index 1aa9aa8c..23fee42 100644
>>>>> --- a/drivers/gpu/drm/msm/dp/dp_panel.c
>>>>> +++ b/drivers/gpu/drm/msm/dp/dp_panel.c
>>>>> @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct
>>>>> drm_connector *connector,
>>>>>        return rc;
>>>>>    }
>>>>> -void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
>>>>> -{
>>>>> -    /* fail safe edid */
>>>>> -    mutex_lock(&connector->dev->mode_config.mutex);
>>>>> -    if (drm_add_modes_noedid(connector, 640, 480))
>>>>> -        drm_set_preferred_mode(connector, 640, 480);
>>>>> -    mutex_unlock(&connector->dev->mode_config.mutex);
>>>>> -}
>>>>> -
>>>>>    int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
>>>>>        struct drm_connector *connector)
>>>>>    {
>>>>> @@ -216,7 +207,11 @@ int dp_panel_read_sink_caps(struct dp_panel
>>>>> *dp_panel,
>>>>>                goto end;
>>>>>            }
>>>>> -        dp_panel_add_fail_safe_mode(connector);
>>>>> +        /* fail safe edid */
>>>>> +        mutex_lock(&connector->dev->mode_config.mutex);
>>>>> +        if (drm_add_modes_noedid(connector, 640, 480))
>>>>> +            drm_set_preferred_mode(connector, 640, 480);
>>>>> +        mutex_unlock(&connector->dev->mode_config.mutex);
>>>>>        }
>>>>>        if (panel->aux_cfg_update_done) {
>>>>> @@ -266,6 +261,14 @@ int dp_panel_get_modes(struct dp_panel *dp_panel,
>>>>>            return -EINVAL;
>>>>>        }
>>>>> +    /*
>>>>> +     * add fail safe mode (640x480) here
>>>>> +     * since we are executed in drm_thread context,
>>>>> +     * no mode_config.mutex acquired required
>>>>> +     */
>>>>> +    if (drm_add_modes_noedid(connector, 640, 480))
>>>>> +        drm_set_preferred_mode(connector, 640, 480);
>>>>> +
>>>>>        if (dp_panel->edid)
>>>>>            return dp_panel_update_modes(connector, dp_panel->edid);
>>>> Also, wouldn't calling get_modes() several times make cause adding more
>>>> and more 640x480 modes to the modes list?
>>>>
>>>
>>> Shouldnt DRM be blocking that here? Call should trickle down here only
>>> if count_modes was 0
>>>
>>>      if (out_resp->count_modes == 0) {
>>>           if (is_current_master)
>>>               connector->funcs->fill_modes(connector,
>>>                                dev->mode_config.max_width,
>>>                                dev->mode_config.max_height);
>>>           else
>>>               drm_dbg_kms(dev, "User-space requested a forced probe on
>>> [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only 
>>> probe",
>>>                       connector->base.id, connector->name);
>>>       }
>>>
>>
>> count_modes is set by userspace:
>>          /*
>>           * This ioctl is called twice, once to determine how much 
>> space is
>>           * needed, and the 2nd time to fill it.
>>           */
>>
>> So, nothing prevents userspace from passing zero count_mode more than 
>> once.
> Ack, some non-optimized usermodes can do this.
>>
>> However drm_helper_probe_single_connector_modes() will set old modes
>> to MODE_STALE and then will call get_modes().
>> Then drm_mode_prune_invalid() will prune stale modes. So, this should 
>> be fine.
>>
> Got it.
>> A more generic question is why do we need to add the mode in two places?
>>
> Answering behalf of kuogee but the two places are for different purposes:
> 
> 1) When there is no EDID read
> 
> if (!dp_panel->edid) {
> 
> That case we should add the fail-safe mode as otherwise display will be 
> blank for cases where there was nothing wrong with the monitor as such 
> but the EDID read from aux failed for some reason. Even DRM does this 
> but just not 640x480 here:
> 
> 518     if (count == 0 && (connector->status == 
> connector_status_connected ||
> 519                connector->status == connector_status_unknown))
> 520         count = drm_add_modes_noedid(connector, 1024, 768);

Yes, but this happens when there are no other modes. While if I'm not 
mistaken our code adds 640x480 even if there are modes.

> 2) When there was a valid EDID but no 640x480 mode
> 
> This is the equipment specific case and the one even I was a bit 
> surprised. There is a DP compliance equipment we have in-house and while 
> validation, it was found that in its list of modes , it did not have any 
> modes which chromebook supported ( due to 2 lanes ). But my 
> understanding was that, all sinks should have atleast 640x480 but 
> apparently this one did not have that. So to handle this DP compliance 
> equipment behavior, we had to do this.

I see. Not the perfect solution, but looks like a necessity.
Doug Anderson April 26, 2022, 12:26 a.m. UTC | #7
Hi,

On Sat, Apr 23, 2022 at 8:34 AM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
> On 4/22/2022 11:25 PM, Dmitry Baryshkov wrote:
> > On Sat, 23 Apr 2022 at 03:12, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 4/22/2022 5:07 PM, Dmitry Baryshkov wrote:
> >>> On 23/04/2022 02:45, Kuogee Hsieh wrote:
> >>>> Current DP driver implementation has adding safe mode done at
> >>>> dp_hpd_plug_handle() which is expected to be executed under event
> >>>> thread context.
> >>>>
> >>>> However there is possible circular locking happen (see blow stack trace)
> >>>> after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which
> >>>> is executed under drm_thread context.
> >>>>
> >>>> To break this circular locking, this patch have safe mode added at
> >>>> dp_connector_get_mode() which is executed under drm thread context.
> >>>> Therefore no lock acquired required for &dev->mode_config.mutex while
> >>>> adding fail safe mode since it has been hold by drm thread already.
> >>>>
> >>>> ======================================================
> >>>>    WARNING: possible circular locking dependency detected
> >>>>    5.15.35-lockdep #6 Tainted: G        W
> >>>>    ------------------------------------------------------
> >>>>    frecon/429 is trying to acquire lock:
> >>>>    ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at:
> >>>> dp_panel_add_fail_safe_mode+0x4c/0xa0
> >>>>
> >>>>    but task is already holding lock:
> >>>>    ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at:
> >>>> lock_crtcs+0xb4/0x124
> >>>>
> >>>>    which lock already depends on the new lock.
> >>>>
> >>>>    the existing dependency chain (in reverse order) is:
> >>>>
> >>>>    -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}:
> >>>>           __mutex_lock_common+0x174/0x1a64
> >>>>           mutex_lock_nested+0x98/0xac
> >>>>           lock_crtcs+0xb4/0x124
> >>>>           msm_atomic_commit_tail+0x330/0x748
> >>>>           commit_tail+0x19c/0x278
> >>>>           drm_atomic_helper_commit+0x1dc/0x1f0
> >>>>           drm_atomic_commit+0xc0/0xd8
> >>>>           drm_atomic_helper_set_config+0xb4/0x134
> >>>>           drm_mode_setcrtc+0x688/0x1248
> >>>>           drm_ioctl_kernel+0x1e4/0x338
> >>>>           drm_ioctl+0x3a4/0x684
> >>>>           __arm64_sys_ioctl+0x118/0x154
> >>>>           invoke_syscall+0x78/0x224
> >>>>           el0_svc_common+0x178/0x200
> >>>>           do_el0_svc+0x94/0x13c
> >>>>           el0_svc+0x5c/0xec
> >>>>           el0t_64_sync_handler+0x78/0x108
> >>>>           el0t_64_sync+0x1a4/0x1a8
> >>>>
> >>>>    -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}:
> >>>>           __mutex_lock_common+0x174/0x1a64
> >>>>           ww_mutex_lock+0xb8/0x278
> >>>>           modeset_lock+0x304/0x4ac
> >>>>           drm_modeset_lock+0x4c/0x7c
> >>>>           drmm_mode_config_init+0x4a8/0xc50
> >>>>           msm_drm_init+0x274/0xac0
> >>>>           msm_drm_bind+0x20/0x2c
> >>>>           try_to_bring_up_master+0x3dc/0x470
> >>>>           __component_add+0x18c/0x3c0
> >>>>           component_add+0x1c/0x28
> >>>>           dp_display_probe+0x954/0xa98
> >>>>           platform_probe+0x124/0x15c
> >>>>           really_probe+0x1b0/0x5f8
> >>>>           __driver_probe_device+0x174/0x20c
> >>>>           driver_probe_device+0x70/0x134
> >>>>           __device_attach_driver+0x130/0x1d0
> >>>>           bus_for_each_drv+0xfc/0x14c
> >>>>           __device_attach+0x1bc/0x2bc
> >>>>           device_initial_probe+0x1c/0x28
> >>>>           bus_probe_device+0x94/0x178
> >>>>           deferred_probe_work_func+0x1a4/0x1f0
> >>>>           process_one_work+0x5d4/0x9dc
> >>>>           worker_thread+0x898/0xccc
> >>>>           kthread+0x2d4/0x3d4
> >>>>           ret_from_fork+0x10/0x20
> >>>>
> >>>>    -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}:
> >>>>           ww_acquire_init+0x1c4/0x2c8
> >>>>           drm_modeset_acquire_init+0x44/0xc8
> >>>>           drm_helper_probe_single_connector_modes+0xb0/0x12dc
> >>>>           drm_mode_getconnector+0x5dc/0xfe8
> >>>>           drm_ioctl_kernel+0x1e4/0x338
> >>>>           drm_ioctl+0x3a4/0x684
> >>>>           __arm64_sys_ioctl+0x118/0x154
> >>>>           invoke_syscall+0x78/0x224
> >>>>           el0_svc_common+0x178/0x200
> >>>>           do_el0_svc+0x94/0x13c
> >>>>           el0_svc+0x5c/0xec
> >>>>           el0t_64_sync_handler+0x78/0x108
> >>>>           el0t_64_sync+0x1a4/0x1a8
> >>>>
> >>>>    -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}:
> >>>>           __lock_acquire+0x2650/0x672c
> >>>>           lock_acquire+0x1b4/0x4ac
> >>>>           __mutex_lock_common+0x174/0x1a64
> >>>>           mutex_lock_nested+0x98/0xac
> >>>>           dp_panel_add_fail_safe_mode+0x4c/0xa0
> >>>>           dp_hpd_plug_handle+0x1f0/0x280
> >>>>           dp_bridge_enable+0x94/0x2b8
> >>>>           drm_atomic_bridge_chain_enable+0x11c/0x168
> >>>>           drm_atomic_helper_commit_modeset_enables+0x500/0x740
> >>>>           msm_atomic_commit_tail+0x3e4/0x748
> >>>>           commit_tail+0x19c/0x278
> >>>>           drm_atomic_helper_commit+0x1dc/0x1f0
> >>>>           drm_atomic_commit+0xc0/0xd8
> >>>>           drm_atomic_helper_set_config+0xb4/0x134
> >>>>           drm_mode_setcrtc+0x688/0x1248
> >>>>           drm_ioctl_kernel+0x1e4/0x338
> >>>>           drm_ioctl+0x3a4/0x684
> >>>>           __arm64_sys_ioctl+0x118/0x154
> >>>>           invoke_syscall+0x78/0x224
> >>>>           el0_svc_common+0x178/0x200
> >>>>           do_el0_svc+0x94/0x13c
> >>>>           el0_svc+0x5c/0xec
> >>>>           el0t_64_sync_handler+0x78/0x108
> >>>>           el0t_64_sync+0x1a4/0x1a8
> >>>>
> >>>> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
> >>>> ---
> >>>>    drivers/gpu/drm/msm/dp/dp_display.c |  6 ------
> >>>>    drivers/gpu/drm/msm/dp/dp_panel.c   | 23 +++++++++++++----------
> >>>>    2 files changed, 13 insertions(+), 16 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c
> >>>> b/drivers/gpu/drm/msm/dp/dp_display.c
> >>>> index 92cd50f..01453db 100644
> >>>> --- a/drivers/gpu/drm/msm/dp/dp_display.c
> >>>> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
> >>>> @@ -555,12 +555,6 @@ static int dp_hpd_plug_handle(struct
> >>>> dp_display_private *dp, u32 data)
> >>>>        mutex_unlock(&dp->event_mutex);
> >>>> -    /*
> >>>> -     * add fail safe mode outside event_mutex scope
> >>>> -     * to avoid potiential circular lock with drm thread
> >>>> -     */
> >>>> -    dp_panel_add_fail_safe_mode(dp->dp_display.connector);
> >>>> -
> >>>>        /* uevent will complete connection part */
> >>>>        return 0;
> >>>>    };
> >>>> diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c
> >>>> b/drivers/gpu/drm/msm/dp/dp_panel.c
> >>>> index 1aa9aa8c..23fee42 100644
> >>>> --- a/drivers/gpu/drm/msm/dp/dp_panel.c
> >>>> +++ b/drivers/gpu/drm/msm/dp/dp_panel.c
> >>>> @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct
> >>>> drm_connector *connector,
> >>>>        return rc;
> >>>>    }
> >>>> -void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
> >>>> -{
> >>>> -    /* fail safe edid */
> >>>> -    mutex_lock(&connector->dev->mode_config.mutex);
> >>>> -    if (drm_add_modes_noedid(connector, 640, 480))
> >>>> -        drm_set_preferred_mode(connector, 640, 480);
> >>>> -    mutex_unlock(&connector->dev->mode_config.mutex);
> >>>> -}
> >>>> -
> >>>>    int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
> >>>>        struct drm_connector *connector)
> >>>>    {
> >>>> @@ -216,7 +207,11 @@ int dp_panel_read_sink_caps(struct dp_panel
> >>>> *dp_panel,
> >>>>                goto end;
> >>>>            }
> >>>> -        dp_panel_add_fail_safe_mode(connector);
> >>>> +        /* fail safe edid */
> >>>> +        mutex_lock(&connector->dev->mode_config.mutex);
> >>>> +        if (drm_add_modes_noedid(connector, 640, 480))
> >>>> +            drm_set_preferred_mode(connector, 640, 480);
> >>>> +        mutex_unlock(&connector->dev->mode_config.mutex);
> >>>>        }
> >>>>        if (panel->aux_cfg_update_done) {
> >>>> @@ -266,6 +261,14 @@ int dp_panel_get_modes(struct dp_panel *dp_panel,
> >>>>            return -EINVAL;
> >>>>        }
> >>>> +    /*
> >>>> +     * add fail safe mode (640x480) here
> >>>> +     * since we are executed in drm_thread context,
> >>>> +     * no mode_config.mutex acquired required
> >>>> +     */
> >>>> +    if (drm_add_modes_noedid(connector, 640, 480))
> >>>> +        drm_set_preferred_mode(connector, 640, 480);
> >>>> +
> >>>>        if (dp_panel->edid)
> >>>>            return dp_panel_update_modes(connector, dp_panel->edid);
> >>> Also, wouldn't calling get_modes() several times make cause adding more
> >>> and more 640x480 modes to the modes list?
> >>>
> >>
> >> Shouldnt DRM be blocking that here? Call should trickle down here only
> >> if count_modes was 0
> >>
> >>      if (out_resp->count_modes == 0) {
> >>           if (is_current_master)
> >>               connector->funcs->fill_modes(connector,
> >>                                dev->mode_config.max_width,
> >>                                dev->mode_config.max_height);
> >>           else
> >>               drm_dbg_kms(dev, "User-space requested a forced probe on
> >> [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe",
> >>                       connector->base.id, connector->name);
> >>       }
> >>
> >
> > count_modes is set by userspace:
> >          /*
> >           * This ioctl is called twice, once to determine how much space is
> >           * needed, and the 2nd time to fill it.
> >           */
> >
> > So, nothing prevents userspace from passing zero count_mode more than once.
> Ack, some non-optimized usermodes can do this.
> >
> > However drm_helper_probe_single_connector_modes() will set old modes
> > to MODE_STALE and then will call get_modes().
> > Then drm_mode_prune_invalid() will prune stale modes. So, this should be fine.
> >
> Got it.
> > A more generic question is why do we need to add the mode in two places?
> >
> Answering behalf of kuogee but the two places are for different purposes:
>
> 1) When there is no EDID read
>
> if (!dp_panel->edid) {
>
> That case we should add the fail-safe mode as otherwise display will be
> blank for cases where there was nothing wrong with the monitor as such
> but the EDID read from aux failed for some reason. Even DRM does this
> but just not 640x480 here:
>
> 518     if (count == 0 && (connector->status == connector_status_connected ||
> 519                        connector->status == connector_status_unknown))
> 520             count = drm_add_modes_noedid(connector, 1024, 768);

But drm_add_modes_noedid() _will_ add the 640x480 modes, won't it? It
will add all "failsafe" modes that are less than or equal to 1024x768
and 60Hz or less. See the table "drm_dmt_modes". I don't understand
why the DRM core's call doesn't solve the problem for you in the first
place?


> 2) When there was a valid EDID but no 640x480 mode
>
> This is the equipment specific case and the one even I was a bit
> surprised. There is a DP compliance equipment we have in-house and while
> validation, it was found that in its list of modes , it did not have any
> modes which chromebook supported ( due to 2 lanes ). But my
> understanding was that, all sinks should have atleast 640x480 but
> apparently this one did not have that. So to handle this DP compliance
> equipment behavior, we had to do this.

That doesn't seem right. If there's a valid EDID and the valid EDID
doesn't contain 640x480, are you _sure_ you're supposed to be adding
640x480? That doesn't sound right to me. I've got a tiny display in
front of me for testing that only has one mode:

  #0 800x480 65.68 800 840 888 928 480 493 496 525 32000

It wouldn't be correct to add a 640x480 mode to this panel... ...and,
in fact, after applying ${SUBJECT} patch I see that DRM (incorrectly)
thinks that my display supports 640x480. I see:

  #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
  #1 640x480 59.94 640 656 752 800 480 490 492 525 25175

So IMO we _shouldn't_ land ${SUBJECT} patch.

Just for testing, I also tried a hack to make EDID reading fail
(return -EIO in the MSM dp_aux_transfer() function if msg->request <
8). Before ${SUBJECT} patch I'd see these modes:

  #0 1024x768 60.00 1024 1048 1184 1344 768 771 777 806 65000
  #1 800x600 60.32 800 840 968 1056 600 601 605 628 40000
  #2 800x600 56.25 800 824 896 1024 600 601 603 625 36000
  #3 848x480 60.00 848 864 976 1088 480 486 494 517 33750
  #4 640x480 59.94 640 656 752 800 480 490 492 525 25175

...and after ${SUBJECT} patch I'd see:

  #0 640x480 59.94 640 656 752 800 480 490 492 525 25175
  #1 1024x768 60.00 1024 1048 1184 1344 768 771 777 806 65000
  #2 800x600 60.32 800 840 968 1056 600 601 605 628 40000
  #3 800x600 56.25 800 824 896 1024 600 601 603 625 36000
  #4 848x480 60.00 848 864 976 1088 480 486 494 517 33750

...so your patch causes 640x480 to be prioritized. That also doesn't
seem ideal. If it was ideal, the DRM core should have listed 640x480
first.

I'll repeat my refrain that I'm not a DRM expert, but if I were doing
things, I'd rather revert commit 8b2c181e3dcf ("drm/msm/dp: add fail
safe mode outside of event_mutex context") and commit d4aca422539c
("drm/msm/dp: always add fail-safe mode into connector mode list") and
then go back and look more carefully about what the problem was in the
first place. Why didn't the failsafe modes added by the DRM core solve
the problem for you in the first place?

-Doug
Abhinav Kumar April 26, 2022, 1:42 a.m. UTC | #8
Hi Doug

On 4/25/2022 5:26 PM, Doug Anderson wrote:
> Hi,
> 
> On Sat, Apr 23, 2022 at 8:34 AM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>
>> On 4/22/2022 11:25 PM, Dmitry Baryshkov wrote:
>>> On Sat, 23 Apr 2022 at 03:12, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>>>
>>>>
>>>>
>>>> On 4/22/2022 5:07 PM, Dmitry Baryshkov wrote:
>>>>> On 23/04/2022 02:45, Kuogee Hsieh wrote:
>>>>>> Current DP driver implementation has adding safe mode done at
>>>>>> dp_hpd_plug_handle() which is expected to be executed under event
>>>>>> thread context.
>>>>>>
>>>>>> However there is possible circular locking happen (see blow stack trace)
>>>>>> after edp driver call dp_hpd_plug_handle() from dp_bridge_enable() which
>>>>>> is executed under drm_thread context.
>>>>>>
>>>>>> To break this circular locking, this patch have safe mode added at
>>>>>> dp_connector_get_mode() which is executed under drm thread context.
>>>>>> Therefore no lock acquired required for &dev->mode_config.mutex while
>>>>>> adding fail safe mode since it has been hold by drm thread already.
>>>>>>
>>>>>> ======================================================
>>>>>>     WARNING: possible circular locking dependency detected
>>>>>>     5.15.35-lockdep #6 Tainted: G        W
>>>>>>     ------------------------------------------------------
>>>>>>     frecon/429 is trying to acquire lock:
>>>>>>     ffffff808dc3c4e8 (&dev->mode_config.mutex){+.+.}-{3:3}, at:
>>>>>> dp_panel_add_fail_safe_mode+0x4c/0xa0
>>>>>>
>>>>>>     but task is already holding lock:
>>>>>>     ffffff808dc441e0 (&kms->commit_lock[i]){+.+.}-{3:3}, at:
>>>>>> lock_crtcs+0xb4/0x124
>>>>>>
>>>>>>     which lock already depends on the new lock.
>>>>>>
>>>>>>     the existing dependency chain (in reverse order) is:
>>>>>>
>>>>>>     -> #3 (&kms->commit_lock[i]){+.+.}-{3:3}:
>>>>>>            __mutex_lock_common+0x174/0x1a64
>>>>>>            mutex_lock_nested+0x98/0xac
>>>>>>            lock_crtcs+0xb4/0x124
>>>>>>            msm_atomic_commit_tail+0x330/0x748
>>>>>>            commit_tail+0x19c/0x278
>>>>>>            drm_atomic_helper_commit+0x1dc/0x1f0
>>>>>>            drm_atomic_commit+0xc0/0xd8
>>>>>>            drm_atomic_helper_set_config+0xb4/0x134
>>>>>>            drm_mode_setcrtc+0x688/0x1248
>>>>>>            drm_ioctl_kernel+0x1e4/0x338
>>>>>>            drm_ioctl+0x3a4/0x684
>>>>>>            __arm64_sys_ioctl+0x118/0x154
>>>>>>            invoke_syscall+0x78/0x224
>>>>>>            el0_svc_common+0x178/0x200
>>>>>>            do_el0_svc+0x94/0x13c
>>>>>>            el0_svc+0x5c/0xec
>>>>>>            el0t_64_sync_handler+0x78/0x108
>>>>>>            el0t_64_sync+0x1a4/0x1a8
>>>>>>
>>>>>>     -> #2 (crtc_ww_class_mutex){+.+.}-{3:3}:
>>>>>>            __mutex_lock_common+0x174/0x1a64
>>>>>>            ww_mutex_lock+0xb8/0x278
>>>>>>            modeset_lock+0x304/0x4ac
>>>>>>            drm_modeset_lock+0x4c/0x7c
>>>>>>            drmm_mode_config_init+0x4a8/0xc50
>>>>>>            msm_drm_init+0x274/0xac0
>>>>>>            msm_drm_bind+0x20/0x2c
>>>>>>            try_to_bring_up_master+0x3dc/0x470
>>>>>>            __component_add+0x18c/0x3c0
>>>>>>            component_add+0x1c/0x28
>>>>>>            dp_display_probe+0x954/0xa98
>>>>>>            platform_probe+0x124/0x15c
>>>>>>            really_probe+0x1b0/0x5f8
>>>>>>            __driver_probe_device+0x174/0x20c
>>>>>>            driver_probe_device+0x70/0x134
>>>>>>            __device_attach_driver+0x130/0x1d0
>>>>>>            bus_for_each_drv+0xfc/0x14c
>>>>>>            __device_attach+0x1bc/0x2bc
>>>>>>            device_initial_probe+0x1c/0x28
>>>>>>            bus_probe_device+0x94/0x178
>>>>>>            deferred_probe_work_func+0x1a4/0x1f0
>>>>>>            process_one_work+0x5d4/0x9dc
>>>>>>            worker_thread+0x898/0xccc
>>>>>>            kthread+0x2d4/0x3d4
>>>>>>            ret_from_fork+0x10/0x20
>>>>>>
>>>>>>     -> #1 (crtc_ww_class_acquire){+.+.}-{0:0}:
>>>>>>            ww_acquire_init+0x1c4/0x2c8
>>>>>>            drm_modeset_acquire_init+0x44/0xc8
>>>>>>            drm_helper_probe_single_connector_modes+0xb0/0x12dc
>>>>>>            drm_mode_getconnector+0x5dc/0xfe8
>>>>>>            drm_ioctl_kernel+0x1e4/0x338
>>>>>>            drm_ioctl+0x3a4/0x684
>>>>>>            __arm64_sys_ioctl+0x118/0x154
>>>>>>            invoke_syscall+0x78/0x224
>>>>>>            el0_svc_common+0x178/0x200
>>>>>>            do_el0_svc+0x94/0x13c
>>>>>>            el0_svc+0x5c/0xec
>>>>>>            el0t_64_sync_handler+0x78/0x108
>>>>>>            el0t_64_sync+0x1a4/0x1a8
>>>>>>
>>>>>>     -> #0 (&dev->mode_config.mutex){+.+.}-{3:3}:
>>>>>>            __lock_acquire+0x2650/0x672c
>>>>>>            lock_acquire+0x1b4/0x4ac
>>>>>>            __mutex_lock_common+0x174/0x1a64
>>>>>>            mutex_lock_nested+0x98/0xac
>>>>>>            dp_panel_add_fail_safe_mode+0x4c/0xa0
>>>>>>            dp_hpd_plug_handle+0x1f0/0x280
>>>>>>            dp_bridge_enable+0x94/0x2b8
>>>>>>            drm_atomic_bridge_chain_enable+0x11c/0x168
>>>>>>            drm_atomic_helper_commit_modeset_enables+0x500/0x740
>>>>>>            msm_atomic_commit_tail+0x3e4/0x748
>>>>>>            commit_tail+0x19c/0x278
>>>>>>            drm_atomic_helper_commit+0x1dc/0x1f0
>>>>>>            drm_atomic_commit+0xc0/0xd8
>>>>>>            drm_atomic_helper_set_config+0xb4/0x134
>>>>>>            drm_mode_setcrtc+0x688/0x1248
>>>>>>            drm_ioctl_kernel+0x1e4/0x338
>>>>>>            drm_ioctl+0x3a4/0x684
>>>>>>            __arm64_sys_ioctl+0x118/0x154
>>>>>>            invoke_syscall+0x78/0x224
>>>>>>            el0_svc_common+0x178/0x200
>>>>>>            do_el0_svc+0x94/0x13c
>>>>>>            el0_svc+0x5c/0xec
>>>>>>            el0t_64_sync_handler+0x78/0x108
>>>>>>            el0t_64_sync+0x1a4/0x1a8
>>>>>>
>>>>>> Signed-off-by: Kuogee Hsieh <quic_khsieh@quicinc.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/msm/dp/dp_display.c |  6 ------
>>>>>>     drivers/gpu/drm/msm/dp/dp_panel.c   | 23 +++++++++++++----------
>>>>>>     2 files changed, 13 insertions(+), 16 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/msm/dp/dp_display.c
>>>>>> b/drivers/gpu/drm/msm/dp/dp_display.c
>>>>>> index 92cd50f..01453db 100644
>>>>>> --- a/drivers/gpu/drm/msm/dp/dp_display.c
>>>>>> +++ b/drivers/gpu/drm/msm/dp/dp_display.c
>>>>>> @@ -555,12 +555,6 @@ static int dp_hpd_plug_handle(struct
>>>>>> dp_display_private *dp, u32 data)
>>>>>>         mutex_unlock(&dp->event_mutex);
>>>>>> -    /*
>>>>>> -     * add fail safe mode outside event_mutex scope
>>>>>> -     * to avoid potiential circular lock with drm thread
>>>>>> -     */
>>>>>> -    dp_panel_add_fail_safe_mode(dp->dp_display.connector);
>>>>>> -
>>>>>>         /* uevent will complete connection part */
>>>>>>         return 0;
>>>>>>     };
>>>>>> diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c
>>>>>> b/drivers/gpu/drm/msm/dp/dp_panel.c
>>>>>> index 1aa9aa8c..23fee42 100644
>>>>>> --- a/drivers/gpu/drm/msm/dp/dp_panel.c
>>>>>> +++ b/drivers/gpu/drm/msm/dp/dp_panel.c
>>>>>> @@ -151,15 +151,6 @@ static int dp_panel_update_modes(struct
>>>>>> drm_connector *connector,
>>>>>>         return rc;
>>>>>>     }
>>>>>> -void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
>>>>>> -{
>>>>>> -    /* fail safe edid */
>>>>>> -    mutex_lock(&connector->dev->mode_config.mutex);
>>>>>> -    if (drm_add_modes_noedid(connector, 640, 480))
>>>>>> -        drm_set_preferred_mode(connector, 640, 480);
>>>>>> -    mutex_unlock(&connector->dev->mode_config.mutex);
>>>>>> -}
>>>>>> -
>>>>>>     int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
>>>>>>         struct drm_connector *connector)
>>>>>>     {
>>>>>> @@ -216,7 +207,11 @@ int dp_panel_read_sink_caps(struct dp_panel
>>>>>> *dp_panel,
>>>>>>                 goto end;
>>>>>>             }
>>>>>> -        dp_panel_add_fail_safe_mode(connector);
>>>>>> +        /* fail safe edid */
>>>>>> +        mutex_lock(&connector->dev->mode_config.mutex);
>>>>>> +        if (drm_add_modes_noedid(connector, 640, 480))
>>>>>> +            drm_set_preferred_mode(connector, 640, 480);
>>>>>> +        mutex_unlock(&connector->dev->mode_config.mutex);
>>>>>>         }
>>>>>>         if (panel->aux_cfg_update_done) {
>>>>>> @@ -266,6 +261,14 @@ int dp_panel_get_modes(struct dp_panel *dp_panel,
>>>>>>             return -EINVAL;
>>>>>>         }
>>>>>> +    /*
>>>>>> +     * add fail safe mode (640x480) here
>>>>>> +     * since we are executed in drm_thread context,
>>>>>> +     * no mode_config.mutex acquired required
>>>>>> +     */
>>>>>> +    if (drm_add_modes_noedid(connector, 640, 480))
>>>>>> +        drm_set_preferred_mode(connector, 640, 480);
>>>>>> +
>>>>>>         if (dp_panel->edid)
>>>>>>             return dp_panel_update_modes(connector, dp_panel->edid);
>>>>> Also, wouldn't calling get_modes() several times make cause adding more
>>>>> and more 640x480 modes to the modes list?
>>>>>
>>>>
>>>> Shouldnt DRM be blocking that here? Call should trickle down here only
>>>> if count_modes was 0
>>>>
>>>>       if (out_resp->count_modes == 0) {
>>>>            if (is_current_master)
>>>>                connector->funcs->fill_modes(connector,
>>>>                                 dev->mode_config.max_width,
>>>>                                 dev->mode_config.max_height);
>>>>            else
>>>>                drm_dbg_kms(dev, "User-space requested a forced probe on
>>>> [CONNECTOR:%d:%s] but is not the DRM master, demoting to read-only probe",
>>>>                        connector->base.id, connector->name);
>>>>        }
>>>>
>>>
>>> count_modes is set by userspace:
>>>           /*
>>>            * This ioctl is called twice, once to determine how much space is
>>>            * needed, and the 2nd time to fill it.
>>>            */
>>>
>>> So, nothing prevents userspace from passing zero count_mode more than once.
>> Ack, some non-optimized usermodes can do this.
>>>
>>> However drm_helper_probe_single_connector_modes() will set old modes
>>> to MODE_STALE and then will call get_modes().
>>> Then drm_mode_prune_invalid() will prune stale modes. So, this should be fine.
>>>
>> Got it.
>>> A more generic question is why do we need to add the mode in two places?
>>>
>> Answering behalf of kuogee but the two places are for different purposes:
>>
>> 1) When there is no EDID read
>>
>> if (!dp_panel->edid) {
>>
>> That case we should add the fail-safe mode as otherwise display will be
>> blank for cases where there was nothing wrong with the monitor as such
>> but the EDID read from aux failed for some reason. Even DRM does this
>> but just not 640x480 here:
>>
>> 518     if (count == 0 && (connector->status == connector_status_connected ||
>> 519                        connector->status == connector_status_unknown))
>> 520             count = drm_add_modes_noedid(connector, 1024, 768);
> 
> But drm_add_modes_noedid() _will_ add the 640x480 modes, won't it? It
> will add all "failsafe" modes that are less than or equal to 1024x768
> and 60Hz or less. See the table "drm_dmt_modes". I don't understand
> why the DRM core's call doesn't solve the problem for you in the first
> place?

This is a good point that drm_add_modes_noedid() will add all modes 
<=1024x768. Perhaps we can drop the call for the case where there was no 
EDID and let DRM fwk handle that.

But that wont fix this problem, will explain in (2).

> 
> 
>> 2) When there was a valid EDID but no 640x480 mode
>>
>> This is the equipment specific case and the one even I was a bit
>> surprised. There is a DP compliance equipment we have in-house and while
>> validation, it was found that in its list of modes , it did not have any
>> modes which chromebook supported ( due to 2 lanes ). But my
>> understanding was that, all sinks should have atleast 640x480 but
>> apparently this one did not have that. So to handle this DP compliance
>> equipment behavior, we had to do this.
> 
> That doesn't seem right. If there's a valid EDID and the valid EDID
> doesn't contain 640x480, are you _sure_ you're supposed to be adding
> 640x480? That doesn't sound right to me. I've got a tiny display in
> front of me for testing that only has one mode:
> 
>    #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
> 

As I had wrote, DRM core kicks in only when the count of modes is 0.
Here what is happening is the count was not 0 but 640x480 was not 
present in the EDID. So we had to add it explicitly.

Your tiny display is a display port display?

I am referring to only display port monitors. If your tiny display is 
DP, it should have had 640x480 in its list of modes.


> It wouldn't be correct to add a 640x480 mode to this panel... ...and,
> in fact, after applying ${SUBJECT} patch I see that DRM (incorrectly)
> thinks that my display supports 640x480. I see:
> 
>    #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
>    #1 640x480 59.94 640 656 752 800 480 490 492 525 25175
> 
Its not incorrect if its display port as it should have had it.
The equipment we had did not have it which was incorrect.

So typically for DP monitors this change should cause no change as 
640x480 mode should already be present.


> So IMO we _shouldn't_ land ${SUBJECT} patch.
> 
> Just for testing, I also tried a hack to make EDID reading fail
> (return -EIO in the MSM dp_aux_transfer() function if msg->request <
> 8). Before ${SUBJECT} patch I'd see these modes:
> 
>    #0 1024x768 60.00 1024 1048 1184 1344 768 771 777 806 65000
>    #1 800x600 60.32 800 840 968 1056 600 601 605 628 40000
>    #2 800x600 56.25 800 824 896 1024 600 601 603 625 36000
>    #3 848x480 60.00 848 864 976 1088 480 486 494 517 33750
>    #4 640x480 59.94 640 656 752 800 480 490 492 525 25175
> 
> ...and after ${SUBJECT} patch I'd see:
> 
>    #0 640x480 59.94 640 656 752 800 480 490 492 525 25175
>    #1 1024x768 60.00 1024 1048 1184 1344 768 771 777 806 65000
>    #2 800x600 60.32 800 840 968 1056 600 601 605 628 40000
>    #3 800x600 56.25 800 824 896 1024 600 601 603 625 36000
>    #4 848x480 60.00 848 864 976 1088 480 486 494 517 33750
> 
> ...so your patch causes 640x480 to be prioritized. That also doesn't
> seem ideal. If it was ideal, the DRM core should have listed 640x480
> first.

So this is a different display or these modes are coming due to the 
drm_add_modes_noedid() call because of the EDID read fail right?

If its coming due to the drm_add_modes_noedid() call and then we are 
adding the 640x480 mode on top of that, like I mentioned in my previous 
comment, we can remove the call for the !edid case to address this.

> 
> I'll repeat my refrain that I'm not a DRM expert, but if I were doing
> things, I'd rather revert commit 8b2c181e3dcf ("drm/msm/dp: add fail
> safe mode outside of event_mutex context") and commit d4aca422539c
> ("drm/msm/dp: always add fail-safe mode into connector mode list") and
> then go back and look more carefully about what the problem was in the
> first place. Why didn't the failsafe modes added by the DRM core solve
> the problem for you in the first place?

I have explained why DRM core did not solve this problem. Thats because 
it will hit only when count of modes is 0, here count of modes is not 0.
So we know the root-cause.
> 
> -Doug
Doug Anderson April 26, 2022, 2:18 a.m. UTC | #9
Hi,

On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
> >> 2) When there was a valid EDID but no 640x480 mode
> >>
> >> This is the equipment specific case and the one even I was a bit
> >> surprised. There is a DP compliance equipment we have in-house and while
> >> validation, it was found that in its list of modes , it did not have any
> >> modes which chromebook supported ( due to 2 lanes ). But my
> >> understanding was that, all sinks should have atleast 640x480 but
> >> apparently this one did not have that. So to handle this DP compliance
> >> equipment behavior, we had to do this.
> >
> > That doesn't seem right. If there's a valid EDID and the valid EDID
> > doesn't contain 640x480, are you _sure_ you're supposed to be adding
> > 640x480? That doesn't sound right to me. I've got a tiny display in
> > front of me for testing that only has one mode:
> >
> >    #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
> >
>
> As I had wrote, DRM core kicks in only when the count of modes is 0.
> Here what is happening is the count was not 0 but 640x480 was not
> present in the EDID. So we had to add it explicitly.
>
> Your tiny display is a display port display?
>
> I am referring to only display port monitors. If your tiny display is
> DP, it should have had 640x480 in its list of modes.

My tiny display is actually a HDMI display hooked up to a HDMI to DP
(active) adapter.

...but this is a legal and common thing to have. I suppose possibly my
HDMI display is "illegal"?

OK, so reading through the spec more carefully, I do see that the DP
spec makes numerous mentions of the fact that DP sinks _must_ support
640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
Timing Format" says that we must support 640x480. It seems like that's
_intended_ to be used only if the EDID read fails, though or if we
somehow have to output video without knowledge of the EDID. It seems
hard to believe that there's a great reason to assume a display will
support 640x480 if we have more accurate knowledge.

In any case, I guess I would still say that adding this mode belongs
in the DRM core. The core should notice that it's a DP connection
(bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
left out and it should add it. We should also make sure it's not
"preferred" and is last in the list so we never accidentally pick it.
If DP truly says that we should always give the user 640x480 then
that's true for everyone, not just Qualcomm. We should add it in the
core. If, later, someone wants to hide this from the UI it would be
much easier if they only needed to modify one place.


> > So IMO we _shouldn't_ land ${SUBJECT} patch.
> >
> > Just for testing, I also tried a hack to make EDID reading fail
> > (return -EIO in the MSM dp_aux_transfer() function if msg->request <
> > 8). Before ${SUBJECT} patch I'd see these modes:
> >
> >    #0 1024x768 60.00 1024 1048 1184 1344 768 771 777 806 65000
> >    #1 800x600 60.32 800 840 968 1056 600 601 605 628 40000
> >    #2 800x600 56.25 800 824 896 1024 600 601 603 625 36000
> >    #3 848x480 60.00 848 864 976 1088 480 486 494 517 33750
> >    #4 640x480 59.94 640 656 752 800 480 490 492 525 25175
> >
> > ...and after ${SUBJECT} patch I'd see:
> >
> >    #0 640x480 59.94 640 656 752 800 480 490 492 525 25175
> >    #1 1024x768 60.00 1024 1048 1184 1344 768 771 777 806 65000
> >    #2 800x600 60.32 800 840 968 1056 600 601 605 628 40000
> >    #3 800x600 56.25 800 824 896 1024 600 601 603 625 36000
> >    #4 848x480 60.00 848 864 976 1088 480 486 494 517 33750
> >
> > ...so your patch causes 640x480 to be prioritized. That also doesn't
> > seem ideal. If it was ideal, the DRM core should have listed 640x480
> > first.
>
> So this is a different display or these modes are coming due to the
> drm_add_modes_noedid() call because of the EDID read fail right?

Right, it's from the !edid case.

-Doug
Abhinav Kumar April 26, 2022, 3:35 a.m. UTC | #10
On 4/25/2022 7:18 PM, Doug Anderson wrote:
> Hi,
> 
> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>
>>>> 2) When there was a valid EDID but no 640x480 mode
>>>>
>>>> This is the equipment specific case and the one even I was a bit
>>>> surprised. There is a DP compliance equipment we have in-house and while
>>>> validation, it was found that in its list of modes , it did not have any
>>>> modes which chromebook supported ( due to 2 lanes ). But my
>>>> understanding was that, all sinks should have atleast 640x480 but
>>>> apparently this one did not have that. So to handle this DP compliance
>>>> equipment behavior, we had to do this.
>>>
>>> That doesn't seem right. If there's a valid EDID and the valid EDID
>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
>>> 640x480? That doesn't sound right to me. I've got a tiny display in
>>> front of me for testing that only has one mode:
>>>
>>>     #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
>>>
>>
>> As I had wrote, DRM core kicks in only when the count of modes is 0.
>> Here what is happening is the count was not 0 but 640x480 was not
>> present in the EDID. So we had to add it explicitly.
>>
>> Your tiny display is a display port display?
>>
>> I am referring to only display port monitors. If your tiny display is
>> DP, it should have had 640x480 in its list of modes.
> 
> My tiny display is actually a HDMI display hooked up to a HDMI to DP
> (active) adapter.
> 
> ...but this is a legal and common thing to have. I suppose possibly my
> HDMI display is "illegal"?
> 
> OK, so reading through the spec more carefully, I do see that the DP
> spec makes numerous mentions of the fact that DP sinks _must_ support
> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
> Timing Format" says that we must support 640x480. It seems like that's
> _intended_ to be used only if the EDID read fails, though or if we
> somehow have to output video without knowledge of the EDID. It seems
> hard to believe that there's a great reason to assume a display will
> support 640x480 if we have more accurate knowledge.
> 
> In any case, I guess I would still say that adding this mode belongs
> in the DRM core. The core should notice that it's a DP connection
> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
> left out and it should add it. We should also make sure it's not
> "preferred" and is last in the list so we never accidentally pick it.
> If DP truly says that we should always give the user 640x480 then
> that's true for everyone, not just Qualcomm. We should add it in the
> core. If, later, someone wants to hide this from the UI it would be
> much easier if they only needed to modify one place.
> 

So I debugged with kuogee just now using the DP compliance equipment.
It turns out, the issue is not that 640x480 mode is not present.

The issue is that it is not marked as preferred.

Hence we missed this part during debugging this equipment failure.

We still have to figure out the best way to either mark 640x480 as 
preferred or eliminate other modes during the test-case so that 640x480 
is actually picked by usermode.

Now that being said, the fix still doesn't belong in the framework. It 
has to be in the msm/dp code.

Different vendors handle this failure case differently looks like.

Lets take below snippet from i915 as example.

3361 	if (intel_connector->detect_edid == NULL ||
3362 	    connector->edid_corrupt ||
3363 	    intel_dp->aux.i2c_defer_count > 6) {
3364 		/* Check EDID read for NACKs, DEFERs and corruption
3365 		 * (DP CTS 1.2 Core r1.1)
3366 		 *    4.2.2.4 : Failed EDID read, I2C_NAK
3367 		 *    4.2.2.5 : Failed EDID read, I2C_DEFER
3368 		 *    4.2.2.6 : EDID corruption detected
3369 		 * Use failsafe mode for all cases
3370 		 */
3371 		if (intel_dp->aux.i2c_nack_count > 0 ||
3372 			intel_dp->aux.i2c_defer_count > 0)
3373 			drm_dbg_kms(&i915->drm,
3374 				    "EDID read had %d NACKs, %d DEFERs\n",
3375 				    intel_dp->aux.i2c_nack_count,
3376 				    intel_dp->aux.i2c_defer_count);
3377 		intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_FAILSAFE;

This marks the fail safe mode and IGT test case reads this to set this 
mode and hence the test passes.

We rely on the chromeOS usermode to output pixel data for this test-case 
and not IGT. We use IGT only for video pattern CTS today but this is a 
different test-case which is failing.

ChromeOS usermode will not pick 640x480 unless we mark it as preferred 
or other modes are eliminated.

So we have to come up with the right way for the usermode to pick 640x480.

We will discuss this a bit more and come up with a different change.

> 
>>> So IMO we _shouldn't_ land ${SUBJECT} patch.
>>>
>>> Just for testing, I also tried a hack to make EDID reading fail
>>> (return -EIO in the MSM dp_aux_transfer() function if msg->request <
>>> 8). Before ${SUBJECT} patch I'd see these modes:
>>>
>>>     #0 1024x768 60.00 1024 1048 1184 1344 768 771 777 806 65000
>>>     #1 800x600 60.32 800 840 968 1056 600 601 605 628 40000
>>>     #2 800x600 56.25 800 824 896 1024 600 601 603 625 36000
>>>     #3 848x480 60.00 848 864 976 1088 480 486 494 517 33750
>>>     #4 640x480 59.94 640 656 752 800 480 490 492 525 25175
>>>
>>> ...and after ${SUBJECT} patch I'd see:
>>>
>>>     #0 640x480 59.94 640 656 752 800 480 490 492 525 25175
>>>     #1 1024x768 60.00 1024 1048 1184 1344 768 771 777 806 65000
>>>     #2 800x600 60.32 800 840 968 1056 600 601 605 628 40000
>>>     #3 800x600 56.25 800 824 896 1024 600 601 603 625 36000
>>>     #4 848x480 60.00 848 864 976 1088 480 486 494 517 33750
>>>
>>> ...so your patch causes 640x480 to be prioritized. That also doesn't
>>> seem ideal. If it was ideal, the DRM core should have listed 640x480
>>> first.
>>
>> So this is a different display or these modes are coming due to the
>> drm_add_modes_noedid() call because of the EDID read fail right?
> 
> Right, it's from the !edid case.
> 
> -Doug
Doug Anderson April 26, 2022, 3:20 p.m. UTC | #11
Hi,

On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
> On 4/25/2022 7:18 PM, Doug Anderson wrote:
> > Hi,
> >
> > On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
> >>
> >>>> 2) When there was a valid EDID but no 640x480 mode
> >>>>
> >>>> This is the equipment specific case and the one even I was a bit
> >>>> surprised. There is a DP compliance equipment we have in-house and while
> >>>> validation, it was found that in its list of modes , it did not have any
> >>>> modes which chromebook supported ( due to 2 lanes ). But my
> >>>> understanding was that, all sinks should have atleast 640x480 but
> >>>> apparently this one did not have that. So to handle this DP compliance
> >>>> equipment behavior, we had to do this.
> >>>
> >>> That doesn't seem right. If there's a valid EDID and the valid EDID
> >>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
> >>> 640x480? That doesn't sound right to me. I've got a tiny display in
> >>> front of me for testing that only has one mode:
> >>>
> >>>     #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
> >>>
> >>
> >> As I had wrote, DRM core kicks in only when the count of modes is 0.
> >> Here what is happening is the count was not 0 but 640x480 was not
> >> present in the EDID. So we had to add it explicitly.
> >>
> >> Your tiny display is a display port display?
> >>
> >> I am referring to only display port monitors. If your tiny display is
> >> DP, it should have had 640x480 in its list of modes.
> >
> > My tiny display is actually a HDMI display hooked up to a HDMI to DP
> > (active) adapter.
> >
> > ...but this is a legal and common thing to have. I suppose possibly my
> > HDMI display is "illegal"?
> >
> > OK, so reading through the spec more carefully, I do see that the DP
> > spec makes numerous mentions of the fact that DP sinks _must_ support
> > 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
> > Timing Format" says that we must support 640x480. It seems like that's
> > _intended_ to be used only if the EDID read fails, though or if we
> > somehow have to output video without knowledge of the EDID. It seems
> > hard to believe that there's a great reason to assume a display will
> > support 640x480 if we have more accurate knowledge.
> >
> > In any case, I guess I would still say that adding this mode belongs
> > in the DRM core. The core should notice that it's a DP connection
> > (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
> > left out and it should add it. We should also make sure it's not
> > "preferred" and is last in the list so we never accidentally pick it.
> > If DP truly says that we should always give the user 640x480 then
> > that's true for everyone, not just Qualcomm. We should add it in the
> > core. If, later, someone wants to hide this from the UI it would be
> > much easier if they only needed to modify one place.
> >
>
> So I debugged with kuogee just now using the DP compliance equipment.
> It turns out, the issue is not that 640x480 mode is not present.
>
> The issue is that it is not marked as preferred.
>
> Hence we missed this part during debugging this equipment failure.
>
> We still have to figure out the best way to either mark 640x480 as
> preferred or eliminate other modes during the test-case so that 640x480
> is actually picked by usermode.
>
> Now that being said, the fix still doesn't belong in the framework. It
> has to be in the msm/dp code.
>
> Different vendors handle this failure case differently looks like.
>
> Lets take below snippet from i915 as example.
>
> 3361    if (intel_connector->detect_edid == NULL ||
> 3362        connector->edid_corrupt ||
> 3363        intel_dp->aux.i2c_defer_count > 6) {
> 3364            /* Check EDID read for NACKs, DEFERs and corruption
> 3365             * (DP CTS 1.2 Core r1.1)
> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
> 3368             *    4.2.2.6 : EDID corruption detected
> 3369             * Use failsafe mode for all cases
> 3370             */
> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
> 3372                    intel_dp->aux.i2c_defer_count > 0)
> 3373                    drm_dbg_kms(&i915->drm,
> 3374                                "EDID read had %d NACKs, %d DEFERs\n",
> 3375                                intel_dp->aux.i2c_nack_count,
> 3376                                intel_dp->aux.i2c_defer_count);
> 3377            intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_FAILSAFE;

Just because Intel DRM has its own solution for something doesn't mean
everyone else should copy them and implement their own solution. Up
until recently DP AUX backlights were baked into different DRM
drivers. A recent effort was made to pull it out. I think the Intel
DRM code was the "first one" to the party and it wasn't clear how
things should be broken up to share with other drivers, so mostly it
did everything itself, but that's not the long term answer.

I'm not saying that we need to block your change on a full re-design
or anything, but I'm just saying that:

* You're trying to implement a generic DP rule, not something specific
to Qualcomm hardware. That implies that, if possible, it shouldn't be
in a Qualcomm driver.

* It doesn't seem like it would be terrible to handle this in the core.


> This marks the fail safe mode and IGT test case reads this to set this
> mode and hence the test passes.
>
> We rely on the chromeOS usermode to output pixel data for this test-case
> and not IGT. We use IGT only for video pattern CTS today but this is a
> different test-case which is failing.
>
> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
> or other modes are eliminated.
>
> So we have to come up with the right way for the usermode to pick 640x480.
>
> We will discuss this a bit more and come up with a different change.

Can you provide the exact EDID from the failing test case? Maybe that
will help shed some light on what's going on. I looked at the original
commit and it just referred to 4.2.2.1, which I assume is "EDID Read
upon HPD Plug Event", but that doesn't give details that seem relevant
to the discussion here.

I guess maybe what's happening is that the test case is giving an EDID
where all the modes are not supportable by the current clock rate /
lanes? ...and then somehow we're not falling back to 640x480. It's
always possible that this is a userspace problem.

In any case, would you object to a revert of the patches in the short term?

-Doug
Abhinav Kumar April 26, 2022, 3:37 p.m. UTC | #12
Hi Doug

On 4/26/2022 8:20 AM, Doug Anderson wrote:
> Hi,
> 
> On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>
>> On 4/25/2022 7:18 PM, Doug Anderson wrote:
>>> Hi,
>>>
>>> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>>>
>>>>>> 2) When there was a valid EDID but no 640x480 mode
>>>>>>
>>>>>> This is the equipment specific case and the one even I was a bit
>>>>>> surprised. There is a DP compliance equipment we have in-house and while
>>>>>> validation, it was found that in its list of modes , it did not have any
>>>>>> modes which chromebook supported ( due to 2 lanes ). But my
>>>>>> understanding was that, all sinks should have atleast 640x480 but
>>>>>> apparently this one did not have that. So to handle this DP compliance
>>>>>> equipment behavior, we had to do this.
>>>>>
>>>>> That doesn't seem right. If there's a valid EDID and the valid EDID
>>>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
>>>>> 640x480? That doesn't sound right to me. I've got a tiny display in
>>>>> front of me for testing that only has one mode:
>>>>>
>>>>>      #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
>>>>>
>>>>
>>>> As I had wrote, DRM core kicks in only when the count of modes is 0.
>>>> Here what is happening is the count was not 0 but 640x480 was not
>>>> present in the EDID. So we had to add it explicitly.
>>>>
>>>> Your tiny display is a display port display?
>>>>
>>>> I am referring to only display port monitors. If your tiny display is
>>>> DP, it should have had 640x480 in its list of modes.
>>>
>>> My tiny display is actually a HDMI display hooked up to a HDMI to DP
>>> (active) adapter.
>>>
>>> ...but this is a legal and common thing to have. I suppose possibly my
>>> HDMI display is "illegal"?
>>>
>>> OK, so reading through the spec more carefully, I do see that the DP
>>> spec makes numerous mentions of the fact that DP sinks _must_ support
>>> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
>>> Timing Format" says that we must support 640x480. It seems like that's
>>> _intended_ to be used only if the EDID read fails, though or if we
>>> somehow have to output video without knowledge of the EDID. It seems
>>> hard to believe that there's a great reason to assume a display will
>>> support 640x480 if we have more accurate knowledge.
>>>
>>> In any case, I guess I would still say that adding this mode belongs
>>> in the DRM core. The core should notice that it's a DP connection
>>> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
>>> left out and it should add it. We should also make sure it's not
>>> "preferred" and is last in the list so we never accidentally pick it.
>>> If DP truly says that we should always give the user 640x480 then
>>> that's true for everyone, not just Qualcomm. We should add it in the
>>> core. If, later, someone wants to hide this from the UI it would be
>>> much easier if they only needed to modify one place.
>>>
>>
>> So I debugged with kuogee just now using the DP compliance equipment.
>> It turns out, the issue is not that 640x480 mode is not present.
>>
>> The issue is that it is not marked as preferred.
>>
>> Hence we missed this part during debugging this equipment failure.
>>
>> We still have to figure out the best way to either mark 640x480 as
>> preferred or eliminate other modes during the test-case so that 640x480
>> is actually picked by usermode.
>>
>> Now that being said, the fix still doesn't belong in the framework. It
>> has to be in the msm/dp code.
>>
>> Different vendors handle this failure case differently looks like.
>>
>> Lets take below snippet from i915 as example.
>>
>> 3361    if (intel_connector->detect_edid == NULL ||
>> 3362        connector->edid_corrupt ||
>> 3363        intel_dp->aux.i2c_defer_count > 6) {
>> 3364            /* Check EDID read for NACKs, DEFERs and corruption
>> 3365             * (DP CTS 1.2 Core r1.1)
>> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
>> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
>> 3368             *    4.2.2.6 : EDID corruption detected
>> 3369             * Use failsafe mode for all cases
>> 3370             */
>> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
>> 3372                    intel_dp->aux.i2c_defer_count > 0)
>> 3373                    drm_dbg_kms(&i915->drm,
>> 3374                                "EDID read had %d NACKs, %d DEFERs\n",
>> 3375                                intel_dp->aux.i2c_nack_count,
>> 3376                                intel_dp->aux.i2c_defer_count);
>> 3377            intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_FAILSAFE;
> 

The reason I pointed to this code is to give an example of how other 
drivers handle this test-case.

We added this patch for 4.2.2.1 and 4.2.2.6 EDID test cases.

The challenge here as found out from our discussion here was to mark a 
particular mode as preferred so that the Chrome usermode can pick it.

Now whats happening with that there was always a possibility of two 
modes being marked as preferred due to this and so-on.

We had a pretty long discussion last night and thought of all possible 
solutions but all of them look like a hack to us in the driver because 
we end up breaking other things due to this.

So we decided that driver is not the place to handle this test case.
Since we do have IGT support for chromebooks, we will handle both these 
test cases there as other vendors do the same way and it works.


> Just because Intel DRM has its own solution for something doesn't mean
> everyone else should copy them and implement their own solution. Up
> until recently DP AUX backlights were baked into different DRM
> drivers. A recent effort was made to pull it out. I think the Intel
> DRM code was the "first one" to the party and it wasn't clear how
> things should be broken up to share with other drivers, so mostly it
> did everything itself, but that's not the long term answer.
> 
> I'm not saying that we need to block your change on a full re-design
> or anything, but I'm just saying that:
> 
> * You're trying to implement a generic DP rule, not something specific
> to Qualcomm hardware. That implies that, if possible, it shouldn't be
> in a Qualcomm driver.
> 
> * It doesn't seem like it would be terrible to handle this in the core.
> 
> 
>> This marks the fail safe mode and IGT test case reads this to set this
>> mode and hence the test passes.
>>
>> We rely on the chromeOS usermode to output pixel data for this test-case
>> and not IGT. We use IGT only for video pattern CTS today but this is a
>> different test-case which is failing.
>>
>> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
>> or other modes are eliminated.
>>
>> So we have to come up with the right way for the usermode to pick 640x480.
>>
>> We will discuss this a bit more and come up with a different change.
> 
> Can you provide the exact EDID from the failing test case? Maybe that
> will help shed some light on what's going on. I looked at the original
> commit and it just referred to 4.2.2.1, which I assume is "EDID Read
> upon HPD Plug Event", but that doesn't give details that seem relevant
> to the discussion here.

Yes so it is 4.2.2.1 and 4.2.2.6.

That alone wont give the full picture.

So its a combination of things.

While running the test, the test equipment published only one mode.
But we could not support that mode because of 2 lanes.
Equipment did not add 640x480 to the list of modes.
DRM fwk will also not add it because count_modes is not 0 ( there was 
one mode ).
So we ended up making these changes.


> 
> I guess maybe what's happening is that the test case is giving an EDID
> where all the modes are not supportable by the current clock rate /
> lanes? ...and then somehow we're not falling back to 640x480. It's
> always possible that this is a userspace problem.
> 
> In any case, would you object to a revert of the patches in the short term?

Not sure, if you saw this change kuogee posted last night.
https://patchwork.freedesktop.org/patch/483415/
We did decided to remove all the code related to these test cases and 
handle them in IGT.

> 
> -Doug
Dmitry Baryshkov April 26, 2022, 5:01 p.m. UTC | #13
On 26/04/2022 18:37, Abhinav Kumar wrote:
> Hi Doug
> 
> On 4/26/2022 8:20 AM, Doug Anderson wrote:
>> Hi,
>>
>> On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar 
>> <quic_abhinavk@quicinc.com> wrote:
>>>
>>> On 4/25/2022 7:18 PM, Doug Anderson wrote:
>>>> Hi,
>>>>
>>>> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar 
>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>
>>>>>>> 2) When there was a valid EDID but no 640x480 mode
>>>>>>>
>>>>>>> This is the equipment specific case and the one even I was a bit
>>>>>>> surprised. There is a DP compliance equipment we have in-house 
>>>>>>> and while
>>>>>>> validation, it was found that in its list of modes , it did not 
>>>>>>> have any
>>>>>>> modes which chromebook supported ( due to 2 lanes ). But my
>>>>>>> understanding was that, all sinks should have atleast 640x480 but
>>>>>>> apparently this one did not have that. So to handle this DP 
>>>>>>> compliance
>>>>>>> equipment behavior, we had to do this.
>>>>>>
>>>>>> That doesn't seem right. If there's a valid EDID and the valid EDID
>>>>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
>>>>>> 640x480? That doesn't sound right to me. I've got a tiny display in
>>>>>> front of me for testing that only has one mode:
>>>>>>
>>>>>>      #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
>>>>>>
>>>>>
>>>>> As I had wrote, DRM core kicks in only when the count of modes is 0.
>>>>> Here what is happening is the count was not 0 but 640x480 was not
>>>>> present in the EDID. So we had to add it explicitly.
>>>>>
>>>>> Your tiny display is a display port display?
>>>>>
>>>>> I am referring to only display port monitors. If your tiny display is
>>>>> DP, it should have had 640x480 in its list of modes.
>>>>
>>>> My tiny display is actually a HDMI display hooked up to a HDMI to DP
>>>> (active) adapter.
>>>>
>>>> ...but this is a legal and common thing to have. I suppose possibly my
>>>> HDMI display is "illegal"?
>>>>
>>>> OK, so reading through the spec more carefully, I do see that the DP
>>>> spec makes numerous mentions of the fact that DP sinks _must_ support
>>>> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
>>>> Timing Format" says that we must support 640x480. It seems like that's
>>>> _intended_ to be used only if the EDID read fails, though or if we
>>>> somehow have to output video without knowledge of the EDID. It seems
>>>> hard to believe that there's a great reason to assume a display will
>>>> support 640x480 if we have more accurate knowledge.
>>>>
>>>> In any case, I guess I would still say that adding this mode belongs
>>>> in the DRM core. The core should notice that it's a DP connection
>>>> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
>>>> left out and it should add it. We should also make sure it's not
>>>> "preferred" and is last in the list so we never accidentally pick it.
>>>> If DP truly says that we should always give the user 640x480 then
>>>> that's true for everyone, not just Qualcomm. We should add it in the
>>>> core. If, later, someone wants to hide this from the UI it would be
>>>> much easier if they only needed to modify one place.
>>>>
>>>
>>> So I debugged with kuogee just now using the DP compliance equipment.
>>> It turns out, the issue is not that 640x480 mode is not present.
>>>
>>> The issue is that it is not marked as preferred.
>>>
>>> Hence we missed this part during debugging this equipment failure.
>>>
>>> We still have to figure out the best way to either mark 640x480 as
>>> preferred or eliminate other modes during the test-case so that 640x480
>>> is actually picked by usermode.
>>>
>>> Now that being said, the fix still doesn't belong in the framework. It
>>> has to be in the msm/dp code.
>>>
>>> Different vendors handle this failure case differently looks like.
>>>
>>> Lets take below snippet from i915 as example.
>>>
>>> 3361    if (intel_connector->detect_edid == NULL ||
>>> 3362        connector->edid_corrupt ||
>>> 3363        intel_dp->aux.i2c_defer_count > 6) {
>>> 3364            /* Check EDID read for NACKs, DEFERs and corruption
>>> 3365             * (DP CTS 1.2 Core r1.1)
>>> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
>>> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
>>> 3368             *    4.2.2.6 : EDID corruption detected
>>> 3369             * Use failsafe mode for all cases
>>> 3370             */
>>> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
>>> 3372                    intel_dp->aux.i2c_defer_count > 0)
>>> 3373                    drm_dbg_kms(&i915->drm,
>>> 3374                                "EDID read had %d NACKs, %d 
>>> DEFERs\n",
>>> 3375                                intel_dp->aux.i2c_nack_count,
>>> 3376                                intel_dp->aux.i2c_defer_count);
>>> 3377            intel_dp->compliance.test_data.edid = 
>>> INTEL_DP_RESOLUTION_FAILSAFE;
>>
> 
> The reason I pointed to this code is to give an example of how other 
> drivers handle this test-case.
> 
> We added this patch for 4.2.2.1 and 4.2.2.6 EDID test cases.
> 
> The challenge here as found out from our discussion here was to mark a 
> particular mode as preferred so that the Chrome usermode can pick it.
> 
> Now whats happening with that there was always a possibility of two 
> modes being marked as preferred due to this and so-on.
> 
> We had a pretty long discussion last night and thought of all possible 
> solutions but all of them look like a hack to us in the driver because 
> we end up breaking other things due to this.
> 
> So we decided that driver is not the place to handle this test case.
> Since we do have IGT support for chromebooks, we will handle both these 
> test cases there as other vendors do the same way and it works.
> 
> 
>> Just because Intel DRM has its own solution for something doesn't mean
>> everyone else should copy them and implement their own solution. Up
>> until recently DP AUX backlights were baked into different DRM
>> drivers. A recent effort was made to pull it out. I think the Intel
>> DRM code was the "first one" to the party and it wasn't clear how
>> things should be broken up to share with other drivers, so mostly it
>> did everything itself, but that's not the long term answer.
>>
>> I'm not saying that we need to block your change on a full re-design
>> or anything, but I'm just saying that:
>>
>> * You're trying to implement a generic DP rule, not something specific
>> to Qualcomm hardware. That implies that, if possible, it shouldn't be
>> in a Qualcomm driver.
>>
>> * It doesn't seem like it would be terrible to handle this in the core.
>>
>>
>>> This marks the fail safe mode and IGT test case reads this to set this
>>> mode and hence the test passes.
>>>
>>> We rely on the chromeOS usermode to output pixel data for this test-case
>>> and not IGT. We use IGT only for video pattern CTS today but this is a
>>> different test-case which is failing.
>>>
>>> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
>>> or other modes are eliminated.
>>>
>>> So we have to come up with the right way for the usermode to pick 
>>> 640x480.
>>>
>>> We will discuss this a bit more and come up with a different change.
>>
>> Can you provide the exact EDID from the failing test case? Maybe that
>> will help shed some light on what's going on. I looked at the original
>> commit and it just referred to 4.2.2.1, which I assume is "EDID Read
>> upon HPD Plug Event", but that doesn't give details that seem relevant
>> to the discussion here.
> 
> Yes so it is 4.2.2.1 and 4.2.2.6.
> 
> That alone wont give the full picture.
> 
> So its a combination of things.
> 
> While running the test, the test equipment published only one mode.
> But we could not support that mode because of 2 lanes.
> Equipment did not add 640x480 to the list of modes.
> DRM fwk will also not add it because count_modes is not 0 ( there was 
> one mode ).
> So we ended up making these changes.

I think a proper solution might be to rewrite 
drm_helper_probe_single_connector_modes() in the following way:
- call get_modes()
- validate the result
- prune invalid

- if the number of modes is 0, call drm_add_override_edid_modes()
- validate the result
- prune invalid

- if the number of modes is still 0, call drm_add_modes_noedid()
- validate the result
- prune invalid

[A separate change might happen here after all the checks: if the number 
of modes is still 0 and if it is a DP, enforce adding 640x480 even w/o 
validation. But generally I feel that this shouldn't be necessary 
because the previous step should have added it.]

This way we can be sure that all modes are validated, but still to do 
our best to add something supported to the list of modes.

> 
> 
>>
>> I guess maybe what's happening is that the test case is giving an EDID
>> where all the modes are not supportable by the current clock rate /
>> lanes? ...and then somehow we're not falling back to 640x480. It's
>> always possible that this is a userspace problem.
>>
>> In any case, would you object to a revert of the patches in the short 
>> term?
> 
> Not sure, if you saw this change kuogee posted last night.
> https://patchwork.freedesktop.org/patch/483415/
> We did decided to remove all the code related to these test cases and 
> handle them in IGT.
> 
>>
>> -Doug
Doug Anderson April 26, 2022, 5:11 p.m. UTC | #14
Hi,

On Tue, Apr 26, 2022 at 10:01 AM Dmitry Baryshkov
<dmitry.baryshkov@linaro.org> wrote:
>
> On 26/04/2022 18:37, Abhinav Kumar wrote:
> > Hi Doug
> >
> > On 4/26/2022 8:20 AM, Doug Anderson wrote:
> >> Hi,
> >>
> >> On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar
> >> <quic_abhinavk@quicinc.com> wrote:
> >>>
> >>> On 4/25/2022 7:18 PM, Doug Anderson wrote:
> >>>> Hi,
> >>>>
> >>>> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar
> >>>> <quic_abhinavk@quicinc.com> wrote:
> >>>>>
> >>>>>>> 2) When there was a valid EDID but no 640x480 mode
> >>>>>>>
> >>>>>>> This is the equipment specific case and the one even I was a bit
> >>>>>>> surprised. There is a DP compliance equipment we have in-house
> >>>>>>> and while
> >>>>>>> validation, it was found that in its list of modes , it did not
> >>>>>>> have any
> >>>>>>> modes which chromebook supported ( due to 2 lanes ). But my
> >>>>>>> understanding was that, all sinks should have atleast 640x480 but
> >>>>>>> apparently this one did not have that. So to handle this DP
> >>>>>>> compliance
> >>>>>>> equipment behavior, we had to do this.
> >>>>>>
> >>>>>> That doesn't seem right. If there's a valid EDID and the valid EDID
> >>>>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
> >>>>>> 640x480? That doesn't sound right to me. I've got a tiny display in
> >>>>>> front of me for testing that only has one mode:
> >>>>>>
> >>>>>>      #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
> >>>>>>
> >>>>>
> >>>>> As I had wrote, DRM core kicks in only when the count of modes is 0.
> >>>>> Here what is happening is the count was not 0 but 640x480 was not
> >>>>> present in the EDID. So we had to add it explicitly.
> >>>>>
> >>>>> Your tiny display is a display port display?
> >>>>>
> >>>>> I am referring to only display port monitors. If your tiny display is
> >>>>> DP, it should have had 640x480 in its list of modes.
> >>>>
> >>>> My tiny display is actually a HDMI display hooked up to a HDMI to DP
> >>>> (active) adapter.
> >>>>
> >>>> ...but this is a legal and common thing to have. I suppose possibly my
> >>>> HDMI display is "illegal"?
> >>>>
> >>>> OK, so reading through the spec more carefully, I do see that the DP
> >>>> spec makes numerous mentions of the fact that DP sinks _must_ support
> >>>> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
> >>>> Timing Format" says that we must support 640x480. It seems like that's
> >>>> _intended_ to be used only if the EDID read fails, though or if we
> >>>> somehow have to output video without knowledge of the EDID. It seems
> >>>> hard to believe that there's a great reason to assume a display will
> >>>> support 640x480 if we have more accurate knowledge.
> >>>>
> >>>> In any case, I guess I would still say that adding this mode belongs
> >>>> in the DRM core. The core should notice that it's a DP connection
> >>>> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
> >>>> left out and it should add it. We should also make sure it's not
> >>>> "preferred" and is last in the list so we never accidentally pick it.
> >>>> If DP truly says that we should always give the user 640x480 then
> >>>> that's true for everyone, not just Qualcomm. We should add it in the
> >>>> core. If, later, someone wants to hide this from the UI it would be
> >>>> much easier if they only needed to modify one place.
> >>>>
> >>>
> >>> So I debugged with kuogee just now using the DP compliance equipment.
> >>> It turns out, the issue is not that 640x480 mode is not present.
> >>>
> >>> The issue is that it is not marked as preferred.
> >>>
> >>> Hence we missed this part during debugging this equipment failure.
> >>>
> >>> We still have to figure out the best way to either mark 640x480 as
> >>> preferred or eliminate other modes during the test-case so that 640x480
> >>> is actually picked by usermode.
> >>>
> >>> Now that being said, the fix still doesn't belong in the framework. It
> >>> has to be in the msm/dp code.
> >>>
> >>> Different vendors handle this failure case differently looks like.
> >>>
> >>> Lets take below snippet from i915 as example.
> >>>
> >>> 3361    if (intel_connector->detect_edid == NULL ||
> >>> 3362        connector->edid_corrupt ||
> >>> 3363        intel_dp->aux.i2c_defer_count > 6) {
> >>> 3364            /* Check EDID read for NACKs, DEFERs and corruption
> >>> 3365             * (DP CTS 1.2 Core r1.1)
> >>> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
> >>> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
> >>> 3368             *    4.2.2.6 : EDID corruption detected
> >>> 3369             * Use failsafe mode for all cases
> >>> 3370             */
> >>> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
> >>> 3372                    intel_dp->aux.i2c_defer_count > 0)
> >>> 3373                    drm_dbg_kms(&i915->drm,
> >>> 3374                                "EDID read had %d NACKs, %d
> >>> DEFERs\n",
> >>> 3375                                intel_dp->aux.i2c_nack_count,
> >>> 3376                                intel_dp->aux.i2c_defer_count);
> >>> 3377            intel_dp->compliance.test_data.edid =
> >>> INTEL_DP_RESOLUTION_FAILSAFE;
> >>
> >
> > The reason I pointed to this code is to give an example of how other
> > drivers handle this test-case.
> >
> > We added this patch for 4.2.2.1 and 4.2.2.6 EDID test cases.
> >
> > The challenge here as found out from our discussion here was to mark a
> > particular mode as preferred so that the Chrome usermode can pick it.
> >
> > Now whats happening with that there was always a possibility of two
> > modes being marked as preferred due to this and so-on.
> >
> > We had a pretty long discussion last night and thought of all possible
> > solutions but all of them look like a hack to us in the driver because
> > we end up breaking other things due to this.
> >
> > So we decided that driver is not the place to handle this test case.
> > Since we do have IGT support for chromebooks, we will handle both these
> > test cases there as other vendors do the same way and it works.
> >
> >
> >> Just because Intel DRM has its own solution for something doesn't mean
> >> everyone else should copy them and implement their own solution. Up
> >> until recently DP AUX backlights were baked into different DRM
> >> drivers. A recent effort was made to pull it out. I think the Intel
> >> DRM code was the "first one" to the party and it wasn't clear how
> >> things should be broken up to share with other drivers, so mostly it
> >> did everything itself, but that's not the long term answer.
> >>
> >> I'm not saying that we need to block your change on a full re-design
> >> or anything, but I'm just saying that:
> >>
> >> * You're trying to implement a generic DP rule, not something specific
> >> to Qualcomm hardware. That implies that, if possible, it shouldn't be
> >> in a Qualcomm driver.
> >>
> >> * It doesn't seem like it would be terrible to handle this in the core.
> >>
> >>
> >>> This marks the fail safe mode and IGT test case reads this to set this
> >>> mode and hence the test passes.
> >>>
> >>> We rely on the chromeOS usermode to output pixel data for this test-case
> >>> and not IGT. We use IGT only for video pattern CTS today but this is a
> >>> different test-case which is failing.
> >>>
> >>> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
> >>> or other modes are eliminated.
> >>>
> >>> So we have to come up with the right way for the usermode to pick
> >>> 640x480.
> >>>
> >>> We will discuss this a bit more and come up with a different change.
> >>
> >> Can you provide the exact EDID from the failing test case? Maybe that
> >> will help shed some light on what's going on. I looked at the original
> >> commit and it just referred to 4.2.2.1, which I assume is "EDID Read
> >> upon HPD Plug Event", but that doesn't give details that seem relevant
> >> to the discussion here.
> >
> > Yes so it is 4.2.2.1 and 4.2.2.6.
> >
> > That alone wont give the full picture.
> >
> > So its a combination of things.
> >
> > While running the test, the test equipment published only one mode.
> > But we could not support that mode because of 2 lanes.
> > Equipment did not add 640x480 to the list of modes.
> > DRM fwk will also not add it because count_modes is not 0 ( there was
> > one mode ).
> > So we ended up making these changes.
>
> I think a proper solution might be to rewrite
> drm_helper_probe_single_connector_modes() in the following way:
> - call get_modes()
> - validate the result
> - prune invalid
>
> - if the number of modes is 0, call drm_add_override_edid_modes()
> - validate the result
> - prune invalid
>
> - if the number of modes is still 0, call drm_add_modes_noedid()
> - validate the result
> - prune invalid
>
> [A separate change might happen here after all the checks: if the number
> of modes is still 0 and if it is a DP, enforce adding 640x480 even w/o
> validation. But generally I feel that this shouldn't be necessary
> because the previous step should have added it.]
>
> This way we can be sure that all modes are validated, but still to do
> our best to add something supported to the list of modes.

I'm partway through implementing / testing something similar to this.
;-) My logic is slightly different than yours, though. In the very
least I'm not convinced that we want to add the higher resolution
modes (like 1024x768) if all the modes fail to validate. The DP spec
only claims 640x480 is always supported. The higher resolution modes
are for when the EDID fails to read I think. Similarly I'm not
convinced that we should do pruning before deciding on
drm_add_override_edid_modes().

-Doug
Abhinav Kumar April 26, 2022, 5:24 p.m. UTC | #15
On 4/26/2022 10:11 AM, Doug Anderson wrote:
> Hi,
> 
> On Tue, Apr 26, 2022 at 10:01 AM Dmitry Baryshkov
> <dmitry.baryshkov@linaro.org> wrote:
>>
>> On 26/04/2022 18:37, Abhinav Kumar wrote:
>>> Hi Doug
>>>
>>> On 4/26/2022 8:20 AM, Doug Anderson wrote:
>>>> Hi,
>>>>
>>>> On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar
>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>
>>>>> On 4/25/2022 7:18 PM, Doug Anderson wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar
>>>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>>>
>>>>>>>>> 2) When there was a valid EDID but no 640x480 mode
>>>>>>>>>
>>>>>>>>> This is the equipment specific case and the one even I was a bit
>>>>>>>>> surprised. There is a DP compliance equipment we have in-house
>>>>>>>>> and while
>>>>>>>>> validation, it was found that in its list of modes , it did not
>>>>>>>>> have any
>>>>>>>>> modes which chromebook supported ( due to 2 lanes ). But my
>>>>>>>>> understanding was that, all sinks should have atleast 640x480 but
>>>>>>>>> apparently this one did not have that. So to handle this DP
>>>>>>>>> compliance
>>>>>>>>> equipment behavior, we had to do this.
>>>>>>>>
>>>>>>>> That doesn't seem right. If there's a valid EDID and the valid EDID
>>>>>>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
>>>>>>>> 640x480? That doesn't sound right to me. I've got a tiny display in
>>>>>>>> front of me for testing that only has one mode:
>>>>>>>>
>>>>>>>>       #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
>>>>>>>>
>>>>>>>
>>>>>>> As I had wrote, DRM core kicks in only when the count of modes is 0.
>>>>>>> Here what is happening is the count was not 0 but 640x480 was not
>>>>>>> present in the EDID. So we had to add it explicitly.
>>>>>>>
>>>>>>> Your tiny display is a display port display?
>>>>>>>
>>>>>>> I am referring to only display port monitors. If your tiny display is
>>>>>>> DP, it should have had 640x480 in its list of modes.
>>>>>>
>>>>>> My tiny display is actually a HDMI display hooked up to a HDMI to DP
>>>>>> (active) adapter.
>>>>>>
>>>>>> ...but this is a legal and common thing to have. I suppose possibly my
>>>>>> HDMI display is "illegal"?
>>>>>>
>>>>>> OK, so reading through the spec more carefully, I do see that the DP
>>>>>> spec makes numerous mentions of the fact that DP sinks _must_ support
>>>>>> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
>>>>>> Timing Format" says that we must support 640x480. It seems like that's
>>>>>> _intended_ to be used only if the EDID read fails, though or if we
>>>>>> somehow have to output video without knowledge of the EDID. It seems
>>>>>> hard to believe that there's a great reason to assume a display will
>>>>>> support 640x480 if we have more accurate knowledge.
>>>>>>
>>>>>> In any case, I guess I would still say that adding this mode belongs
>>>>>> in the DRM core. The core should notice that it's a DP connection
>>>>>> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
>>>>>> left out and it should add it. We should also make sure it's not
>>>>>> "preferred" and is last in the list so we never accidentally pick it.
>>>>>> If DP truly says that we should always give the user 640x480 then
>>>>>> that's true for everyone, not just Qualcomm. We should add it in the
>>>>>> core. If, later, someone wants to hide this from the UI it would be
>>>>>> much easier if they only needed to modify one place.
>>>>>>
>>>>>
>>>>> So I debugged with kuogee just now using the DP compliance equipment.
>>>>> It turns out, the issue is not that 640x480 mode is not present.
>>>>>
>>>>> The issue is that it is not marked as preferred.
>>>>>
>>>>> Hence we missed this part during debugging this equipment failure.
>>>>>
>>>>> We still have to figure out the best way to either mark 640x480 as
>>>>> preferred or eliminate other modes during the test-case so that 640x480
>>>>> is actually picked by usermode.
>>>>>
>>>>> Now that being said, the fix still doesn't belong in the framework. It
>>>>> has to be in the msm/dp code.
>>>>>
>>>>> Different vendors handle this failure case differently looks like.
>>>>>
>>>>> Lets take below snippet from i915 as example.
>>>>>
>>>>> 3361    if (intel_connector->detect_edid == NULL ||
>>>>> 3362        connector->edid_corrupt ||
>>>>> 3363        intel_dp->aux.i2c_defer_count > 6) {
>>>>> 3364            /* Check EDID read for NACKs, DEFERs and corruption
>>>>> 3365             * (DP CTS 1.2 Core r1.1)
>>>>> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
>>>>> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
>>>>> 3368             *    4.2.2.6 : EDID corruption detected
>>>>> 3369             * Use failsafe mode for all cases
>>>>> 3370             */
>>>>> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
>>>>> 3372                    intel_dp->aux.i2c_defer_count > 0)
>>>>> 3373                    drm_dbg_kms(&i915->drm,
>>>>> 3374                                "EDID read had %d NACKs, %d
>>>>> DEFERs\n",
>>>>> 3375                                intel_dp->aux.i2c_nack_count,
>>>>> 3376                                intel_dp->aux.i2c_defer_count);
>>>>> 3377            intel_dp->compliance.test_data.edid =
>>>>> INTEL_DP_RESOLUTION_FAILSAFE;
>>>>
>>>
>>> The reason I pointed to this code is to give an example of how other
>>> drivers handle this test-case.
>>>
>>> We added this patch for 4.2.2.1 and 4.2.2.6 EDID test cases.
>>>
>>> The challenge here as found out from our discussion here was to mark a
>>> particular mode as preferred so that the Chrome usermode can pick it.
>>>
>>> Now whats happening with that there was always a possibility of two
>>> modes being marked as preferred due to this and so-on.
>>>
>>> We had a pretty long discussion last night and thought of all possible
>>> solutions but all of them look like a hack to us in the driver because
>>> we end up breaking other things due to this.
>>>
>>> So we decided that driver is not the place to handle this test case.
>>> Since we do have IGT support for chromebooks, we will handle both these
>>> test cases there as other vendors do the same way and it works.
>>>
>>>
>>>> Just because Intel DRM has its own solution for something doesn't mean
>>>> everyone else should copy them and implement their own solution. Up
>>>> until recently DP AUX backlights were baked into different DRM
>>>> drivers. A recent effort was made to pull it out. I think the Intel
>>>> DRM code was the "first one" to the party and it wasn't clear how
>>>> things should be broken up to share with other drivers, so mostly it
>>>> did everything itself, but that's not the long term answer.
>>>>
>>>> I'm not saying that we need to block your change on a full re-design
>>>> or anything, but I'm just saying that:
>>>>
>>>> * You're trying to implement a generic DP rule, not something specific
>>>> to Qualcomm hardware. That implies that, if possible, it shouldn't be
>>>> in a Qualcomm driver.
>>>>
>>>> * It doesn't seem like it would be terrible to handle this in the core.
>>>>
>>>>
>>>>> This marks the fail safe mode and IGT test case reads this to set this
>>>>> mode and hence the test passes.
>>>>>
>>>>> We rely on the chromeOS usermode to output pixel data for this test-case
>>>>> and not IGT. We use IGT only for video pattern CTS today but this is a
>>>>> different test-case which is failing.
>>>>>
>>>>> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
>>>>> or other modes are eliminated.
>>>>>
>>>>> So we have to come up with the right way for the usermode to pick
>>>>> 640x480.
>>>>>
>>>>> We will discuss this a bit more and come up with a different change.
>>>>
>>>> Can you provide the exact EDID from the failing test case? Maybe that
>>>> will help shed some light on what's going on. I looked at the original
>>>> commit and it just referred to 4.2.2.1, which I assume is "EDID Read
>>>> upon HPD Plug Event", but that doesn't give details that seem relevant
>>>> to the discussion here.
>>>
>>> Yes so it is 4.2.2.1 and 4.2.2.6.
>>>
>>> That alone wont give the full picture.
>>>
>>> So its a combination of things.
>>>
>>> While running the test, the test equipment published only one mode.
>>> But we could not support that mode because of 2 lanes.
>>> Equipment did not add 640x480 to the list of modes.
>>> DRM fwk will also not add it because count_modes is not 0 ( there was
>>> one mode ).
>>> So we ended up making these changes.
>>
>> I think a proper solution might be to rewrite
>> drm_helper_probe_single_connector_modes() in the following way:
>> - call get_modes()
>> - validate the result
>> - prune invalid
>>
>> - if the number of modes is 0, call drm_add_override_edid_modes()
>> - validate the result
>> - prune invalid
>>
>> - if the number of modes is still 0, call drm_add_modes_noedid()
>> - validate the result
>> - prune invalid
>>
>> [A separate change might happen here after all the checks: if the number
>> of modes is still 0 and if it is a DP, enforce adding 640x480 even w/o
>> validation. But generally I feel that this shouldn't be necessary
>> because the previous step should have added it.]
>>
>> This way we can be sure that all modes are validated, but still to do
>> our best to add something supported to the list of modes.
> 
> I'm partway through implementing / testing something similar to this.
> ;-) My logic is slightly different than yours, though. In the very
> least I'm not convinced that we want to add the higher resolution
> modes (like 1024x768) if all the modes fail to validate. The DP spec
> only claims 640x480 is always supported. The higher resolution modes
> are for when the EDID fails to read I think. Similarly I'm not
> convinced that we should do pruning before deciding on
> drm_add_override_edid_modes().

Doug, we will certainly evaluate it once you post it.

Thanks

Abhinav


> 
> -Doug
Dmitry Baryshkov April 26, 2022, 5:44 p.m. UTC | #16
On 26/04/2022 20:11, Doug Anderson wrote:
> Hi,
> 
> On Tue, Apr 26, 2022 at 10:01 AM Dmitry Baryshkov
> <dmitry.baryshkov@linaro.org> wrote:
>>
>> On 26/04/2022 18:37, Abhinav Kumar wrote:
>>> Hi Doug
>>>
>>> On 4/26/2022 8:20 AM, Doug Anderson wrote:
>>>> Hi,
>>>>
>>>> On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar
>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>
>>>>> On 4/25/2022 7:18 PM, Doug Anderson wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar
>>>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>>>
>>>>>>>>> 2) When there was a valid EDID but no 640x480 mode
>>>>>>>>>
>>>>>>>>> This is the equipment specific case and the one even I was a bit
>>>>>>>>> surprised. There is a DP compliance equipment we have in-house
>>>>>>>>> and while
>>>>>>>>> validation, it was found that in its list of modes , it did not
>>>>>>>>> have any
>>>>>>>>> modes which chromebook supported ( due to 2 lanes ). But my
>>>>>>>>> understanding was that, all sinks should have atleast 640x480 but
>>>>>>>>> apparently this one did not have that. So to handle this DP
>>>>>>>>> compliance
>>>>>>>>> equipment behavior, we had to do this.
>>>>>>>>
>>>>>>>> That doesn't seem right. If there's a valid EDID and the valid EDID
>>>>>>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
>>>>>>>> 640x480? That doesn't sound right to me. I've got a tiny display in
>>>>>>>> front of me for testing that only has one mode:
>>>>>>>>
>>>>>>>>       #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
>>>>>>>>
>>>>>>>
>>>>>>> As I had wrote, DRM core kicks in only when the count of modes is 0.
>>>>>>> Here what is happening is the count was not 0 but 640x480 was not
>>>>>>> present in the EDID. So we had to add it explicitly.
>>>>>>>
>>>>>>> Your tiny display is a display port display?
>>>>>>>
>>>>>>> I am referring to only display port monitors. If your tiny display is
>>>>>>> DP, it should have had 640x480 in its list of modes.
>>>>>>
>>>>>> My tiny display is actually a HDMI display hooked up to a HDMI to DP
>>>>>> (active) adapter.
>>>>>>
>>>>>> ...but this is a legal and common thing to have. I suppose possibly my
>>>>>> HDMI display is "illegal"?
>>>>>>
>>>>>> OK, so reading through the spec more carefully, I do see that the DP
>>>>>> spec makes numerous mentions of the fact that DP sinks _must_ support
>>>>>> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
>>>>>> Timing Format" says that we must support 640x480. It seems like that's
>>>>>> _intended_ to be used only if the EDID read fails, though or if we
>>>>>> somehow have to output video without knowledge of the EDID. It seems
>>>>>> hard to believe that there's a great reason to assume a display will
>>>>>> support 640x480 if we have more accurate knowledge.
>>>>>>
>>>>>> In any case, I guess I would still say that adding this mode belongs
>>>>>> in the DRM core. The core should notice that it's a DP connection
>>>>>> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
>>>>>> left out and it should add it. We should also make sure it's not
>>>>>> "preferred" and is last in the list so we never accidentally pick it.
>>>>>> If DP truly says that we should always give the user 640x480 then
>>>>>> that's true for everyone, not just Qualcomm. We should add it in the
>>>>>> core. If, later, someone wants to hide this from the UI it would be
>>>>>> much easier if they only needed to modify one place.
>>>>>>
>>>>>
>>>>> So I debugged with kuogee just now using the DP compliance equipment.
>>>>> It turns out, the issue is not that 640x480 mode is not present.
>>>>>
>>>>> The issue is that it is not marked as preferred.
>>>>>
>>>>> Hence we missed this part during debugging this equipment failure.
>>>>>
>>>>> We still have to figure out the best way to either mark 640x480 as
>>>>> preferred or eliminate other modes during the test-case so that 640x480
>>>>> is actually picked by usermode.
>>>>>
>>>>> Now that being said, the fix still doesn't belong in the framework. It
>>>>> has to be in the msm/dp code.
>>>>>
>>>>> Different vendors handle this failure case differently looks like.
>>>>>
>>>>> Lets take below snippet from i915 as example.
>>>>>
>>>>> 3361    if (intel_connector->detect_edid == NULL ||
>>>>> 3362        connector->edid_corrupt ||
>>>>> 3363        intel_dp->aux.i2c_defer_count > 6) {
>>>>> 3364            /* Check EDID read for NACKs, DEFERs and corruption
>>>>> 3365             * (DP CTS 1.2 Core r1.1)
>>>>> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
>>>>> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
>>>>> 3368             *    4.2.2.6 : EDID corruption detected
>>>>> 3369             * Use failsafe mode for all cases
>>>>> 3370             */
>>>>> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
>>>>> 3372                    intel_dp->aux.i2c_defer_count > 0)
>>>>> 3373                    drm_dbg_kms(&i915->drm,
>>>>> 3374                                "EDID read had %d NACKs, %d
>>>>> DEFERs\n",
>>>>> 3375                                intel_dp->aux.i2c_nack_count,
>>>>> 3376                                intel_dp->aux.i2c_defer_count);
>>>>> 3377            intel_dp->compliance.test_data.edid =
>>>>> INTEL_DP_RESOLUTION_FAILSAFE;
>>>>
>>>
>>> The reason I pointed to this code is to give an example of how other
>>> drivers handle this test-case.
>>>
>>> We added this patch for 4.2.2.1 and 4.2.2.6 EDID test cases.
>>>
>>> The challenge here as found out from our discussion here was to mark a
>>> particular mode as preferred so that the Chrome usermode can pick it.
>>>
>>> Now whats happening with that there was always a possibility of two
>>> modes being marked as preferred due to this and so-on.
>>>
>>> We had a pretty long discussion last night and thought of all possible
>>> solutions but all of them look like a hack to us in the driver because
>>> we end up breaking other things due to this.
>>>
>>> So we decided that driver is not the place to handle this test case.
>>> Since we do have IGT support for chromebooks, we will handle both these
>>> test cases there as other vendors do the same way and it works.
>>>
>>>
>>>> Just because Intel DRM has its own solution for something doesn't mean
>>>> everyone else should copy them and implement their own solution. Up
>>>> until recently DP AUX backlights were baked into different DRM
>>>> drivers. A recent effort was made to pull it out. I think the Intel
>>>> DRM code was the "first one" to the party and it wasn't clear how
>>>> things should be broken up to share with other drivers, so mostly it
>>>> did everything itself, but that's not the long term answer.
>>>>
>>>> I'm not saying that we need to block your change on a full re-design
>>>> or anything, but I'm just saying that:
>>>>
>>>> * You're trying to implement a generic DP rule, not something specific
>>>> to Qualcomm hardware. That implies that, if possible, it shouldn't be
>>>> in a Qualcomm driver.
>>>>
>>>> * It doesn't seem like it would be terrible to handle this in the core.
>>>>
>>>>
>>>>> This marks the fail safe mode and IGT test case reads this to set this
>>>>> mode and hence the test passes.
>>>>>
>>>>> We rely on the chromeOS usermode to output pixel data for this test-case
>>>>> and not IGT. We use IGT only for video pattern CTS today but this is a
>>>>> different test-case which is failing.
>>>>>
>>>>> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
>>>>> or other modes are eliminated.
>>>>>
>>>>> So we have to come up with the right way for the usermode to pick
>>>>> 640x480.
>>>>>
>>>>> We will discuss this a bit more and come up with a different change.
>>>>
>>>> Can you provide the exact EDID from the failing test case? Maybe that
>>>> will help shed some light on what's going on. I looked at the original
>>>> commit and it just referred to 4.2.2.1, which I assume is "EDID Read
>>>> upon HPD Plug Event", but that doesn't give details that seem relevant
>>>> to the discussion here.
>>>
>>> Yes so it is 4.2.2.1 and 4.2.2.6.
>>>
>>> That alone wont give the full picture.
>>>
>>> So its a combination of things.
>>>
>>> While running the test, the test equipment published only one mode.
>>> But we could not support that mode because of 2 lanes.
>>> Equipment did not add 640x480 to the list of modes.
>>> DRM fwk will also not add it because count_modes is not 0 ( there was
>>> one mode ).
>>> So we ended up making these changes.
>>
>> I think a proper solution might be to rewrite
>> drm_helper_probe_single_connector_modes() in the following way:
>> - call get_modes()
>> - validate the result
>> - prune invalid
>>
>> - if the number of modes is 0, call drm_add_override_edid_modes()
>> - validate the result
>> - prune invalid
>>
>> - if the number of modes is still 0, call drm_add_modes_noedid()
>> - validate the result
>> - prune invalid
>>
>> [A separate change might happen here after all the checks: if the number
>> of modes is still 0 and if it is a DP, enforce adding 640x480 even w/o
>> validation. But generally I feel that this shouldn't be necessary
>> because the previous step should have added it.]
>>
>> This way we can be sure that all modes are validated, but still to do
>> our best to add something supported to the list of modes.
> 
> I'm partway through implementing / testing something similar to this.
> ;-) My logic is slightly different than yours, though. In the very
> least I'm not convinced that we want to add the higher resolution
> modes (like 1024x768) if all the modes fail to validate. The DP spec
> only claims 640x480 is always supported. The higher resolution modes
> are for when the EDID fails to read I think. Similarly I'm not
> convinced that we should do pruning before deciding on
> drm_add_override_edid_modes().


I think pruning before drm_add_override_edid_modes() would allow one to 
use override if the first read returned some modes which are invalid.

Regarding 1024 vs 640. For the restructure we shouldn't change this. And 
I'd actually point to the following commit message:

commit 9632b41f00cc2fb2846569cc99dbeef78e5c94a0
Author: Adam Jackson <ajax@redhat.com>
Date:   Mon Nov 23 14:23:07 2009 -0500

     drm/modes: Fall back to 1024x768 instead of 800x600

     This matches the X server's fallback modes when using RANDR 1.2.

     See also: http://bugzilla.redhat.com/538761

     Signed-off-by: Adam Jackson <ajax@redhat.com>
     Signed-off-by: Dave Airlie <airlied@redhat.com>


So I'd say, let's leave 1024 as is and just try them if all other modes 
are invalid.
Doug Anderson April 26, 2022, 5:56 p.m. UTC | #17
Hi,

On Tue, Apr 26, 2022 at 10:44 AM Dmitry Baryshkov
<dmitry.baryshkov@linaro.org> wrote:
>
> On 26/04/2022 20:11, Doug Anderson wrote:
> > Hi,
> >
> > On Tue, Apr 26, 2022 at 10:01 AM Dmitry Baryshkov
> > <dmitry.baryshkov@linaro.org> wrote:
> >>
> >> On 26/04/2022 18:37, Abhinav Kumar wrote:
> >>> Hi Doug
> >>>
> >>> On 4/26/2022 8:20 AM, Doug Anderson wrote:
> >>>> Hi,
> >>>>
> >>>> On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar
> >>>> <quic_abhinavk@quicinc.com> wrote:
> >>>>>
> >>>>> On 4/25/2022 7:18 PM, Doug Anderson wrote:
> >>>>>> Hi,
> >>>>>>
> >>>>>> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar
> >>>>>> <quic_abhinavk@quicinc.com> wrote:
> >>>>>>>
> >>>>>>>>> 2) When there was a valid EDID but no 640x480 mode
> >>>>>>>>>
> >>>>>>>>> This is the equipment specific case and the one even I was a bit
> >>>>>>>>> surprised. There is a DP compliance equipment we have in-house
> >>>>>>>>> and while
> >>>>>>>>> validation, it was found that in its list of modes , it did not
> >>>>>>>>> have any
> >>>>>>>>> modes which chromebook supported ( due to 2 lanes ). But my
> >>>>>>>>> understanding was that, all sinks should have atleast 640x480 but
> >>>>>>>>> apparently this one did not have that. So to handle this DP
> >>>>>>>>> compliance
> >>>>>>>>> equipment behavior, we had to do this.
> >>>>>>>>
> >>>>>>>> That doesn't seem right. If there's a valid EDID and the valid EDID
> >>>>>>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
> >>>>>>>> 640x480? That doesn't sound right to me. I've got a tiny display in
> >>>>>>>> front of me for testing that only has one mode:
> >>>>>>>>
> >>>>>>>>       #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
> >>>>>>>>
> >>>>>>>
> >>>>>>> As I had wrote, DRM core kicks in only when the count of modes is 0.
> >>>>>>> Here what is happening is the count was not 0 but 640x480 was not
> >>>>>>> present in the EDID. So we had to add it explicitly.
> >>>>>>>
> >>>>>>> Your tiny display is a display port display?
> >>>>>>>
> >>>>>>> I am referring to only display port monitors. If your tiny display is
> >>>>>>> DP, it should have had 640x480 in its list of modes.
> >>>>>>
> >>>>>> My tiny display is actually a HDMI display hooked up to a HDMI to DP
> >>>>>> (active) adapter.
> >>>>>>
> >>>>>> ...but this is a legal and common thing to have. I suppose possibly my
> >>>>>> HDMI display is "illegal"?
> >>>>>>
> >>>>>> OK, so reading through the spec more carefully, I do see that the DP
> >>>>>> spec makes numerous mentions of the fact that DP sinks _must_ support
> >>>>>> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
> >>>>>> Timing Format" says that we must support 640x480. It seems like that's
> >>>>>> _intended_ to be used only if the EDID read fails, though or if we
> >>>>>> somehow have to output video without knowledge of the EDID. It seems
> >>>>>> hard to believe that there's a great reason to assume a display will
> >>>>>> support 640x480 if we have more accurate knowledge.
> >>>>>>
> >>>>>> In any case, I guess I would still say that adding this mode belongs
> >>>>>> in the DRM core. The core should notice that it's a DP connection
> >>>>>> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
> >>>>>> left out and it should add it. We should also make sure it's not
> >>>>>> "preferred" and is last in the list so we never accidentally pick it.
> >>>>>> If DP truly says that we should always give the user 640x480 then
> >>>>>> that's true for everyone, not just Qualcomm. We should add it in the
> >>>>>> core. If, later, someone wants to hide this from the UI it would be
> >>>>>> much easier if they only needed to modify one place.
> >>>>>>
> >>>>>
> >>>>> So I debugged with kuogee just now using the DP compliance equipment.
> >>>>> It turns out, the issue is not that 640x480 mode is not present.
> >>>>>
> >>>>> The issue is that it is not marked as preferred.
> >>>>>
> >>>>> Hence we missed this part during debugging this equipment failure.
> >>>>>
> >>>>> We still have to figure out the best way to either mark 640x480 as
> >>>>> preferred or eliminate other modes during the test-case so that 640x480
> >>>>> is actually picked by usermode.
> >>>>>
> >>>>> Now that being said, the fix still doesn't belong in the framework. It
> >>>>> has to be in the msm/dp code.
> >>>>>
> >>>>> Different vendors handle this failure case differently looks like.
> >>>>>
> >>>>> Lets take below snippet from i915 as example.
> >>>>>
> >>>>> 3361    if (intel_connector->detect_edid == NULL ||
> >>>>> 3362        connector->edid_corrupt ||
> >>>>> 3363        intel_dp->aux.i2c_defer_count > 6) {
> >>>>> 3364            /* Check EDID read for NACKs, DEFERs and corruption
> >>>>> 3365             * (DP CTS 1.2 Core r1.1)
> >>>>> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
> >>>>> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
> >>>>> 3368             *    4.2.2.6 : EDID corruption detected
> >>>>> 3369             * Use failsafe mode for all cases
> >>>>> 3370             */
> >>>>> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
> >>>>> 3372                    intel_dp->aux.i2c_defer_count > 0)
> >>>>> 3373                    drm_dbg_kms(&i915->drm,
> >>>>> 3374                                "EDID read had %d NACKs, %d
> >>>>> DEFERs\n",
> >>>>> 3375                                intel_dp->aux.i2c_nack_count,
> >>>>> 3376                                intel_dp->aux.i2c_defer_count);
> >>>>> 3377            intel_dp->compliance.test_data.edid =
> >>>>> INTEL_DP_RESOLUTION_FAILSAFE;
> >>>>
> >>>
> >>> The reason I pointed to this code is to give an example of how other
> >>> drivers handle this test-case.
> >>>
> >>> We added this patch for 4.2.2.1 and 4.2.2.6 EDID test cases.
> >>>
> >>> The challenge here as found out from our discussion here was to mark a
> >>> particular mode as preferred so that the Chrome usermode can pick it.
> >>>
> >>> Now whats happening with that there was always a possibility of two
> >>> modes being marked as preferred due to this and so-on.
> >>>
> >>> We had a pretty long discussion last night and thought of all possible
> >>> solutions but all of them look like a hack to us in the driver because
> >>> we end up breaking other things due to this.
> >>>
> >>> So we decided that driver is not the place to handle this test case.
> >>> Since we do have IGT support for chromebooks, we will handle both these
> >>> test cases there as other vendors do the same way and it works.
> >>>
> >>>
> >>>> Just because Intel DRM has its own solution for something doesn't mean
> >>>> everyone else should copy them and implement their own solution. Up
> >>>> until recently DP AUX backlights were baked into different DRM
> >>>> drivers. A recent effort was made to pull it out. I think the Intel
> >>>> DRM code was the "first one" to the party and it wasn't clear how
> >>>> things should be broken up to share with other drivers, so mostly it
> >>>> did everything itself, but that's not the long term answer.
> >>>>
> >>>> I'm not saying that we need to block your change on a full re-design
> >>>> or anything, but I'm just saying that:
> >>>>
> >>>> * You're trying to implement a generic DP rule, not something specific
> >>>> to Qualcomm hardware. That implies that, if possible, it shouldn't be
> >>>> in a Qualcomm driver.
> >>>>
> >>>> * It doesn't seem like it would be terrible to handle this in the core.
> >>>>
> >>>>
> >>>>> This marks the fail safe mode and IGT test case reads this to set this
> >>>>> mode and hence the test passes.
> >>>>>
> >>>>> We rely on the chromeOS usermode to output pixel data for this test-case
> >>>>> and not IGT. We use IGT only for video pattern CTS today but this is a
> >>>>> different test-case which is failing.
> >>>>>
> >>>>> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
> >>>>> or other modes are eliminated.
> >>>>>
> >>>>> So we have to come up with the right way for the usermode to pick
> >>>>> 640x480.
> >>>>>
> >>>>> We will discuss this a bit more and come up with a different change.
> >>>>
> >>>> Can you provide the exact EDID from the failing test case? Maybe that
> >>>> will help shed some light on what's going on. I looked at the original
> >>>> commit and it just referred to 4.2.2.1, which I assume is "EDID Read
> >>>> upon HPD Plug Event", but that doesn't give details that seem relevant
> >>>> to the discussion here.
> >>>
> >>> Yes so it is 4.2.2.1 and 4.2.2.6.
> >>>
> >>> That alone wont give the full picture.
> >>>
> >>> So its a combination of things.
> >>>
> >>> While running the test, the test equipment published only one mode.
> >>> But we could not support that mode because of 2 lanes.
> >>> Equipment did not add 640x480 to the list of modes.
> >>> DRM fwk will also not add it because count_modes is not 0 ( there was
> >>> one mode ).
> >>> So we ended up making these changes.
> >>
> >> I think a proper solution might be to rewrite
> >> drm_helper_probe_single_connector_modes() in the following way:
> >> - call get_modes()
> >> - validate the result
> >> - prune invalid
> >>
> >> - if the number of modes is 0, call drm_add_override_edid_modes()
> >> - validate the result
> >> - prune invalid
> >>
> >> - if the number of modes is still 0, call drm_add_modes_noedid()
> >> - validate the result
> >> - prune invalid
> >>
> >> [A separate change might happen here after all the checks: if the number
> >> of modes is still 0 and if it is a DP, enforce adding 640x480 even w/o
> >> validation. But generally I feel that this shouldn't be necessary
> >> because the previous step should have added it.]
> >>
> >> This way we can be sure that all modes are validated, but still to do
> >> our best to add something supported to the list of modes.
> >
> > I'm partway through implementing / testing something similar to this.
> > ;-) My logic is slightly different than yours, though. In the very
> > least I'm not convinced that we want to add the higher resolution
> > modes (like 1024x768) if all the modes fail to validate. The DP spec
> > only claims 640x480 is always supported. The higher resolution modes
> > are for when the EDID fails to read I think. Similarly I'm not
> > convinced that we should do pruning before deciding on
> > drm_add_override_edid_modes().
>
>
> I think pruning before drm_add_override_edid_modes() would allow one to
> use override if the first read returned some modes which are invalid.

Yeah, I'm less certain about drm_add_override_edid_modes(), but as per
documented it's only to be used if the EDID failed to read.

If someone has an actual use case where they need to add the override
modes for this specific case then we can, but I think it can be done
separately once someone has an actual use case.


> Regarding 1024 vs 640. For the restructure we shouldn't change this. And
> I'd actually point to the following commit message:
>
> commit 9632b41f00cc2fb2846569cc99dbeef78e5c94a0
> Author: Adam Jackson <ajax@redhat.com>
> Date:   Mon Nov 23 14:23:07 2009 -0500
>
>      drm/modes: Fall back to 1024x768 instead of 800x600
>
>      This matches the X server's fallback modes when using RANDR 1.2.
>
>      See also: http://bugzilla.redhat.com/538761
>
>      Signed-off-by: Adam Jackson <ajax@redhat.com>
>      Signed-off-by: Dave Airlie <airlied@redhat.com>
>
>
> So I'd say, let's leave 1024 as is and just try them if all other modes
> are invalid.

I'm pretty strongly against adding 1024x768 when all modes fail to
validate. Specifically:

If the EDID fully fails to read then adding these higher resolution
modes makes sense. We have no knowledge at all about the display in
this case and so we can guess that some standard higher resolutions
might make sense. In the case we're dealing with here, we have very
specific knowledge about what the display said it could handle and we
can't support any of them. The DP spec _only_ lists 640x480 as a
required mode so that's the only one we should add.

-Doug
Abhinav Kumar April 26, 2022, 6:03 p.m. UTC | #18
On 4/26/2022 10:56 AM, Doug Anderson wrote:
> Hi,
> 
> On Tue, Apr 26, 2022 at 10:44 AM Dmitry Baryshkov
> <dmitry.baryshkov@linaro.org> wrote:
>>
>> On 26/04/2022 20:11, Doug Anderson wrote:
>>> Hi,
>>>
>>> On Tue, Apr 26, 2022 at 10:01 AM Dmitry Baryshkov
>>> <dmitry.baryshkov@linaro.org> wrote:
>>>>
>>>> On 26/04/2022 18:37, Abhinav Kumar wrote:
>>>>> Hi Doug
>>>>>
>>>>> On 4/26/2022 8:20 AM, Doug Anderson wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar
>>>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>>>
>>>>>>> On 4/25/2022 7:18 PM, Doug Anderson wrote:
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar
>>>>>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>>>>>
>>>>>>>>>>> 2) When there was a valid EDID but no 640x480 mode
>>>>>>>>>>>
>>>>>>>>>>> This is the equipment specific case and the one even I was a bit
>>>>>>>>>>> surprised. There is a DP compliance equipment we have in-house
>>>>>>>>>>> and while
>>>>>>>>>>> validation, it was found that in its list of modes , it did not
>>>>>>>>>>> have any
>>>>>>>>>>> modes which chromebook supported ( due to 2 lanes ). But my
>>>>>>>>>>> understanding was that, all sinks should have atleast 640x480 but
>>>>>>>>>>> apparently this one did not have that. So to handle this DP
>>>>>>>>>>> compliance
>>>>>>>>>>> equipment behavior, we had to do this.
>>>>>>>>>>
>>>>>>>>>> That doesn't seem right. If there's a valid EDID and the valid EDID
>>>>>>>>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
>>>>>>>>>> 640x480? That doesn't sound right to me. I've got a tiny display in
>>>>>>>>>> front of me for testing that only has one mode:
>>>>>>>>>>
>>>>>>>>>>        #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> As I had wrote, DRM core kicks in only when the count of modes is 0.
>>>>>>>>> Here what is happening is the count was not 0 but 640x480 was not
>>>>>>>>> present in the EDID. So we had to add it explicitly.
>>>>>>>>>
>>>>>>>>> Your tiny display is a display port display?
>>>>>>>>>
>>>>>>>>> I am referring to only display port monitors. If your tiny display is
>>>>>>>>> DP, it should have had 640x480 in its list of modes.
>>>>>>>>
>>>>>>>> My tiny display is actually a HDMI display hooked up to a HDMI to DP
>>>>>>>> (active) adapter.
>>>>>>>>
>>>>>>>> ...but this is a legal and common thing to have. I suppose possibly my
>>>>>>>> HDMI display is "illegal"?
>>>>>>>>
>>>>>>>> OK, so reading through the spec more carefully, I do see that the DP
>>>>>>>> spec makes numerous mentions of the fact that DP sinks _must_ support
>>>>>>>> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
>>>>>>>> Timing Format" says that we must support 640x480. It seems like that's
>>>>>>>> _intended_ to be used only if the EDID read fails, though or if we
>>>>>>>> somehow have to output video without knowledge of the EDID. It seems
>>>>>>>> hard to believe that there's a great reason to assume a display will
>>>>>>>> support 640x480 if we have more accurate knowledge.
>>>>>>>>
>>>>>>>> In any case, I guess I would still say that adding this mode belongs
>>>>>>>> in the DRM core. The core should notice that it's a DP connection
>>>>>>>> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
>>>>>>>> left out and it should add it. We should also make sure it's not
>>>>>>>> "preferred" and is last in the list so we never accidentally pick it.
>>>>>>>> If DP truly says that we should always give the user 640x480 then
>>>>>>>> that's true for everyone, not just Qualcomm. We should add it in the
>>>>>>>> core. If, later, someone wants to hide this from the UI it would be
>>>>>>>> much easier if they only needed to modify one place.
>>>>>>>>
>>>>>>>
>>>>>>> So I debugged with kuogee just now using the DP compliance equipment.
>>>>>>> It turns out, the issue is not that 640x480 mode is not present.
>>>>>>>
>>>>>>> The issue is that it is not marked as preferred.
>>>>>>>
>>>>>>> Hence we missed this part during debugging this equipment failure.
>>>>>>>
>>>>>>> We still have to figure out the best way to either mark 640x480 as
>>>>>>> preferred or eliminate other modes during the test-case so that 640x480
>>>>>>> is actually picked by usermode.
>>>>>>>
>>>>>>> Now that being said, the fix still doesn't belong in the framework. It
>>>>>>> has to be in the msm/dp code.
>>>>>>>
>>>>>>> Different vendors handle this failure case differently looks like.
>>>>>>>
>>>>>>> Lets take below snippet from i915 as example.
>>>>>>>
>>>>>>> 3361    if (intel_connector->detect_edid == NULL ||
>>>>>>> 3362        connector->edid_corrupt ||
>>>>>>> 3363        intel_dp->aux.i2c_defer_count > 6) {
>>>>>>> 3364            /* Check EDID read for NACKs, DEFERs and corruption
>>>>>>> 3365             * (DP CTS 1.2 Core r1.1)
>>>>>>> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
>>>>>>> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
>>>>>>> 3368             *    4.2.2.6 : EDID corruption detected
>>>>>>> 3369             * Use failsafe mode for all cases
>>>>>>> 3370             */
>>>>>>> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
>>>>>>> 3372                    intel_dp->aux.i2c_defer_count > 0)
>>>>>>> 3373                    drm_dbg_kms(&i915->drm,
>>>>>>> 3374                                "EDID read had %d NACKs, %d
>>>>>>> DEFERs\n",
>>>>>>> 3375                                intel_dp->aux.i2c_nack_count,
>>>>>>> 3376                                intel_dp->aux.i2c_defer_count);
>>>>>>> 3377            intel_dp->compliance.test_data.edid =
>>>>>>> INTEL_DP_RESOLUTION_FAILSAFE;
>>>>>>
>>>>>
>>>>> The reason I pointed to this code is to give an example of how other
>>>>> drivers handle this test-case.
>>>>>
>>>>> We added this patch for 4.2.2.1 and 4.2.2.6 EDID test cases.
>>>>>
>>>>> The challenge here as found out from our discussion here was to mark a
>>>>> particular mode as preferred so that the Chrome usermode can pick it.
>>>>>
>>>>> Now whats happening with that there was always a possibility of two
>>>>> modes being marked as preferred due to this and so-on.
>>>>>
>>>>> We had a pretty long discussion last night and thought of all possible
>>>>> solutions but all of them look like a hack to us in the driver because
>>>>> we end up breaking other things due to this.
>>>>>
>>>>> So we decided that driver is not the place to handle this test case.
>>>>> Since we do have IGT support for chromebooks, we will handle both these
>>>>> test cases there as other vendors do the same way and it works.
>>>>>
>>>>>
>>>>>> Just because Intel DRM has its own solution for something doesn't mean
>>>>>> everyone else should copy them and implement their own solution. Up
>>>>>> until recently DP AUX backlights were baked into different DRM
>>>>>> drivers. A recent effort was made to pull it out. I think the Intel
>>>>>> DRM code was the "first one" to the party and it wasn't clear how
>>>>>> things should be broken up to share with other drivers, so mostly it
>>>>>> did everything itself, but that's not the long term answer.
>>>>>>
>>>>>> I'm not saying that we need to block your change on a full re-design
>>>>>> or anything, but I'm just saying that:
>>>>>>
>>>>>> * You're trying to implement a generic DP rule, not something specific
>>>>>> to Qualcomm hardware. That implies that, if possible, it shouldn't be
>>>>>> in a Qualcomm driver.
>>>>>>
>>>>>> * It doesn't seem like it would be terrible to handle this in the core.
>>>>>>
>>>>>>
>>>>>>> This marks the fail safe mode and IGT test case reads this to set this
>>>>>>> mode and hence the test passes.
>>>>>>>
>>>>>>> We rely on the chromeOS usermode to output pixel data for this test-case
>>>>>>> and not IGT. We use IGT only for video pattern CTS today but this is a
>>>>>>> different test-case which is failing.
>>>>>>>
>>>>>>> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
>>>>>>> or other modes are eliminated.
>>>>>>>
>>>>>>> So we have to come up with the right way for the usermode to pick
>>>>>>> 640x480.
>>>>>>>
>>>>>>> We will discuss this a bit more and come up with a different change.
>>>>>>
>>>>>> Can you provide the exact EDID from the failing test case? Maybe that
>>>>>> will help shed some light on what's going on. I looked at the original
>>>>>> commit and it just referred to 4.2.2.1, which I assume is "EDID Read
>>>>>> upon HPD Plug Event", but that doesn't give details that seem relevant
>>>>>> to the discussion here.
>>>>>
>>>>> Yes so it is 4.2.2.1 and 4.2.2.6.
>>>>>
>>>>> That alone wont give the full picture.
>>>>>
>>>>> So its a combination of things.
>>>>>
>>>>> While running the test, the test equipment published only one mode.
>>>>> But we could not support that mode because of 2 lanes.
>>>>> Equipment did not add 640x480 to the list of modes.
>>>>> DRM fwk will also not add it because count_modes is not 0 ( there was
>>>>> one mode ).
>>>>> So we ended up making these changes.
>>>>
>>>> I think a proper solution might be to rewrite
>>>> drm_helper_probe_single_connector_modes() in the following way:
>>>> - call get_modes()
>>>> - validate the result
>>>> - prune invalid
>>>>
>>>> - if the number of modes is 0, call drm_add_override_edid_modes()
>>>> - validate the result
>>>> - prune invalid
>>>>
>>>> - if the number of modes is still 0, call drm_add_modes_noedid()
>>>> - validate the result
>>>> - prune invalid
>>>>
>>>> [A separate change might happen here after all the checks: if the number
>>>> of modes is still 0 and if it is a DP, enforce adding 640x480 even w/o
>>>> validation. But generally I feel that this shouldn't be necessary
>>>> because the previous step should have added it.]
>>>>
>>>> This way we can be sure that all modes are validated, but still to do
>>>> our best to add something supported to the list of modes.
>>>
>>> I'm partway through implementing / testing something similar to this.
>>> ;-) My logic is slightly different than yours, though. In the very
>>> least I'm not convinced that we want to add the higher resolution
>>> modes (like 1024x768) if all the modes fail to validate. The DP spec
>>> only claims 640x480 is always supported. The higher resolution modes
>>> are for when the EDID fails to read I think. Similarly I'm not
>>> convinced that we should do pruning before deciding on
>>> drm_add_override_edid_modes().
>>
>>
>> I think pruning before drm_add_override_edid_modes() would allow one to
>> use override if the first read returned some modes which are invalid.
> 
> Yeah, I'm less certain about drm_add_override_edid_modes(), but as per
> documented it's only to be used if the EDID failed to read.
> 
> If someone has an actual use case where they need to add the override
> modes for this specific case then we can, but I think it can be done
> separately once someone has an actual use case.
> 
> 
>> Regarding 1024 vs 640. For the restructure we shouldn't change this. And
>> I'd actually point to the following commit message:
>>
>> commit 9632b41f00cc2fb2846569cc99dbeef78e5c94a0
>> Author: Adam Jackson <ajax@redhat.com>
>> Date:   Mon Nov 23 14:23:07 2009 -0500
>>
>>       drm/modes: Fall back to 1024x768 instead of 800x600
>>
>>       This matches the X server's fallback modes when using RANDR 1.2.
>>
>>       See also: http://bugzilla.redhat.com/538761
>>
>>       Signed-off-by: Adam Jackson <ajax@redhat.com>
>>       Signed-off-by: Dave Airlie <airlied@redhat.com>
>>
>>
>> So I'd say, let's leave 1024 as is and just try them if all other modes
>> are invalid.
> 
> I'm pretty strongly against adding 1024x768 when all modes fail to
> validate. Specifically:
> 
> If the EDID fully fails to read then adding these higher resolution
> modes makes sense. We have no knowledge at all about the display in
> this case and so we can guess that some standard higher resolutions
> might make sense. In the case we're dealing with here, we have very
> specific knowledge about what the display said it could handle and we
> can't support any of them. The DP spec _only_ lists 640x480 as a
> required mode so that's the only one we should add.
> 
> -Doug

So one thing to note Doug, in case you have not already made note of it.
I believe you are referring to drm_add_modes_noedid() and NOT 
drm_add_override_edid_modes()? Because the latter needs an override firware

If so, just wanted to mention that drm_add_modes_noedid() operates on 
the probed_modes() list but after validation and pruning, probed_modes 
list does not exits. It gets copied over to connector->modes list.

So we cannot directly call that.
Dmitry Baryshkov April 26, 2022, 6:04 p.m. UTC | #19
On 26/04/2022 20:56, Doug Anderson wrote:
> Hi,
> 
> On Tue, Apr 26, 2022 at 10:44 AM Dmitry Baryshkov
> <dmitry.baryshkov@linaro.org> wrote:
>>
>> On 26/04/2022 20:11, Doug Anderson wrote:
>>> Hi,
>>>
>>> On Tue, Apr 26, 2022 at 10:01 AM Dmitry Baryshkov
>>> <dmitry.baryshkov@linaro.org> wrote:
>>>>
>>>> On 26/04/2022 18:37, Abhinav Kumar wrote:
>>>>> Hi Doug
>>>>>
>>>>> On 4/26/2022 8:20 AM, Doug Anderson wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On Mon, Apr 25, 2022 at 8:35 PM Abhinav Kumar
>>>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>>>
>>>>>>> On 4/25/2022 7:18 PM, Doug Anderson wrote:
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> On Mon, Apr 25, 2022 at 6:42 PM Abhinav Kumar
>>>>>>>> <quic_abhinavk@quicinc.com> wrote:
>>>>>>>>>
>>>>>>>>>>> 2) When there was a valid EDID but no 640x480 mode
>>>>>>>>>>>
>>>>>>>>>>> This is the equipment specific case and the one even I was a bit
>>>>>>>>>>> surprised. There is a DP compliance equipment we have in-house
>>>>>>>>>>> and while
>>>>>>>>>>> validation, it was found that in its list of modes , it did not
>>>>>>>>>>> have any
>>>>>>>>>>> modes which chromebook supported ( due to 2 lanes ). But my
>>>>>>>>>>> understanding was that, all sinks should have atleast 640x480 but
>>>>>>>>>>> apparently this one did not have that. So to handle this DP
>>>>>>>>>>> compliance
>>>>>>>>>>> equipment behavior, we had to do this.
>>>>>>>>>>
>>>>>>>>>> That doesn't seem right. If there's a valid EDID and the valid EDID
>>>>>>>>>> doesn't contain 640x480, are you _sure_ you're supposed to be adding
>>>>>>>>>> 640x480? That doesn't sound right to me. I've got a tiny display in
>>>>>>>>>> front of me for testing that only has one mode:
>>>>>>>>>>
>>>>>>>>>>        #0 800x480 65.68 800 840 888 928 480 493 496 525 32000
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> As I had wrote, DRM core kicks in only when the count of modes is 0.
>>>>>>>>> Here what is happening is the count was not 0 but 640x480 was not
>>>>>>>>> present in the EDID. So we had to add it explicitly.
>>>>>>>>>
>>>>>>>>> Your tiny display is a display port display?
>>>>>>>>>
>>>>>>>>> I am referring to only display port monitors. If your tiny display is
>>>>>>>>> DP, it should have had 640x480 in its list of modes.
>>>>>>>>
>>>>>>>> My tiny display is actually a HDMI display hooked up to a HDMI to DP
>>>>>>>> (active) adapter.
>>>>>>>>
>>>>>>>> ...but this is a legal and common thing to have. I suppose possibly my
>>>>>>>> HDMI display is "illegal"?
>>>>>>>>
>>>>>>>> OK, so reading through the spec more carefully, I do see that the DP
>>>>>>>> spec makes numerous mentions of the fact that DP sinks _must_ support
>>>>>>>> 640x480. Even going back to DP 1.4, I see section "5.2.1.2 Video
>>>>>>>> Timing Format" says that we must support 640x480. It seems like that's
>>>>>>>> _intended_ to be used only if the EDID read fails, though or if we
>>>>>>>> somehow have to output video without knowledge of the EDID. It seems
>>>>>>>> hard to believe that there's a great reason to assume a display will
>>>>>>>> support 640x480 if we have more accurate knowledge.
>>>>>>>>
>>>>>>>> In any case, I guess I would still say that adding this mode belongs
>>>>>>>> in the DRM core. The core should notice that it's a DP connection
>>>>>>>> (bridge->type == DRM_MODE_CONNECTOR_DisplayPort) and that 640x480 was
>>>>>>>> left out and it should add it. We should also make sure it's not
>>>>>>>> "preferred" and is last in the list so we never accidentally pick it.
>>>>>>>> If DP truly says that we should always give the user 640x480 then
>>>>>>>> that's true for everyone, not just Qualcomm. We should add it in the
>>>>>>>> core. If, later, someone wants to hide this from the UI it would be
>>>>>>>> much easier if they only needed to modify one place.
>>>>>>>>
>>>>>>>
>>>>>>> So I debugged with kuogee just now using the DP compliance equipment.
>>>>>>> It turns out, the issue is not that 640x480 mode is not present.
>>>>>>>
>>>>>>> The issue is that it is not marked as preferred.
>>>>>>>
>>>>>>> Hence we missed this part during debugging this equipment failure.
>>>>>>>
>>>>>>> We still have to figure out the best way to either mark 640x480 as
>>>>>>> preferred or eliminate other modes during the test-case so that 640x480
>>>>>>> is actually picked by usermode.
>>>>>>>
>>>>>>> Now that being said, the fix still doesn't belong in the framework. It
>>>>>>> has to be in the msm/dp code.
>>>>>>>
>>>>>>> Different vendors handle this failure case differently looks like.
>>>>>>>
>>>>>>> Lets take below snippet from i915 as example.
>>>>>>>
>>>>>>> 3361    if (intel_connector->detect_edid == NULL ||
>>>>>>> 3362        connector->edid_corrupt ||
>>>>>>> 3363        intel_dp->aux.i2c_defer_count > 6) {
>>>>>>> 3364            /* Check EDID read for NACKs, DEFERs and corruption
>>>>>>> 3365             * (DP CTS 1.2 Core r1.1)
>>>>>>> 3366             *    4.2.2.4 : Failed EDID read, I2C_NAK
>>>>>>> 3367             *    4.2.2.5 : Failed EDID read, I2C_DEFER
>>>>>>> 3368             *    4.2.2.6 : EDID corruption detected
>>>>>>> 3369             * Use failsafe mode for all cases
>>>>>>> 3370             */
>>>>>>> 3371            if (intel_dp->aux.i2c_nack_count > 0 ||
>>>>>>> 3372                    intel_dp->aux.i2c_defer_count > 0)
>>>>>>> 3373                    drm_dbg_kms(&i915->drm,
>>>>>>> 3374                                "EDID read had %d NACKs, %d
>>>>>>> DEFERs\n",
>>>>>>> 3375                                intel_dp->aux.i2c_nack_count,
>>>>>>> 3376                                intel_dp->aux.i2c_defer_count);
>>>>>>> 3377            intel_dp->compliance.test_data.edid =
>>>>>>> INTEL_DP_RESOLUTION_FAILSAFE;
>>>>>>
>>>>>
>>>>> The reason I pointed to this code is to give an example of how other
>>>>> drivers handle this test-case.
>>>>>
>>>>> We added this patch for 4.2.2.1 and 4.2.2.6 EDID test cases.
>>>>>
>>>>> The challenge here as found out from our discussion here was to mark a
>>>>> particular mode as preferred so that the Chrome usermode can pick it.
>>>>>
>>>>> Now whats happening with that there was always a possibility of two
>>>>> modes being marked as preferred due to this and so-on.
>>>>>
>>>>> We had a pretty long discussion last night and thought of all possible
>>>>> solutions but all of them look like a hack to us in the driver because
>>>>> we end up breaking other things due to this.
>>>>>
>>>>> So we decided that driver is not the place to handle this test case.
>>>>> Since we do have IGT support for chromebooks, we will handle both these
>>>>> test cases there as other vendors do the same way and it works.
>>>>>
>>>>>
>>>>>> Just because Intel DRM has its own solution for something doesn't mean
>>>>>> everyone else should copy them and implement their own solution. Up
>>>>>> until recently DP AUX backlights were baked into different DRM
>>>>>> drivers. A recent effort was made to pull it out. I think the Intel
>>>>>> DRM code was the "first one" to the party and it wasn't clear how
>>>>>> things should be broken up to share with other drivers, so mostly it
>>>>>> did everything itself, but that's not the long term answer.
>>>>>>
>>>>>> I'm not saying that we need to block your change on a full re-design
>>>>>> or anything, but I'm just saying that:
>>>>>>
>>>>>> * You're trying to implement a generic DP rule, not something specific
>>>>>> to Qualcomm hardware. That implies that, if possible, it shouldn't be
>>>>>> in a Qualcomm driver.
>>>>>>
>>>>>> * It doesn't seem like it would be terrible to handle this in the core.
>>>>>>
>>>>>>
>>>>>>> This marks the fail safe mode and IGT test case reads this to set this
>>>>>>> mode and hence the test passes.
>>>>>>>
>>>>>>> We rely on the chromeOS usermode to output pixel data for this test-case
>>>>>>> and not IGT. We use IGT only for video pattern CTS today but this is a
>>>>>>> different test-case which is failing.
>>>>>>>
>>>>>>> ChromeOS usermode will not pick 640x480 unless we mark it as preferred
>>>>>>> or other modes are eliminated.
>>>>>>>
>>>>>>> So we have to come up with the right way for the usermode to pick
>>>>>>> 640x480.
>>>>>>>
>>>>>>> We will discuss this a bit more and come up with a different change.
>>>>>>
>>>>>> Can you provide the exact EDID from the failing test case? Maybe that
>>>>>> will help shed some light on what's going on. I looked at the original
>>>>>> commit and it just referred to 4.2.2.1, which I assume is "EDID Read
>>>>>> upon HPD Plug Event", but that doesn't give details that seem relevant
>>>>>> to the discussion here.
>>>>>
>>>>> Yes so it is 4.2.2.1 and 4.2.2.6.
>>>>>
>>>>> That alone wont give the full picture.
>>>>>
>>>>> So its a combination of things.
>>>>>
>>>>> While running the test, the test equipment published only one mode.
>>>>> But we could not support that mode because of 2 lanes.
>>>>> Equipment did not add 640x480 to the list of modes.
>>>>> DRM fwk will also not add it because count_modes is not 0 ( there was
>>>>> one mode ).
>>>>> So we ended up making these changes.
>>>>
>>>> I think a proper solution might be to rewrite
>>>> drm_helper_probe_single_connector_modes() in the following way:
>>>> - call get_modes()
>>>> - validate the result
>>>> - prune invalid
>>>>
>>>> - if the number of modes is 0, call drm_add_override_edid_modes()
>>>> - validate the result
>>>> - prune invalid
>>>>
>>>> - if the number of modes is still 0, call drm_add_modes_noedid()
>>>> - validate the result
>>>> - prune invalid
>>>>
>>>> [A separate change might happen here after all the checks: if the number
>>>> of modes is still 0 and if it is a DP, enforce adding 640x480 even w/o
>>>> validation. But generally I feel that this shouldn't be necessary
>>>> because the previous step should have added it.]
>>>>
>>>> This way we can be sure that all modes are validated, but still to do
>>>> our best to add something supported to the list of modes.
>>>
>>> I'm partway through implementing / testing something similar to this.
>>> ;-) My logic is slightly different than yours, though. In the very
>>> least I'm not convinced that we want to add the higher resolution
>>> modes (like 1024x768) if all the modes fail to validate. The DP spec
>>> only claims 640x480 is always supported. The higher resolution modes
>>> are for when the EDID fails to read I think. Similarly I'm not
>>> convinced that we should do pruning before deciding on
>>> drm_add_override_edid_modes().
>>
>>
>> I think pruning before drm_add_override_edid_modes() would allow one to
>> use override if the first read returned some modes which are invalid.
> 
> Yeah, I'm less certain about drm_add_override_edid_modes(), but as per
> documented it's only to be used if the EDID failed to read.
> 
> If someone has an actual use case where they need to add the override
> modes for this specific case then we can, but I think it can be done
> separately once someone has an actual use case.
> 
> 
>> Regarding 1024 vs 640. For the restructure we shouldn't change this. And
>> I'd actually point to the following commit message:
>>
>> commit 9632b41f00cc2fb2846569cc99dbeef78e5c94a0
>> Author: Adam Jackson <ajax@redhat.com>
>> Date:   Mon Nov 23 14:23:07 2009 -0500
>>
>>       drm/modes: Fall back to 1024x768 instead of 800x600
>>
>>       This matches the X server's fallback modes when using RANDR 1.2.
>>
>>       See also: http://bugzilla.redhat.com/538761
>>
>>       Signed-off-by: Adam Jackson <ajax@redhat.com>
>>       Signed-off-by: Dave Airlie <airlied@redhat.com>
>>
>>
>> So I'd say, let's leave 1024 as is and just try them if all other modes
>> are invalid.
> 
> I'm pretty strongly against adding 1024x768 when all modes fail to
> validate. Specifically:
> 
> If the EDID fully fails to read then adding these higher resolution
> modes makes sense. We have no knowledge at all about the display in
> this case and so we can guess that some standard higher resolutions
> might make sense. In the case we're dealing with here, we have very
> specific knowledge about what the display said it could handle and we
> can't support any of them. The DP spec _only_ lists 640x480 as a
> required mode so that's the only one we should add.

Then this should be a DP-specific override happening after all the pruning.
Doug Anderson April 26, 2022, 6:48 p.m. UTC | #20
Hi,

On Tue, Apr 26, 2022 at 8:37 AM Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
> > Can you provide the exact EDID from the failing test case? Maybe that
> > will help shed some light on what's going on. I looked at the original
> > commit and it just referred to 4.2.2.1, which I assume is "EDID Read
> > upon HPD Plug Event", but that doesn't give details that seem relevant
> > to the discussion here.
>
> Yes so it is 4.2.2.1 and 4.2.2.6.
>
> That alone wont give the full picture.
>
> So its a combination of things.
>
> While running the test, the test equipment published only one mode.
> But we could not support that mode because of 2 lanes.
> Equipment did not add 640x480 to the list of modes.
> DRM fwk will also not add it because count_modes is not 0 ( there was
> one mode ).
> So we ended up making these changes.

Ah! This is useful context and makes tons of sense.

This really feels like something that could be handled in the core. OK, See:

https://lore.kernel.org/r/20220426114627.2.I4ac7f55aa446699f8c200a23c10463256f6f439f@changeid


> > I guess maybe what's happening is that the test case is giving an EDID
> > where all the modes are not supportable by the current clock rate /
> > lanes? ...and then somehow we're not falling back to 640x480. It's
> > always possible that this is a userspace problem.
> >
> > In any case, would you object to a revert of the patches in the short term?
>
> Not sure, if you saw this change kuogee posted last night.
> https://patchwork.freedesktop.org/patch/483415/
> We did decided to remove all the code related to these test cases and
> handle them in IGT.

I hadn't seen it yet and I wasn't CCed. :( I'll take a look now.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 92cd50f..01453db 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -555,12 +555,6 @@  static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
 
 	mutex_unlock(&dp->event_mutex);
 
-	/*
-	 * add fail safe mode outside event_mutex scope
-	 * to avoid potiential circular lock with drm thread
-	 */
-	dp_panel_add_fail_safe_mode(dp->dp_display.connector);
-
 	/* uevent will complete connection part */
 	return 0;
 };
diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c
index 1aa9aa8c..23fee42 100644
--- a/drivers/gpu/drm/msm/dp/dp_panel.c
+++ b/drivers/gpu/drm/msm/dp/dp_panel.c
@@ -151,15 +151,6 @@  static int dp_panel_update_modes(struct drm_connector *connector,
 	return rc;
 }
 
-void dp_panel_add_fail_safe_mode(struct drm_connector *connector)
-{
-	/* fail safe edid */
-	mutex_lock(&connector->dev->mode_config.mutex);
-	if (drm_add_modes_noedid(connector, 640, 480))
-		drm_set_preferred_mode(connector, 640, 480);
-	mutex_unlock(&connector->dev->mode_config.mutex);
-}
-
 int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
 	struct drm_connector *connector)
 {
@@ -216,7 +207,11 @@  int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
 			goto end;
 		}
 
-		dp_panel_add_fail_safe_mode(connector);
+		/* fail safe edid */
+		mutex_lock(&connector->dev->mode_config.mutex);
+		if (drm_add_modes_noedid(connector, 640, 480))
+			drm_set_preferred_mode(connector, 640, 480);
+		mutex_unlock(&connector->dev->mode_config.mutex);
 	}
 
 	if (panel->aux_cfg_update_done) {
@@ -266,6 +261,14 @@  int dp_panel_get_modes(struct dp_panel *dp_panel,
 		return -EINVAL;
 	}
 
+	/*
+	 * add fail safe mode (640x480) here
+	 * since we are executed in drm_thread context,
+	 * no mode_config.mutex acquired required
+	 */
+	if (drm_add_modes_noedid(connector, 640, 480))
+		drm_set_preferred_mode(connector, 640, 480);
+
 	if (dp_panel->edid)
 		return dp_panel_update_modes(connector, dp_panel->edid);