diff mbox

cpufreq, Fix overflow in busy_scaled due to long delay [v2]

Message ID 1434390209-23256-1-git-send-email-prarit@redhat.com (mailing list archive)
State Accepted, archived
Delegated to: Rafael Wysocki
Headers show

Commit Message

Prarit Bhargava June 15, 2015, 5:43 p.m. UTC
The kernel may delay interrupts for a long time which can result in timers
being delayed. If this occurs the intel_pstate driver will crash with a
divide by zero error:

divide error: 0000 [#1] SMP
Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul
 crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod
CPU: 113 PID: 0 Comm: swapper/113 Tainted: G        W   --------------   3.10.0-229.1.2.el7.x86_64 #1
Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014
task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000
RIP: 0010:[<ffffffff814a9279>]  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
RSP: 0018:ffff883fff4e3db8  EFLAGS: 00010206
RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d
RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001
R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5
R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246
FS:  0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
 ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071
 ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd
 ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100
Call Trace:
 <IRQ>

 [<ffffffff81099dc1>] ? run_posix_cpu_timers+0x51/0x840
 [<ffffffff810b69dd>] ? trigger_load_balance+0x5d/0x200
 [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
 [<ffffffff8107df56>] call_timer_fn+0x36/0x110
 [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
 [<ffffffff8107fdcf>] run_timer_softirq+0x21f/0x320
 [<ffffffff81077b2f>] __do_softirq+0xef/0x280
 [<ffffffff816156dc>] call_softirq+0x1c/0x30
 [<ffffffff81015d95>] do_softirq+0x65/0xa0
 [<ffffffff81077ec5>] irq_exit+0x115/0x120
 [<ffffffff81616355>] smp_apic_timer_interrupt+0x45/0x60
 [<ffffffff81614a1d>] apic_timer_interrupt+0x6d/0x80
 <EOI>

 [<ffffffff814a9c32>] ? cpuidle_enter_state+0x52/0xc0
 [<ffffffff814a9c28>] ? cpuidle_enter_state+0x48/0xc0
 [<ffffffff814a9d65>] cpuidle_idle_call+0xc5/0x200
 [<ffffffff8101d14e>] arch_cpu_idle+0xe/0x30
 [<ffffffff810c67c1>] cpu_startup_entry+0xf1/0x290
 [<ffffffff8104228a>] start_secondary+0x1ba/0x230
Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29
RIP  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
 RSP <ffff883fff4e3db8>

The kernel values for cpudata for CPU 113 were:

struct cpudata {
  cpu = 113,
  timer = {
    entry = {
      next = 0x0,
      prev = 0xdead000000200200
    },
    expires = 8357799745,
    base = 0xffff883fe84ec001,
    function = 0xffffffff814a9100 <intel_pstate_timer_func>,
    data = 18446612406765768960,
<snip>
    i_gain = 0,
    d_gain = 0,
    deadband = 0,
    last_err = 22489
  },
  last_sample_time = {
    tv64 = 4063132438017305
  },
  prev_aperf = 287326796397463,
  prev_mperf = 251427432090198,
  sample = {
    core_pct_busy = 23081,
    aperf = 2937407,
    mperf = 3257884,
    freq = 2524484,
    time = {
      tv64 = 4063149215234118
    }
  }
}

which results in the time between samples = last_sample_time - sample.time
= 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds.

The duration between reads of the APERF and MPERF registers overflowed a s32
sized integer in intel_pstate_get_scaled_busy()'s call to div_fp().  The result
is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0.

While the kernel shouldn't be delaying for a long time, it can and does
happen and the intel_pstate driver should not panic in this situation.  This
patch changes the div_fp() function to use div64_s64() to allow for "long"
division.  This will avoid the overflow condition on long delays.

[v2]: use div64_s64() in div_fp()

Cc: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Doug Smythies <dsmythies@telus.net>
Cc: linux-pm@vger.kernel.org

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
---
 drivers/cpufreq/intel_pstate.c |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

Comments

Kristen Carlson Accardi June 15, 2015, 10:12 p.m. UTC | #1
On Mon, 15 Jun 2015 13:43:29 -0400
Prarit Bhargava <prarit@redhat.com> wrote:

> The kernel may delay interrupts for a long time which can result in timers
> being delayed. If this occurs the intel_pstate driver will crash with a
> divide by zero error:
> 
> divide error: 0000 [#1] SMP
> Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul
>  crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod
> CPU: 113 PID: 0 Comm: swapper/113 Tainted: G        W   --------------   3.10.0-229.1.2.el7.x86_64 #1
> Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014
> task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000
> RIP: 0010:[<ffffffff814a9279>]  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
> RSP: 0018:ffff883fff4e3db8  EFLAGS: 00010206
> RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000
> RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d
> RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001
> R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5
> R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246
> FS:  0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Stack:
>  ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071
>  ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd
>  ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100
> Call Trace:
>  <IRQ>
> 
>  [<ffffffff81099dc1>] ? run_posix_cpu_timers+0x51/0x840
>  [<ffffffff810b69dd>] ? trigger_load_balance+0x5d/0x200
>  [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
>  [<ffffffff8107df56>] call_timer_fn+0x36/0x110
>  [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
>  [<ffffffff8107fdcf>] run_timer_softirq+0x21f/0x320
>  [<ffffffff81077b2f>] __do_softirq+0xef/0x280
>  [<ffffffff816156dc>] call_softirq+0x1c/0x30
>  [<ffffffff81015d95>] do_softirq+0x65/0xa0
>  [<ffffffff81077ec5>] irq_exit+0x115/0x120
>  [<ffffffff81616355>] smp_apic_timer_interrupt+0x45/0x60
>  [<ffffffff81614a1d>] apic_timer_interrupt+0x6d/0x80
>  <EOI>
> 
>  [<ffffffff814a9c32>] ? cpuidle_enter_state+0x52/0xc0
>  [<ffffffff814a9c28>] ? cpuidle_enter_state+0x48/0xc0
>  [<ffffffff814a9d65>] cpuidle_idle_call+0xc5/0x200
>  [<ffffffff8101d14e>] arch_cpu_idle+0xe/0x30
>  [<ffffffff810c67c1>] cpu_startup_entry+0xf1/0x290
>  [<ffffffff8104228a>] start_secondary+0x1ba/0x230
> Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29
> RIP  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
>  RSP <ffff883fff4e3db8>
> 
> The kernel values for cpudata for CPU 113 were:
> 
> struct cpudata {
>   cpu = 113,
>   timer = {
>     entry = {
>       next = 0x0,
>       prev = 0xdead000000200200
>     },
>     expires = 8357799745,
>     base = 0xffff883fe84ec001,
>     function = 0xffffffff814a9100 <intel_pstate_timer_func>,
>     data = 18446612406765768960,
> <snip>
>     i_gain = 0,
>     d_gain = 0,
>     deadband = 0,
>     last_err = 22489
>   },
>   last_sample_time = {
>     tv64 = 4063132438017305
>   },
>   prev_aperf = 287326796397463,
>   prev_mperf = 251427432090198,
>   sample = {
>     core_pct_busy = 23081,
>     aperf = 2937407,
>     mperf = 3257884,
>     freq = 2524484,
>     time = {
>       tv64 = 4063149215234118
>     }
>   }
> }
> 
> which results in the time between samples = last_sample_time - sample.time
> = 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds.
> 
> The duration between reads of the APERF and MPERF registers overflowed a s32
> sized integer in intel_pstate_get_scaled_busy()'s call to div_fp().  The result
> is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0.
> 
> While the kernel shouldn't be delaying for a long time, it can and does
> happen and the intel_pstate driver should not panic in this situation.  This
> patch changes the div_fp() function to use div64_s64() to allow for "long"
> division.  This will avoid the overflow condition on long delays.
> 
> [v2]: use div64_s64() in div_fp()

Were you able to resolve your original concerns with doing this?  I
thought you mentioned that you'd tested it and it gave you some
negative side effects?

Thanks,
Kristen

> 
> Cc: Kristen Carlson Accardi <kristen@linux.intel.com>
> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
> Cc: Viresh Kumar <viresh.kumar@linaro.org>
> Cc: Doug Smythies <dsmythies@telus.net>
> Cc: linux-pm@vger.kernel.org
> 
> Signed-off-by: Prarit Bhargava <prarit@redhat.com>
> ---
>  drivers/cpufreq/intel_pstate.c |   10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 6414661..b153d86 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -48,9 +48,9 @@ static inline int32_t mul_fp(int32_t x, int32_t y)
>  	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
>  }
>  
> -static inline int32_t div_fp(int32_t x, int32_t y)
> +static inline int32_t div_fp(s64 x, s64 y)
>  {
> -	return div_s64((int64_t)x << FRAC_BITS, y);
> +	return div64_s64((int64_t)x << FRAC_BITS, y);
>  }
>  
>  static inline int ceiling_fp(int32_t x)
> @@ -794,7 +794,7 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
>  static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
>  {
>  	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
> -	u32 duration_us;
> +	s64 duration_us;
>  	u32 sample_time;
>  
>  	/*
> @@ -821,8 +821,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
>  	 * to adjust our busyness.
>  	 */
>  	sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
> -	duration_us = (u32) ktime_us_delta(cpu->sample.time,
> -					   cpu->last_sample_time);
> +	duration_us = ktime_us_delta(cpu->sample.time,
> +				     cpu->last_sample_time);
>  	if (duration_us > sample_time * 3) {
>  		sample_ratio = div_fp(int_tofp(sample_time),
>  				      int_tofp(duration_us));

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Prarit Bhargava June 15, 2015, 11:33 p.m. UTC | #2
On 06/15/2015 06:12 PM, Kristen Carlson Accardi wrote:
> On Mon, 15 Jun 2015 13:43:29 -0400
> Prarit Bhargava <prarit@redhat.com> wrote:
> 
>> The kernel may delay interrupts for a long time which can result in timers
>> being delayed. If this occurs the intel_pstate driver will crash with a
>> divide by zero error:
>>
>> divide error: 0000 [#1] SMP
>> Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul
>>  crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod
>> CPU: 113 PID: 0 Comm: swapper/113 Tainted: G        W   --------------   3.10.0-229.1.2.el7.x86_64 #1
>> Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014
>> task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000
>> RIP: 0010:[<ffffffff814a9279>]  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
>> RSP: 0018:ffff883fff4e3db8  EFLAGS: 00010206
>> RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000
>> RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d
>> RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001
>> R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5
>> R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246
>> FS:  0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000
>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0
>> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
>> Stack:
>>  ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071
>>  ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd
>>  ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100
>> Call Trace:
>>  <IRQ>
>>
>>  [<ffffffff81099dc1>] ? run_posix_cpu_timers+0x51/0x840
>>  [<ffffffff810b69dd>] ? trigger_load_balance+0x5d/0x200
>>  [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
>>  [<ffffffff8107df56>] call_timer_fn+0x36/0x110
>>  [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
>>  [<ffffffff8107fdcf>] run_timer_softirq+0x21f/0x320
>>  [<ffffffff81077b2f>] __do_softirq+0xef/0x280
>>  [<ffffffff816156dc>] call_softirq+0x1c/0x30
>>  [<ffffffff81015d95>] do_softirq+0x65/0xa0
>>  [<ffffffff81077ec5>] irq_exit+0x115/0x120
>>  [<ffffffff81616355>] smp_apic_timer_interrupt+0x45/0x60
>>  [<ffffffff81614a1d>] apic_timer_interrupt+0x6d/0x80
>>  <EOI>
>>
>>  [<ffffffff814a9c32>] ? cpuidle_enter_state+0x52/0xc0
>>  [<ffffffff814a9c28>] ? cpuidle_enter_state+0x48/0xc0
>>  [<ffffffff814a9d65>] cpuidle_idle_call+0xc5/0x200
>>  [<ffffffff8101d14e>] arch_cpu_idle+0xe/0x30
>>  [<ffffffff810c67c1>] cpu_startup_entry+0xf1/0x290
>>  [<ffffffff8104228a>] start_secondary+0x1ba/0x230
>> Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29
>> RIP  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
>>  RSP <ffff883fff4e3db8>
>>
>> The kernel values for cpudata for CPU 113 were:
>>
>> struct cpudata {
>>   cpu = 113,
>>   timer = {
>>     entry = {
>>       next = 0x0,
>>       prev = 0xdead000000200200
>>     },
>>     expires = 8357799745,
>>     base = 0xffff883fe84ec001,
>>     function = 0xffffffff814a9100 <intel_pstate_timer_func>,
>>     data = 18446612406765768960,
>> <snip>
>>     i_gain = 0,
>>     d_gain = 0,
>>     deadband = 0,
>>     last_err = 22489
>>   },
>>   last_sample_time = {
>>     tv64 = 4063132438017305
>>   },
>>   prev_aperf = 287326796397463,
>>   prev_mperf = 251427432090198,
>>   sample = {
>>     core_pct_busy = 23081,
>>     aperf = 2937407,
>>     mperf = 3257884,
>>     freq = 2524484,
>>     time = {
>>       tv64 = 4063149215234118
>>     }
>>   }
>> }
>>
>> which results in the time between samples = last_sample_time - sample.time
>> = 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds.
>>
>> The duration between reads of the APERF and MPERF registers overflowed a s32
>> sized integer in intel_pstate_get_scaled_busy()'s call to div_fp().  The result
>> is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0.
>>
>> While the kernel shouldn't be delaying for a long time, it can and does
>> happen and the intel_pstate driver should not panic in this situation.  This
>> patch changes the div_fp() function to use div64_s64() to allow for "long"
>> division.  This will avoid the overflow condition on long delays.
>>
>> [v2]: use div64_s64() in div_fp()
> 
> Were you able to resolve your original concerns with doing this?  I
> thought you mentioned that you'd tested it and it gave you some
> negative side effects?

Sorry Kristen, I think my comment confused you.  I was worried about passing "0"
back as the core_busy value after a long delay.  Given that it is already
possible to return 0 when the duration exceeds 3 * the expected delay I no
longer think there is any issue of returning 0 on a _really really really long_
delay.

IOW, there isn't a problem and I was being overly careful with v1 of the patch.

P.

> 
> Thanks,
> Kristen
> 
>>
>> Cc: Kristen Carlson Accardi <kristen@linux.intel.com>
>> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
>> Cc: Viresh Kumar <viresh.kumar@linaro.org>
>> Cc: Doug Smythies <dsmythies@telus.net>
>> Cc: linux-pm@vger.kernel.org
>>
>> Signed-off-by: Prarit Bhargava <prarit@redhat.com>
>> ---
>>  drivers/cpufreq/intel_pstate.c |   10 +++++-----
>>  1 file changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>> index 6414661..b153d86 100644
>> --- a/drivers/cpufreq/intel_pstate.c
>> +++ b/drivers/cpufreq/intel_pstate.c
>> @@ -48,9 +48,9 @@ static inline int32_t mul_fp(int32_t x, int32_t y)
>>  	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
>>  }
>>  
>> -static inline int32_t div_fp(int32_t x, int32_t y)
>> +static inline int32_t div_fp(s64 x, s64 y)
>>  {
>> -	return div_s64((int64_t)x << FRAC_BITS, y);
>> +	return div64_s64((int64_t)x << FRAC_BITS, y);
>>  }
>>  
>>  static inline int ceiling_fp(int32_t x)
>> @@ -794,7 +794,7 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
>>  static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
>>  {
>>  	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
>> -	u32 duration_us;
>> +	s64 duration_us;
>>  	u32 sample_time;
>>  
>>  	/*
>> @@ -821,8 +821,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
>>  	 * to adjust our busyness.
>>  	 */
>>  	sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
>> -	duration_us = (u32) ktime_us_delta(cpu->sample.time,
>> -					   cpu->last_sample_time);
>> +	duration_us = ktime_us_delta(cpu->sample.time,
>> +				     cpu->last_sample_time);
>>  	if (duration_us > sample_time * 3) {
>>  		sample_ratio = div_fp(int_tofp(sample_time),
>>  				      int_tofp(duration_us));
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafael J. Wysocki June 16, 2015, 9:35 p.m. UTC | #3
On Monday, June 15, 2015 07:33:03 PM Prarit Bhargava wrote:
> 
> On 06/15/2015 06:12 PM, Kristen Carlson Accardi wrote:
> > On Mon, 15 Jun 2015 13:43:29 -0400
> > Prarit Bhargava <prarit@redhat.com> wrote:
> > 
> >> The kernel may delay interrupts for a long time which can result in timers
> >> being delayed. If this occurs the intel_pstate driver will crash with a
> >> divide by zero error:
> >>
> >> divide error: 0000 [#1] SMP
> >> Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul
> >>  crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod
> >> CPU: 113 PID: 0 Comm: swapper/113 Tainted: G        W   --------------   3.10.0-229.1.2.el7.x86_64 #1
> >> Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014
> >> task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000
> >> RIP: 0010:[<ffffffff814a9279>]  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
> >> RSP: 0018:ffff883fff4e3db8  EFLAGS: 00010206
> >> RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000
> >> RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d
> >> RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001
> >> R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5
> >> R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246
> >> FS:  0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000
> >> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >> CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0
> >> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> >> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> >> Stack:
> >>  ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071
> >>  ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd
> >>  ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100
> >> Call Trace:
> >>  <IRQ>
> >>
> >>  [<ffffffff81099dc1>] ? run_posix_cpu_timers+0x51/0x840
> >>  [<ffffffff810b69dd>] ? trigger_load_balance+0x5d/0x200
> >>  [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
> >>  [<ffffffff8107df56>] call_timer_fn+0x36/0x110
> >>  [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
> >>  [<ffffffff8107fdcf>] run_timer_softirq+0x21f/0x320
> >>  [<ffffffff81077b2f>] __do_softirq+0xef/0x280
> >>  [<ffffffff816156dc>] call_softirq+0x1c/0x30
> >>  [<ffffffff81015d95>] do_softirq+0x65/0xa0
> >>  [<ffffffff81077ec5>] irq_exit+0x115/0x120
> >>  [<ffffffff81616355>] smp_apic_timer_interrupt+0x45/0x60
> >>  [<ffffffff81614a1d>] apic_timer_interrupt+0x6d/0x80
> >>  <EOI>
> >>
> >>  [<ffffffff814a9c32>] ? cpuidle_enter_state+0x52/0xc0
> >>  [<ffffffff814a9c28>] ? cpuidle_enter_state+0x48/0xc0
> >>  [<ffffffff814a9d65>] cpuidle_idle_call+0xc5/0x200
> >>  [<ffffffff8101d14e>] arch_cpu_idle+0xe/0x30
> >>  [<ffffffff810c67c1>] cpu_startup_entry+0xf1/0x290
> >>  [<ffffffff8104228a>] start_secondary+0x1ba/0x230
> >> Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29
> >> RIP  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
> >>  RSP <ffff883fff4e3db8>
> >>
> >> The kernel values for cpudata for CPU 113 were:
> >>
> >> struct cpudata {
> >>   cpu = 113,
> >>   timer = {
> >>     entry = {
> >>       next = 0x0,
> >>       prev = 0xdead000000200200
> >>     },
> >>     expires = 8357799745,
> >>     base = 0xffff883fe84ec001,
> >>     function = 0xffffffff814a9100 <intel_pstate_timer_func>,
> >>     data = 18446612406765768960,
> >> <snip>
> >>     i_gain = 0,
> >>     d_gain = 0,
> >>     deadband = 0,
> >>     last_err = 22489
> >>   },
> >>   last_sample_time = {
> >>     tv64 = 4063132438017305
> >>   },
> >>   prev_aperf = 287326796397463,
> >>   prev_mperf = 251427432090198,
> >>   sample = {
> >>     core_pct_busy = 23081,
> >>     aperf = 2937407,
> >>     mperf = 3257884,
> >>     freq = 2524484,
> >>     time = {
> >>       tv64 = 4063149215234118
> >>     }
> >>   }
> >> }
> >>
> >> which results in the time between samples = last_sample_time - sample.time
> >> = 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds.
> >>
> >> The duration between reads of the APERF and MPERF registers overflowed a s32
> >> sized integer in intel_pstate_get_scaled_busy()'s call to div_fp().  The result
> >> is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0.
> >>
> >> While the kernel shouldn't be delaying for a long time, it can and does
> >> happen and the intel_pstate driver should not panic in this situation.  This
> >> patch changes the div_fp() function to use div64_s64() to allow for "long"
> >> division.  This will avoid the overflow condition on long delays.
> >>
> >> [v2]: use div64_s64() in div_fp()
> > 
> > Were you able to resolve your original concerns with doing this?  I
> > thought you mentioned that you'd tested it and it gave you some
> > negative side effects?
> 
> Sorry Kristen, I think my comment confused you.  I was worried about passing "0"
> back as the core_busy value after a long delay.  Given that it is already
> possible to return 0 when the duration exceeds 3 * the expected delay I no
> longer think there is any issue of returning 0 on a _really really really long_
> delay.
> 
> IOW, there isn't a problem and I was being overly careful with v1 of the patch.

OK, patch queued up fo 4.1, thanks!
Rafael J. Wysocki June 16, 2015, 9:35 p.m. UTC | #4
On Tuesday, June 16, 2015 11:35:10 PM Rafael J. Wysocki wrote:
> On Monday, June 15, 2015 07:33:03 PM Prarit Bhargava wrote:
> > 
> > On 06/15/2015 06:12 PM, Kristen Carlson Accardi wrote:
> > > On Mon, 15 Jun 2015 13:43:29 -0400
> > > Prarit Bhargava <prarit@redhat.com> wrote:
> > > 
> > >> The kernel may delay interrupts for a long time which can result in timers
> > >> being delayed. If this occurs the intel_pstate driver will crash with a
> > >> divide by zero error:
> > >>
> > >> divide error: 0000 [#1] SMP
> > >> Modules linked in: btrfs zlib_deflate raid6_pq xor msdos ext4 mbcache jbd2 binfmt_misc arc4 md4 nls_utf8 cifs dns_resolver tcp_lp bnep bluetooth rfkill fuse dm_service_time iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ftp ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables intel_powerclamp coretemp vfat fat kvm_intel iTCO_wdt iTCO_vendor_support ipmi_devintf sr_mod kvm crct10dif_pclmul
> > >>  crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel cdc_ether lrw usbnet cdrom mii gf128mul glue_helper ablk_helper cryptd lpc_ich mfd_core pcspkr sb_edac edac_core ipmi_si ipmi_msghandler ioatdma wmi shpchp acpi_pad nfsd auth_rpcgss nfs_acl lockd uinput dm_multipath sunrpc xfs libcrc32c usb_storage sd_mod crc_t10dif crct10dif_common ixgbe mgag200 syscopyarea sysfillrect sysimgblt mdio drm_kms_helper ttm igb drm ptp pps_core dca i2c_algo_bit megaraid_sas i2c_core dm_mirror dm_region_hash dm_log dm_mod
> > >> CPU: 113 PID: 0 Comm: swapper/113 Tainted: G        W   --------------   3.10.0-229.1.2.el7.x86_64 #1
> > >> Hardware name: IBM x3950 X6 -[3837AC2]-/00FN827, BIOS -[A8E112BUS-1.00]- 08/27/2014
> > >> task: ffff880fe8abe660 ti: ffff880fe8ae4000 task.ti: ffff880fe8ae4000
> > >> RIP: 0010:[<ffffffff814a9279>]  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
> > >> RSP: 0018:ffff883fff4e3db8  EFLAGS: 00010206
> > >> RAX: 0000000027100000 RBX: ffff883fe6965100 RCX: 0000000000000000
> > >> RDX: 0000000000000000 RSI: 0000000000000010 RDI: 000000002e53632d
> > >> RBP: ffff883fff4e3e20 R08: 000e6f69a5a125c0 R09: ffff883fe84ec001
> > >> R10: 0000000000000002 R11: 0000000000000005 R12: 00000000000049f5
> > >> R13: 0000000000271000 R14: 00000000000049f5 R15: 0000000000000246
> > >> FS:  0000000000000000(0000) GS:ffff883fff4e0000(0000) knlGS:0000000000000000
> > >> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > >> CR2: 00007f7668601000 CR3: 000000000190a000 CR4: 00000000001407e0
> > >> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > >> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> > >> Stack:
> > >>  ffff883fff4e3e58 ffffffff81099dc1 0000000000000086 0000000000000071
> > >>  ffff883fff4f3680 0000000000000071 fbdc8a965e33afee ffffffff810b69dd
> > >>  ffff883fe84ec000 ffff883fe6965108 0000000000000100 ffffffff814a9100
> > >> Call Trace:
> > >>  <IRQ>
> > >>
> > >>  [<ffffffff81099dc1>] ? run_posix_cpu_timers+0x51/0x840
> > >>  [<ffffffff810b69dd>] ? trigger_load_balance+0x5d/0x200
> > >>  [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
> > >>  [<ffffffff8107df56>] call_timer_fn+0x36/0x110
> > >>  [<ffffffff814a9100>] ? pid_param_set+0x130/0x130
> > >>  [<ffffffff8107fdcf>] run_timer_softirq+0x21f/0x320
> > >>  [<ffffffff81077b2f>] __do_softirq+0xef/0x280
> > >>  [<ffffffff816156dc>] call_softirq+0x1c/0x30
> > >>  [<ffffffff81015d95>] do_softirq+0x65/0xa0
> > >>  [<ffffffff81077ec5>] irq_exit+0x115/0x120
> > >>  [<ffffffff81616355>] smp_apic_timer_interrupt+0x45/0x60
> > >>  [<ffffffff81614a1d>] apic_timer_interrupt+0x6d/0x80
> > >>  <EOI>
> > >>
> > >>  [<ffffffff814a9c32>] ? cpuidle_enter_state+0x52/0xc0
> > >>  [<ffffffff814a9c28>] ? cpuidle_enter_state+0x48/0xc0
> > >>  [<ffffffff814a9d65>] cpuidle_idle_call+0xc5/0x200
> > >>  [<ffffffff8101d14e>] arch_cpu_idle+0xe/0x30
> > >>  [<ffffffff810c67c1>] cpu_startup_entry+0xf1/0x290
> > >>  [<ffffffff8104228a>] start_secondary+0x1ba/0x230
> > >> Code: 42 0f 00 45 89 e6 48 01 c2 43 8d 44 6d 00 39 d0 73 26 49 c1 e5 08 89 d2 4d 63 f4 49 63 c5 48 c1 e2 08 48 c1 e0 08 48 63 ca 48 99 <48> f7 f9 48 98 4c 0f af f0 49 c1 ee 08 8b 43 78 c1 e0 08 44 29
> > >> RIP  [<ffffffff814a9279>] intel_pstate_timer_func+0x179/0x3d0
> > >>  RSP <ffff883fff4e3db8>
> > >>
> > >> The kernel values for cpudata for CPU 113 were:
> > >>
> > >> struct cpudata {
> > >>   cpu = 113,
> > >>   timer = {
> > >>     entry = {
> > >>       next = 0x0,
> > >>       prev = 0xdead000000200200
> > >>     },
> > >>     expires = 8357799745,
> > >>     base = 0xffff883fe84ec001,
> > >>     function = 0xffffffff814a9100 <intel_pstate_timer_func>,
> > >>     data = 18446612406765768960,
> > >> <snip>
> > >>     i_gain = 0,
> > >>     d_gain = 0,
> > >>     deadband = 0,
> > >>     last_err = 22489
> > >>   },
> > >>   last_sample_time = {
> > >>     tv64 = 4063132438017305
> > >>   },
> > >>   prev_aperf = 287326796397463,
> > >>   prev_mperf = 251427432090198,
> > >>   sample = {
> > >>     core_pct_busy = 23081,
> > >>     aperf = 2937407,
> > >>     mperf = 3257884,
> > >>     freq = 2524484,
> > >>     time = {
> > >>       tv64 = 4063149215234118
> > >>     }
> > >>   }
> > >> }
> > >>
> > >> which results in the time between samples = last_sample_time - sample.time
> > >> = 4063149215234118 - 4063132438017305 = 16777216813 which is 16.777 seconds.
> > >>
> > >> The duration between reads of the APERF and MPERF registers overflowed a s32
> > >> sized integer in intel_pstate_get_scaled_busy()'s call to div_fp().  The result
> > >> is that int_tofp(duration_us) == 0, and the kernel attempts to divide by 0.
> > >>
> > >> While the kernel shouldn't be delaying for a long time, it can and does
> > >> happen and the intel_pstate driver should not panic in this situation.  This
> > >> patch changes the div_fp() function to use div64_s64() to allow for "long"
> > >> division.  This will avoid the overflow condition on long delays.
> > >>
> > >> [v2]: use div64_s64() in div_fp()
> > > 
> > > Were you able to resolve your original concerns with doing this?  I
> > > thought you mentioned that you'd tested it and it gave you some
> > > negative side effects?
> > 
> > Sorry Kristen, I think my comment confused you.  I was worried about passing "0"
> > back as the core_busy value after a long delay.  Given that it is already
> > possible to return 0 when the duration exceeds 3 * the expected delay I no
> > longer think there is any issue of returning 0 on a _really really really long_
> > delay.
> > 
> > IOW, there isn't a problem and I was being overly careful with v1 of the patch.
> 
> OK, patch queued up fo 4.1, thanks!

That should have been 4.2, sorry.
diff mbox

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 6414661..b153d86 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -48,9 +48,9 @@  static inline int32_t mul_fp(int32_t x, int32_t y)
 	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
 }
 
-static inline int32_t div_fp(int32_t x, int32_t y)
+static inline int32_t div_fp(s64 x, s64 y)
 {
-	return div_s64((int64_t)x << FRAC_BITS, y);
+	return div64_s64((int64_t)x << FRAC_BITS, y);
 }
 
 static inline int ceiling_fp(int32_t x)
@@ -794,7 +794,7 @@  static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
 static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 {
 	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
-	u32 duration_us;
+	s64 duration_us;
 	u32 sample_time;
 
 	/*
@@ -821,8 +821,8 @@  static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 	 * to adjust our busyness.
 	 */
 	sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
-	duration_us = (u32) ktime_us_delta(cpu->sample.time,
-					   cpu->last_sample_time);
+	duration_us = ktime_us_delta(cpu->sample.time,
+				     cpu->last_sample_time);
 	if (duration_us > sample_time * 3) {
 		sample_ratio = div_fp(int_tofp(sample_time),
 				      int_tofp(duration_us));