Message ID | 20110626182725.3f2eac8d@schatten.dmk.lab (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Am 26.06.2011 18:27, schrieb Florian Mickler: >> Both of my bisection attempts ended near the same set of drm/i915 changes that >> resulted in non-bootable kernels. Considering I'm using a Intel(R) Core(TM) >> i7-2720QM with some Intel Sandybridge Chipset graphics those might sound >> somewhat plausible to someone knowing the internals, but don't help me at all. > There are 2 untested commits left. But they don't seem to be relevant. > Guessing on those gives me: > > The first bad commit could be any of: > b0b544cd37c060e261afb2cf486296983fcb56da > f67a559daaa0e2ba616bfe9438f202bc57bc8c72 > 18b2190ca5bd3f19717421b1591c79c9b0372428 > 6f06ce184c765fd8d50669a8d12fdd566c920859 > 0fc932b8ec36116bb759105ce910b0475e63112a > 311bd68e024f9006db66cbadc3bd9f62fd663f4b > 040484af3a4efa65786b6e107fbe74747679e17c > ccab5c82759e2ace74b2e84f82d1e0eedd932571 > aa9b500ddf1a6318e7cf8b1754696edddae86db9 > d9b6cb568bc6eca8db88357bf8bbb92d42a91b1e > 92f2584a083986c05fc811bbdf380c3fa7c12296 > 9a4114ffa7b6f5f4635e3745a8dc051d15d4596a > 633f2ea26665d37bb3c8ae30799aa14988622653 > 63d7bbe9ded4146e3f78e5742b119fa1fdb52665 > 417ae1476de3ae9689a374d70565f41b3474641e > ea0760cfc00b9e534423fdaf630d1c8ce7a5ede0 > b24e71798871089da1a4ab049db2800afc1aac0c > fe4402931e43e81a4129eba41d05cf8907603af5 > 65993d64a31844ad444694efb2d159eb9c883e49 > c0c06bd244179f754d68684fd87674585a153e40 > 01fe9dbde19a1a27b8ee63e2d964562962e1eb78 > a37f2f87edc1b6e5932becf6e51535d36b690f2a > bdd92c9ad287e03a2ec52f5a89c470cd5caae1c2 > > > > I'd guess ccab5c82759e2ace74b2e84f82d1e0eedd932571 could be the > cause. Can you check if the appended revert of that commit makes > things disappear? It seems like you guessed perfectly correct - reverting the commit makes those notifications go away at once.
On Tue, 28 Jun 2011 22:46:41 +0200 Olaf Freyer <aaron667@gmx.net> wrote: > Am 26.06.2011 18:27, schrieb Florian Mickler: > >> Both of my bisection attempts ended near the same set of drm/i915 changes that > >> resulted in non-bootable kernels. Considering I'm using a Intel(R) Core(TM) > >> i7-2720QM with some Intel Sandybridge Chipset graphics those might sound > >> somewhat plausible to someone knowing the internals, but don't help me at all. > > There are 2 untested commits left. But they don't seem to be relevant. > > Guessing on those gives me: > > > > The first bad commit could be any of: > > b0b544cd37c060e261afb2cf486296983fcb56da > > f67a559daaa0e2ba616bfe9438f202bc57bc8c72 > > 18b2190ca5bd3f19717421b1591c79c9b0372428 > > 6f06ce184c765fd8d50669a8d12fdd566c920859 > > 0fc932b8ec36116bb759105ce910b0475e63112a > > 311bd68e024f9006db66cbadc3bd9f62fd663f4b > > 040484af3a4efa65786b6e107fbe74747679e17c > > ccab5c82759e2ace74b2e84f82d1e0eedd932571 > > aa9b500ddf1a6318e7cf8b1754696edddae86db9 > > d9b6cb568bc6eca8db88357bf8bbb92d42a91b1e > > 92f2584a083986c05fc811bbdf380c3fa7c12296 > > 9a4114ffa7b6f5f4635e3745a8dc051d15d4596a > > 633f2ea26665d37bb3c8ae30799aa14988622653 > > 63d7bbe9ded4146e3f78e5742b119fa1fdb52665 > > 417ae1476de3ae9689a374d70565f41b3474641e > > ea0760cfc00b9e534423fdaf630d1c8ce7a5ede0 > > b24e71798871089da1a4ab049db2800afc1aac0c > > fe4402931e43e81a4129eba41d05cf8907603af5 > > 65993d64a31844ad444694efb2d159eb9c883e49 > > c0c06bd244179f754d68684fd87674585a153e40 > > 01fe9dbde19a1a27b8ee63e2d964562962e1eb78 > > a37f2f87edc1b6e5932becf6e51535d36b690f2a > > bdd92c9ad287e03a2ec52f5a89c470cd5caae1c2 > > > > > > > > I'd guess ccab5c82759e2ace74b2e84f82d1e0eedd932571 could be the > > cause. Can you check if the appended revert of that commit makes > > things disappear? > It seems like you guessed perfectly correct - reverting the commit makes > those notifications go away at once. > Without this reverted you see messages? 
I missed the earlier stuff; what message are you seeing?
Am 28.06.2011 22:59, schrieb Jesse Barnes: > On Tue, 28 Jun 2011 22:46:41 +0200 > Olaf Freyer <aaron667@gmx.net> wrote: > >> Am 26.06.2011 18:27, schrieb Florian Mickler: >>>> Both of my bisection attempts ended near the same set of drm/i915 changes that >>>> resulted in non-bootable kernels. Considering I'm using a Intel(R) Core(TM) >>>> i7-2720QM with some Intel Sandybridge Chipset graphics those might sound >>>> somewhat plausible to someone knowing the internals, but don't help me at all. >>> There are 2 untested commits left. But they don't seem to be relevant. >>> Guessing on those gives me: >>> >>> The first bad commit could be any of: >>> b0b544cd37c060e261afb2cf486296983fcb56da >>> f67a559daaa0e2ba616bfe9438f202bc57bc8c72 >>> 18b2190ca5bd3f19717421b1591c79c9b0372428 >>> 6f06ce184c765fd8d50669a8d12fdd566c920859 >>> 0fc932b8ec36116bb759105ce910b0475e63112a >>> 311bd68e024f9006db66cbadc3bd9f62fd663f4b >>> 040484af3a4efa65786b6e107fbe74747679e17c >>> ccab5c82759e2ace74b2e84f82d1e0eedd932571 >>> aa9b500ddf1a6318e7cf8b1754696edddae86db9 >>> d9b6cb568bc6eca8db88357bf8bbb92d42a91b1e >>> 92f2584a083986c05fc811bbdf380c3fa7c12296 >>> 9a4114ffa7b6f5f4635e3745a8dc051d15d4596a >>> 633f2ea26665d37bb3c8ae30799aa14988622653 >>> 63d7bbe9ded4146e3f78e5742b119fa1fdb52665 >>> 417ae1476de3ae9689a374d70565f41b3474641e >>> ea0760cfc00b9e534423fdaf630d1c8ce7a5ede0 >>> b24e71798871089da1a4ab049db2800afc1aac0c >>> fe4402931e43e81a4129eba41d05cf8907603af5 >>> 65993d64a31844ad444694efb2d159eb9c883e49 >>> c0c06bd244179f754d68684fd87674585a153e40 >>> 01fe9dbde19a1a27b8ee63e2d964562962e1eb78 >>> a37f2f87edc1b6e5932becf6e51535d36b690f2a >>> bdd92c9ad287e03a2ec52f5a89c470cd5caae1c2 >>> >>> >>> >>> I'd guess ccab5c82759e2ace74b2e84f82d1e0eedd932571 could be the >>> cause. Can you check if the appended revert of that commit makes >>> things disappear? >> It seems like you guessed perfectly correct - reverting the commit makes >> those notifications go away at once. 
>> > Without this reverted you see messages? I missed the earlier stuff, > what message are you seeing? > Since 2.6.39 I saw those as soon as I start up the xserver: May 22 14:41:34 localhost kernel: [ 57.525844] CPU4: Package power limit notification (total events = 1) May 22 14:41:34 localhost kernel: [ 57.525848] CPU0: Package power limit notification (total events = 1) May 22 14:41:34 localhost kernel: [ 57.525851] CPU1: Package power limit notification (total events = 1) May 22 14:41:34 localhost kernel: [ 57.525854] CPU2: Package power limit notification (total events = 1) May 22 14:41:34 localhost kernel: [ 57.525856] CPU5: Package power limit notification (total events = 1) May 22 14:41:34 localhost kernel: [ 57.525859] CPU3: Package power limit notification (total events = 1) May 22 14:41:34 localhost kernel: [ 57.525861] Disabling lock debugging due to kernel taint May 22 14:41:34 localhost kernel: [ 57.525869] CPU6: Package power limit notification (total events = 1) May 22 14:41:34 localhost kernel: [ 57.525872] CPU7: Package power limit notification (total events = 1) May 22 14:41:34 localhost kernel: [ 57.536890] CPU1: Package power limit normal May 22 14:41:34 localhost kernel: [ 57.536893] CPU4: Package power limit normal May 22 14:41:34 localhost kernel: [ 57.536896] CPU2: Package power limit normal May 22 14:41:34 localhost kernel: [ 57.536899] CPU3: Package power limit normal May 22 14:41:34 localhost kernel: [ 57.536901] CPU5: Package power limit normal May 22 14:41:34 localhost kernel: [ 57.536904] CPU0: Package power limit normal May 22 14:41:34 localhost kernel: [ 57.536915] CPU6: Package power limit normal May 22 14:41:34 localhost kernel: [ 57.536918] CPU7: Package power limit normal
On Tue, 28 Jun 2011 23:09:45 +0200 Olaf Freyer <aaron667@gmx.net> wrote: > >>> I'd guess ccab5c82759e2ace74b2e84f82d1e0eedd932571 could be the > >>> cause. Can you check if the appended revert of that commit makes > >>> things disappear? > >> It seems like you guessed perfectly correct - reverting the commit makes > >> those notifications go away at once. > >> > > Without this reverted you see messages? I missed the earlier stuff, > > what message are you seeing? > > > Since 2.6.39 I saw those as soon as I start up the xserver: > > May 22 14:41:34 localhost kernel: [ 57.525844] CPU4: Package power > limit notification (total events = 1) > May 22 14:41:34 localhost kernel: [ 57.525848] CPU0: Package power > limit notification (total events = 1) > May 22 14:41:34 localhost kernel: [ 57.525851] CPU1: Package power > limit notification (total events = 1) > May 22 14:41:34 localhost kernel: [ 57.525854] CPU2: Package power > limit notification (total events = 1) > May 22 14:41:34 localhost kernel: [ 57.525856] CPU5: Package power > limit notification (total events = 1) > May 22 14:41:34 localhost kernel: [ 57.525859] CPU3: Package power > limit notification (total events = 1) > May 22 14:41:34 localhost kernel: [ 57.525861] Disabling lock > debugging due to kernel taint > May 22 14:41:34 localhost kernel: [ 57.525869] CPU6: Package power > limit notification (total events = 1) > May 22 14:41:34 localhost kernel: [ 57.525872] CPU7: Package power > limit notification (total events = 1) > May 22 14:41:34 localhost kernel: [ 57.536890] CPU1: Package power > limit normal > May 22 14:41:34 localhost kernel: [ 57.536893] CPU4: Package power > limit normal > May 22 14:41:34 localhost kernel: [ 57.536896] CPU2: Package power > limit normal > May 22 14:41:34 localhost kernel: [ 57.536899] CPU3: Package power > limit normal > May 22 14:41:34 localhost kernel: [ 57.536901] CPU5: Package power > limit normal > May 22 14:41:34 localhost kernel: [ 57.536904] CPU0: Package power > 
limit normal > May 22 14:41:34 localhost kernel: [ 57.536915] CPU6: Package power > limit normal > May 22 14:41:34 localhost kernel: [ 57.536918] CPU7: Package power > limit normal Ok interesting, didn't realize X startup was so GPU intensive. :) The patch you reverted will definitely cause the GPU to ramp up its frequency much faster than before, but it sounds like on your system you might also see it with the revert if you run something GPU intensive like nexuiz. The CPU (and by extension the GPU) will take care of itself though; if things get too hot or over power, it will clock throttle to keep itself in a safe range.
Am 28.06.2011 23:18, schrieb Jesse Barnes: > On Tue, 28 Jun 2011 23:09:45 +0200 > Olaf Freyer <aaron667@gmx.net> wrote: >>>>> I'd guess ccab5c82759e2ace74b2e84f82d1e0eedd932571 could be the >>>>> cause. Can you check if the appended revert of that commit makes >>>>> things disappear? >>>> It seems like you guessed perfectly correct - reverting the commit makes >>>> those notifications go away at once. >>>> >>> Without this reverted you see messages? I missed the earlier stuff, >>> what message are you seeing? >>> >> Since 2.6.39 I saw those as soon as I start up the xserver: >> >> May 22 14:41:34 localhost kernel: [ 57.525848] CPU0: Package power >> limit notification (total events = 1) >> May 22 14:41:34 localhost kernel: [ 57.536904] CPU0: Package power >> limit normal > Ok interesting, didn't realize X startup was so GPU intensive. :) > > The patch you reverted will definitely cause the GPU to ramp up its > frequency much faster than before, but it sounds like on your system > you might also see it with the revert if you run something GPU > intensive like nexuiz. > > The CPU (and by extension the GPU) will take care of itself though; if > things get too hot or over power, it will clock throttle to keep itself > in a safe range. I also see the message a lot during my daily average usage of my computer (just using Firefox, Thunderbird and IntelliJ) - seeing things like CPU3: Package power limit notification (total events = 90809) after a normal day in the office became normal since 2.6.39. I just gave nexuiz a try for about 30 minutes with the reversal patch applied - and not a single message appeared in my logs.
On Wed, 29 Jun 2011 00:01:58 +0200 Olaf Freyer <aaron667@gmx.net> wrote: > Am 28.06.2011 23:18, schrieb Jesse Barnes: > > On Tue, 28 Jun 2011 23:09:45 +0200 > > Olaf Freyer <aaron667@gmx.net> wrote: > >>>>> I'd guess ccab5c82759e2ace74b2e84f82d1e0eedd932571 could be the > >>>>> cause. Can you check if the appended revert of that commit makes > >>>>> things disappear? > >>>> It seems like you guessed perfectly correct - reverting the commit makes > >>>> those notifications go away at once. > >>>> > >>> Without this reverted you see messages? I missed the earlier stuff, > >>> what message are you seeing? > >>> > >> Since 2.6.39 I saw those as soon as I start up the xserver: > >> > >> May 22 14:41:34 localhost kernel: [ 57.525848] CPU0: Package power > >> limit notification (total events = 1) > >> May 22 14:41:34 localhost kernel: [ 57.536904] CPU0: Package power > >> limit normal > > Ok interesting, didn't realize X startup was so GPU intensive. :) > > > > The patch you reverted will definitely cause the GPU to ramp up its > > frequency much faster than before, but it sounds like on your system > > you might also see it with the revert if you run something GPU > > intensive like nexuiz. > > > > The CPU (and by extension the GPU) will take care of itself though; if > > things get too hot or over power, it will clock throttle to keep itself > > in a safe range. > I also see the message alot during my daily average usage of my computer > (just using Firefox, Thunderbird and IntelliJ) - seeing things like > CPU3: Package power limit notification (total events = 90809) > after a normal day in the office became normal since 2.6.39. > > I just gave nexuiz a try for about 30 minutes with the reversal patch > applied - > and not a single message appeared in my logs. Sounds like with the patch reverted we can't drive your GPU and CPU hard enough to generate the messages. Not sure if that's a good thing or a bad thing though...
On Thu, 30 Jun 2011 08:37:09 +0200 Olaf Freyer <aaron667@gmx.net> wrote: > Am 29.06.2011 00:06, schrieb Jesse Barnes: > > On Wed, 29 Jun 2011 00:01:58 +0200 > > Olaf Freyer <aaron667@gmx.net> wrote: > > > >> Am 28.06.2011 23:18, schrieb Jesse Barnes: > >>> Ok interesting, didn't realize X startup was so GPU intensive. :) > >>> > >>> The patch you reverted will definitely cause the GPU to ramp up its > >>> frequency much faster than before, but it sounds like on your system > >>> you might also see it with the revert if you run something GPU > >>> intensive like nexuiz. > >>> > >>> The CPU (and by extension the GPU) will take care of itself though; if > >>> things get too hot or over power, it will clock throttle to keep itself > >>> in a safe range. > >> I also see the message a lot during my daily average usage of my computer > >> (just using Firefox, Thunderbird and IntelliJ) - seeing things like > >> CPU3: Package power limit notification (total events = 90809) > >> after a normal day in the office became normal since 2.6.39. > >> > >> I just gave nexuiz a try for about 30 minutes with the reversal patch > >> applied - > >> and not a single message appeared in my logs. > > Sounds like with the patch reverted we can't drive your GPU and CPU > > hard enough to generate the messages. Not sure if that's a good thing > > or a bad thing though... > > > I'm not sure either. I saw a single notification event yesterday while > in office - > previously I would have received 70000-90000 during that timeframe. > I consider the pure amount of notifications unsettling - and in case of > some > "real" issue it might even get lost in between those notifications. > > Maybe there is a possible compromise between the situation before and > after the patch? I'm willing to lose a few percent of GPU performance just > for the sake of getting rid of those notification events... Yeah we can probably tune these values a bit better... I'll see about doing that. 
We want to maximize performance across a variety of workloads, but not hit the power limit so hard even for basic stuff...
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5d5def7..a245742 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3378,28 +3378,15 @@ #define GEN6_RP_DOWN_TIMEOUT 0xA010 #define GEN6_RP_INTERRUPT_LIMITS 0xA014 #define GEN6_RPSTAT1 0xA01C -#define GEN6_CAGF_SHIFT 8 -#define GEN6_CAGF_MASK (0x7f << GEN6_CAGF_SHIFT) #define GEN6_RP_CONTROL 0xA024 #define GEN6_RP_MEDIA_TURBO (1<<11) #define GEN6_RP_USE_NORMAL_FREQ (1<<9) #define GEN6_RP_MEDIA_IS_GFX (1<<8) #define GEN6_RP_ENABLE (1<<7) -#define GEN6_RP_UP_IDLE_MIN (0x1<<3) -#define GEN6_RP_UP_BUSY_AVG (0x2<<3) -#define GEN6_RP_UP_BUSY_CONT (0x4<<3) -#define GEN6_RP_DOWN_IDLE_CONT (0x1<<0) +#define GEN6_RP_UP_BUSY_MAX (0x2<<3) +#define GEN6_RP_DOWN_BUSY_MIN (0x2<<0) #define GEN6_RP_UP_THRESHOLD 0xA02C #define GEN6_RP_DOWN_THRESHOLD 0xA030 -#define GEN6_RP_CUR_UP_EI 0xA050 -#define GEN6_CURICONT_MASK 0xffffff -#define GEN6_RP_CUR_UP 0xA054 -#define GEN6_CURBSYTAVG_MASK 0xffffff -#define GEN6_RP_PREV_UP 0xA058 -#define GEN6_RP_CUR_DOWN_EI 0xA05C -#define GEN6_CURIAVG_MASK 0xffffff -#define GEN6_RP_CUR_DOWN 0xA060 -#define GEN6_RP_PREV_DOWN 0xA064 #define GEN6_RP_UP_EI 0xA068 #define GEN6_RP_DOWN_EI 0xA06C #define GEN6_RP_IDLE_HYSTERSIS 0xA070 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index aa43e7b..be9890e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7101,18 +7101,18 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 18 << 24 | 6 << 16); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 10000); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 1000000); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 90000); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 100000); I915_WRITE(GEN6_RP_UP_EI, 100000); - I915_WRITE(GEN6_RP_DOWN_EI, 5000000); + I915_WRITE(GEN6_RP_DOWN_EI, 300000); I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); I915_WRITE(GEN6_RP_CONTROL, 
GEN6_RP_MEDIA_TURBO | GEN6_RP_USE_NORMAL_FREQ | GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); + GEN6_RP_UP_BUSY_MAX | + GEN6_RP_DOWN_BUSY_MIN); if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 500))