Message ID | 20230802135241.458855-3-badal.nilawar@intel.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | Add HWMON support for DGFX | expand |
Hi Badal, On Wed, Aug 02, 2023 at 07:22:37PM +0530, Badal Nilawar wrote: > Expose power_max (pl1) and power_rated_max (tdp) attributes. can you please write a few words more here to explain the interface being exposed and what these powers are? > +/* SPDX-License-Identifier: MIT */ > +/* > + * Copyright © 2023 Intel Corporation > + */ > + > +#ifndef _XE_MCHBAR_REGS_H__ > +#define _XE_MCHBAR_REGS_H_ there is an extra '_' in the ifndef > + [...] > #include <linux/hwmon.h> > > #include <drm/drm_managed.h> > +#include "regs/xe_mchbar_regs.h" > #include "regs/xe_gt_regs.h" > #include "xe_device.h" > #include "xe_hwmon.h" > +#include "xe_mmio.h" > +#include "xe_gt.h" can we keep these in alphabetical order? > +enum hwmon_reg_name { > + REG_PKG_RAPL_LIMIT, > + REG_PKG_POWER_SKU, > + REG_PKG_POWER_SKU_UNIT, > +}; Are these names or id's? With name I understand string/ > +enum hwmon_reg_operation { > + REG_READ, > + REG_WRITE, > + REG_RMW, > +}; I'm not checking on the prefixes here... I let someone more experienced than me comment if there anything wrong. > +/* > + * SF_* - scale factors for particular quantities according to hwmon spec. > + * - power - microwatts > + */ this comment looks a bit off to me, what does " - power - microwatts" stand for? > +#define SF_POWER 1000000 > > struct xe_hwmon_data { > struct device *hwmon_dev; > @@ -18,13 +39,268 @@ struct xe_hwmon_data { > > struct xe_hwmon { > struct xe_hwmon_data ddat; > - struct mutex hwmon_lock; > + struct mutex hwmon_lock; /* rmw operations*/ please put this change in the previous patch. > + bool reset_in_progress; > + wait_queue_head_t waitq; > + int scl_shift_power; > }; > > +#define ddat_to_xe_hwmon(ddat) ({ container_of(ddat, struct xe_hwmon, ddat); }) Any particular reason for the ({ ... }) ? 
> +static int process_hwmon_reg(struct xe_hwmon_data *ddat, enum hwmon_reg_name reg_name, > + enum hwmon_reg_operation operation, u32 *value, > + u32 clr, u32 set) > +{ > + struct xe_reg reg; > + int ret = 0; > + > + reg.raw = hwmon_get_reg(ddat, reg_name); > + > + if (!reg.raw) > + return -EOPNOTSUPP; > + > + switch (operation) { > + case REG_READ: > + *value = xe_mmio_read32(ddat->gt, reg); > + break; > + case REG_WRITE: > + xe_mmio_write32(ddat->gt, reg, *value); > + break; > + case REG_RMW: > + *value = xe_mmio_rmw32(ddat->gt, reg, clr, set); > + break; > + default: > + XE_MISSING_CASE(operation); > + ret = -EOPNOTSUPP; you could just return 0 or return -EOPNOTSUPP everywhere and save "ret" and a return (maybe not needed). Just a personal preference, feel free to ignro and do as you like it. > + break; > + } > + > + return ret; > +} [...] > +static int hwmon_power_max_read(struct xe_hwmon_data *ddat, long *value) > +{ > + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); > + u32 reg_val; > + u64 r, min, max; > + > + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, ®_val, 0, 0); > + /* Check if PL1 limit is disabled */ > + if (!(reg_val & PKG_PWR_LIM_1_EN)) { > + *value = PL1_DISABLE; > + return 0; > + } > + > + reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); > + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); > + > + process_hwmon_reg_read64(ddat, REG_PKG_POWER_SKU, &r); > + min = REG_FIELD_GET(PKG_MIN_PWR, r); > + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); > + max = REG_FIELD_GET(PKG_MAX_PWR, r); > + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); > + > + if (min && max) > + *value = clamp_t(u64, *value, min, max); > + > + return 0; you are returning '0' in any case, can we make this void? 
> +} > + > +static inline bool check_reset_in_progress(struct xe_hwmon *hwmon) > +{ > + mutex_lock(&hwmon->hwmon_lock); > + if (!hwmon->reset_in_progress) > + return true; > + mutex_unlock(&hwmon->hwmon_lock); > + return false; This is a bit scary (apart from the indentation) and without a strong explanation I can't let this go. I'm pretty sure that we don't need this... can you explain? > +} > + > +static int hwmon_power_max_write(struct xe_hwmon_data *ddat, long value) > +{ > + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); > + DEFINE_WAIT(wait); > + int ret = 0; > + u32 nval; > + > + /* hwmon->hwmon_lock remain held till rmw operation is over */ > + wait_event(hwmon->waitq, check_reset_in_progress(hwmon)); > + > + /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ > + if (value == PL1_DISABLE) { > + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval, > + PKG_PWR_LIM_1_EN, 0); > + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, &nval, > + PKG_PWR_LIM_1_EN, 0); > + > + if (nval & PKG_PWR_LIM_1_EN) > + ret = -ENODEV; > + goto unlock; > + } > + > + /* Computation in 64-bits to avoid overflow. Round to nearest. */ > + nval = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); > + nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); > + > + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval, > + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); > +unlock: > + mutex_unlock(&hwmon->hwmon_lock); Where is this lock taken? Are you relying on the fact that this lock might not be taken? In any case it is not allowed to unlock a without previously locking. It's very error prone when you lock in a function and unlock in another function and in the rare cases when this is done it has to be written in the function name. 
> + return 0; > +} > + > +static int hwmon_power_rated_max_read(struct xe_hwmon_data *ddat, long *value) > +{ > + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); > + u32 reg_val; > + > + process_hwmon_reg(ddat, REG_PKG_POWER_SKU, REG_READ, ®_val, 0, 0); > + reg_val = REG_FIELD_GET(PKG_PKG_TDP, reg_val); > + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); > + > + return 0; Can this function be void? > +} [...] > +void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old) > +{ > + struct xe_hwmon *hwmon = xe->hwmon; > + struct xe_hwmon_data *ddat = &hwmon->ddat; > + u32 r; > + > + if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT))) > + return; > + > + xe_device_assert_mem_access(gt_to_xe(ddat->gt)); > + > + mutex_lock(&hwmon->hwmon_lock); > + > + hwmon->reset_in_progress = true; > + > + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r, > + PKG_PWR_LIM_1_EN, 0); > + *old = !!(r & PKG_PWR_LIM_1_EN); do we need to place under lock these last to lines? > + mutex_unlock(&hwmon->hwmon_lock); > +} > + > +void xe_hwmon_power_max_restore(struct xe_device *xe, bool old) > +{ > + struct xe_hwmon *hwmon = xe->hwmon; > + struct xe_hwmon_data *ddat = &hwmon->ddat; > + u32 r; > + > + if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT))) > + return; > + > + xe_device_assert_mem_access(gt_to_xe(ddat->gt)); > + > + mutex_lock(&hwmon->hwmon_lock); > + > + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r, > + PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0); > + > + hwmon->reset_in_progress = false; > + wake_up_all(&hwmon->waitq); does the wake up need to be under lock? Now... does it eve happen that "check_reset_in_progress()" returns false and therefore unlocks the mutex? > + > + mutex_unlock(&hwmon->hwmon_lock); > +} [...] 
> void xe_hwmon_register(struct xe_device *xe) > @@ -128,13 +425,16 @@ void xe_hwmon_register(struct xe_device *xe) > > hwmon_get_preregistration_info(xe); > > + init_waitqueue_head(&hwmon->waitq); > + > drm_dbg(&xe->drm, "Register xe hwmon interface\n"); > > - /* hwmon_dev points to device hwmon<i> */ > + /* hwmon_dev points to device hwmon<i> */ Please this change needs to go in the previous patch. What is <i>? > hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, > ddat, > &hwmon_chip_info, > NULL); > + This change in the previous patch. > if (IS_ERR(hwmon_dev)) { > drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev)); > xe->hwmon = NULL; > diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h > index a078eeb0a68b..a5dc693569c5 100644 > --- a/drivers/gpu/drm/xe/xe_hwmon.h > +++ b/drivers/gpu/drm/xe/xe_hwmon.h > @@ -14,9 +14,13 @@ struct xe_device; > #if IS_REACHABLE(CONFIG_HWMON) > void xe_hwmon_register(struct xe_device *xe); > void xe_hwmon_unregister(struct xe_device *xe); > +void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old); > +void xe_hwmon_power_max_restore(struct xe_device *xe, bool old); > #else > static inline void xe_hwmon_register(struct xe_device *xe) { }; > static inline void xe_hwmon_unregister(struct xe_device *xe) { }; > +static inline void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old) { }; > +static inline void xe_hwmon_power_max_restore(struct xe_device *xe, bool old) { }; > #endif > > #endif /* __XE_HWMON_H__ */ > diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h > index daf56c846d03..030296f8f863 100644 > --- a/drivers/gpu/drm/xe/xe_macros.h > +++ b/drivers/gpu/drm/xe/xe_macros.h > @@ -15,4 +15,7 @@ > "Ioctl argument check failed at %s:%d: %s", \ > __FILE__, __LINE__, #cond), 1)) > > +#define XE_MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ > + __stringify(x), (long)(x)) > + Should this have its own patch? 
Andi > #endif
On 03-08-2023 04:53, Andi Shyti wrote: > Hi Badal, > > On Wed, Aug 02, 2023 at 07:22:37PM +0530, Badal Nilawar wrote: >> Expose power_max (pl1) and power_rated_max (tdp) attributes. > > can you please write a few words more here to explain the > interface being exposed and what these powers are? > >> +/* SPDX-License-Identifier: MIT */ >> +/* >> + * Copyright © 2023 Intel Corporation >> + */ >> + >> +#ifndef _XE_MCHBAR_REGS_H__ >> +#define _XE_MCHBAR_REGS_H_ > > there is an extra '_' in the ifndef Sure I will fix this. > >> + > > [...] > >> #include <linux/hwmon.h> >> >> #include <drm/drm_managed.h> >> +#include "regs/xe_mchbar_regs.h" >> #include "regs/xe_gt_regs.h" >> #include "xe_device.h" >> #include "xe_hwmon.h" >> +#include "xe_mmio.h" >> +#include "xe_gt.h" > > can we keep these in alphabetical order? Sure > >> +enum hwmon_reg_name { >> + REG_PKG_RAPL_LIMIT, >> + REG_PKG_POWER_SKU, >> + REG_PKG_POWER_SKU_UNIT, >> +}; > > Are these names or id's? With name I understand string/Can't say ids. I will remove _name prefix to avoid confusion. > >> +enum hwmon_reg_operation { >> + REG_READ, >> + REG_WRITE, >> + REG_RMW, >> +}; > > I'm not checking on the prefixes here... I let someone more > experienced than me comment if there anything wrong. > >> +/* >> + * SF_* - scale factors for particular quantities according to hwmon spec. >> + * - power - microwatts >> + */ > > this comment looks a bit off to me, what does > " - power - microwatts" stand for? unit of power is microwatts as per hwmon spec. > >> +#define SF_POWER 1000000 >> >> struct xe_hwmon_data { >> struct device *hwmon_dev; >> @@ -18,13 +39,268 @@ struct xe_hwmon_data { >> >> struct xe_hwmon { >> struct xe_hwmon_data ddat; >> - struct mutex hwmon_lock; >> + struct mutex hwmon_lock; /* rmw operations*/ > > please put this change in the previous patch. 
Sure > >> + bool reset_in_progress; >> + wait_queue_head_t waitq; >> + int scl_shift_power; >> }; >> >> +#define ddat_to_xe_hwmon(ddat) ({ container_of(ddat, struct xe_hwmon, ddat); }) > > Any particular reason for the ({ ... }) ? > >> +static int process_hwmon_reg(struct xe_hwmon_data *ddat, enum hwmon_reg_name reg_name, >> + enum hwmon_reg_operation operation, u32 *value, >> + u32 clr, u32 set) >> +{ >> + struct xe_reg reg; >> + int ret = 0; >> + >> + reg.raw = hwmon_get_reg(ddat, reg_name); >> + >> + if (!reg.raw) >> + return -EOPNOTSUPP; >> + >> + switch (operation) { >> + case REG_READ: >> + *value = xe_mmio_read32(ddat->gt, reg); >> + break; >> + case REG_WRITE: >> + xe_mmio_write32(ddat->gt, reg, *value); >> + break; >> + case REG_RMW: >> + *value = xe_mmio_rmw32(ddat->gt, reg, clr, set); >> + break; >> + default: >> + XE_MISSING_CASE(operation); >> + ret = -EOPNOTSUPP; > > you could just return 0 or return -EOPNOTSUPP everywhere and save > "ret" and a return (maybe not needed). > > Just a personal preference, feel free to ignro and do as you like > it. Sure I will fix this in next rev. > >> + break; >> + } >> + >> + return ret; >> +} > > [...] 
> >> +static int hwmon_power_max_read(struct xe_hwmon_data *ddat, long *value) >> +{ >> + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); >> + u32 reg_val; >> + u64 r, min, max; >> + >> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, &reg_val, 0, 0); >> + /* Check if PL1 limit is disabled */ >> + if (!(reg_val & PKG_PWR_LIM_1_EN)) { >> + *value = PL1_DISABLE; >> + return 0; >> + } >> + >> + reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); >> + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); >> + >> + process_hwmon_reg_read64(ddat, REG_PKG_POWER_SKU, &r); >> + min = REG_FIELD_GET(PKG_MIN_PWR, r); >> + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); >> + max = REG_FIELD_GET(PKG_MAX_PWR, r); >> + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); >> + >> + if (min && max) >> + *value = clamp_t(u64, *value, min, max); >> + >> + return 0; > > you are returning '0' in any case, can we make this void? Top layer function expects return so added return here. > >> +} >> + >> +static inline bool check_reset_in_progress(struct xe_hwmon *hwmon) >> +{ >> + mutex_lock(&hwmon->hwmon_lock); >> + if (!hwmon->reset_in_progress) >> + return true; >> + mutex_unlock(&hwmon->hwmon_lock); >> + return false; > > This is a bit scary (apart from the indentation) and without a > strong explanation I can't let this go. > > I'm pretty sure that we don't need this... can you explain? In case of guc load not in progress (!reset_in_progress) the mutex shouldn't be unlocked, which will get unlocked once rmw operations are over. The other way could be to take mutex_lock after !reset_in_progress but that will add a race. wait_event(hwmon->waitq, reset_in_progress); At this place there is a possibility that reset_in_progress gets set. So this becomes racy. mutex_lock(&hwmon->hwmon_lock); Any better idea to implement this? 
> >> +} >> + >> +static int hwmon_power_max_write(struct xe_hwmon_data *ddat, long value) >> +{ >> + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); >> + DEFINE_WAIT(wait); >> + int ret = 0; >> + u32 nval; >> + >> + /* hwmon->hwmon_lock remain held till rmw operation is over */ >> + wait_event(hwmon->waitq, check_reset_in_progress(hwmon)); >> + >> + /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ >> + if (value == PL1_DISABLE) { >> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval, >> + PKG_PWR_LIM_1_EN, 0); >> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, &nval, >> + PKG_PWR_LIM_1_EN, 0); >> + >> + if (nval & PKG_PWR_LIM_1_EN) >> + ret = -ENODEV; >> + goto unlock; >> + } >> + >> + /* Computation in 64-bits to avoid overflow. Round to nearest. */ >> + nval = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); >> + nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); >> + >> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval, >> + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); >> +unlock: >> + mutex_unlock(&hwmon->hwmon_lock); > > Where is this lock taken? Are you relying on the fact that this > lock might not be taken? In any case it is not allowed to unlock > a without previously locking. Lock is taken in check_reset_in_progress(); > > It's very error prone when you lock in a function and unlock in > another function and in the rare cases when this is done it has > to be written in the function name. Sure I will add comment here. > >> + return 0; >> +} >> + >> +static int hwmon_power_rated_max_read(struct xe_hwmon_data *ddat, long *value) >> +{ >> + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); >> + u32 reg_val; >> + >> + process_hwmon_reg(ddat, REG_PKG_POWER_SKU, REG_READ, ®_val, 0, 0); >> + reg_val = REG_FIELD_GET(PKG_PKG_TDP, reg_val); >> + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); >> + >> + return 0; > > Can this function be void? 
Top level function expects a return. > >> +} > > [...] > >> +void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old) >> +{ >> + struct xe_hwmon *hwmon = xe->hwmon; >> + struct xe_hwmon_data *ddat = &hwmon->ddat; >> + u32 r; >> + >> + if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT))) >> + return; >> + >> + xe_device_assert_mem_access(gt_to_xe(ddat->gt)); >> + >> + mutex_lock(&hwmon->hwmon_lock); >> + >> + hwmon->reset_in_progress = true; >> + >> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r, >> + PKG_PWR_LIM_1_EN, 0); >> + *old = !!(r & PKG_PWR_LIM_1_EN); > > do we need to place under lock these last two lines? Yes, want to guard this rmw operation. > >> + mutex_unlock(&hwmon->hwmon_lock); >> +} >> + >> +void xe_hwmon_power_max_restore(struct xe_device *xe, bool old) >> +{ >> + struct xe_hwmon *hwmon = xe->hwmon; >> + struct xe_hwmon_data *ddat = &hwmon->ddat; >> + u32 r; >> + >> + if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT))) >> + return; >> + >> + xe_device_assert_mem_access(gt_to_xe(ddat->gt)); >> + >> + mutex_lock(&hwmon->hwmon_lock); >> + >> + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r, >> + PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0); >> + >> + hwmon->reset_in_progress = false; >> + wake_up_all(&hwmon->waitq); > > does the wake up need to be under lock? wake up can be added after unlock. > > Now... does it ever happen that "check_reset_in_progress()" > returns false and therefore unlocks the mutex? Didn't get this? check_reset_in_progress() will keep waiting for mutex till it is released by this function. > >> + >> + mutex_unlock(&hwmon->hwmon_lock); >> +} > > [...] 
> >> void xe_hwmon_register(struct xe_device *xe) >> @@ -128,13 +425,16 @@ void xe_hwmon_register(struct xe_device *xe) >> >> hwmon_get_preregistration_info(xe); >> >> + init_waitqueue_head(&hwmon->waitq); >> + >> drm_dbg(&xe->drm, "Register xe hwmon interface\n"); >> >> - /* hwmon_dev points to device hwmon<i> */ >> + /* hwmon_dev points to device hwmon<i> */ > > Please this change needs to go in the previous patch. > What is <i>? > >> hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, >> ddat, >> &hwmon_chip_info, >> NULL); >> + > > This change in the previous patch. > >> if (IS_ERR(hwmon_dev)) { >> drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev)); >> xe->hwmon = NULL; >> diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h >> index a078eeb0a68b..a5dc693569c5 100644 >> --- a/drivers/gpu/drm/xe/xe_hwmon.h >> +++ b/drivers/gpu/drm/xe/xe_hwmon.h >> @@ -14,9 +14,13 @@ struct xe_device; >> #if IS_REACHABLE(CONFIG_HWMON) >> void xe_hwmon_register(struct xe_device *xe); >> void xe_hwmon_unregister(struct xe_device *xe); >> +void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old); >> +void xe_hwmon_power_max_restore(struct xe_device *xe, bool old); >> #else >> static inline void xe_hwmon_register(struct xe_device *xe) { }; >> static inline void xe_hwmon_unregister(struct xe_device *xe) { }; >> +static inline void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old) { }; >> +static inline void xe_hwmon_power_max_restore(struct xe_device *xe, bool old) { }; >> #endif >> >> #endif /* __XE_HWMON_H__ */ >> diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h >> index daf56c846d03..030296f8f863 100644 >> --- a/drivers/gpu/drm/xe/xe_macros.h >> +++ b/drivers/gpu/drm/xe/xe_macros.h >> @@ -15,4 +15,7 @@ >> "Ioctl argument check failed at %s:%d: %s", \ >> __FILE__, __LINE__, #cond), 1)) >> >> +#define XE_MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ >> + 
__stringify(x), (long)(x)) >> + > > Should this have its own patch? Sure, I will create a separate patch for this. > > Andi > >> #endif
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon new file mode 100644 index 000000000000..d48d98f903ed --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -0,0 +1,22 @@ +What: /sys/devices/.../hwmon/hwmon<i>/power1_max +Date: August 2023 +KernelVersion: 6.4 +Contact: intel-xe@lists.freedesktop.org +Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts. + + The power controller will throttle the operating frequency + if the power averaged over a window (typically seconds) + exceeds this limit. A read value of 0 means that the PL1 + power limit is disabled, writing 0 disables the + limit. Writing values > 0 will enable the power limit. + + Only supported for particular Intel xe graphics platforms. + +What: /sys/devices/.../hwmon/hwmon<i>/power1_rated_max +Date: August 2023 +KernelVersion: 6.4 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Card default power limit (default TDP setting). + + Only supported for particular Intel xe graphics platforms. 
+ diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d654f3311351..eb7210afbd2c 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -397,4 +397,8 @@ #define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) #define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) +#define PVC_GT0_PACKAGE_RAPL_LIMIT XE_REG(0x281008) +#define PVC_GT0_PACKAGE_POWER_SKU_UNIT XE_REG(0x281068) +#define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h new file mode 100644 index 000000000000..cb2d49b5c8a9 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MCHBAR_REGS_H__ +#define _XE_MCHBAR_REGS_H_ + +#include "regs/xe_reg_defs.h" + +/* + * MCHBAR mirror. + * + * This mirrors the MCHBAR MMIO space whose location is determined by + * device 0 function 0's pci config register 0x44 or 0x48 and matches it in + * every way. 
+ */ + +#define MCHBAR_MIRROR_BASE_SNB 0x140000 + +#define PCU_CR_PACKAGE_POWER_SKU XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5930) +#define PKG_PKG_TDP GENMASK_ULL(14, 0) +#define PKG_MIN_PWR GENMASK_ULL(30, 16) +#define PKG_MAX_PWR GENMASK_ULL(46, 32) + +#define PCU_CR_PACKAGE_POWER_SKU_UNIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5938) +#define PKG_PWR_UNIT REG_GENMASK(3, 0) + +#define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) +#define PKG_PWR_LIM_1 REG_GENMASK(14, 0) +#define PKG_PWR_LIM_1_EN REG_BIT(15) + +#endif + diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 5e35128a61a8..ce8dac2168f6 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -6,9 +6,30 @@ #include <linux/hwmon.h> #include <drm/drm_managed.h> +#include "regs/xe_mchbar_regs.h" #include "regs/xe_gt_regs.h" #include "xe_device.h" #include "xe_hwmon.h" +#include "xe_mmio.h" +#include "xe_gt.h" + +enum hwmon_reg_name { + REG_PKG_RAPL_LIMIT, + REG_PKG_POWER_SKU, + REG_PKG_POWER_SKU_UNIT, +}; + +enum hwmon_reg_operation { + REG_READ, + REG_WRITE, + REG_RMW, +}; + +/* + * SF_* - scale factors for particular quantities according to hwmon spec. 
+ * - power - microwatts + */ +#define SF_POWER 1000000 struct xe_hwmon_data { struct device *hwmon_dev; @@ -18,13 +39,268 @@ struct xe_hwmon_data { struct xe_hwmon { struct xe_hwmon_data ddat; - struct mutex hwmon_lock; + struct mutex hwmon_lock; /* rmw operations*/ + bool reset_in_progress; + wait_queue_head_t waitq; + int scl_shift_power; }; +#define ddat_to_xe_hwmon(ddat) ({ container_of(ddat, struct xe_hwmon, ddat); }) + +static u32 hwmon_get_reg(struct xe_hwmon_data *ddat, enum hwmon_reg_name reg_name) +{ + struct xe_device *xe = gt_to_xe(ddat->gt); + struct xe_reg reg = XE_REG(0); + + switch (reg_name) { + case REG_PKG_RAPL_LIMIT: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_RAPL_LIMIT; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_RAPL_LIMIT; + break; + case REG_PKG_POWER_SKU: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_POWER_SKU; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_POWER_SKU; + break; + case REG_PKG_POWER_SKU_UNIT: + if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_POWER_SKU_UNIT; + else if (xe->info.platform == XE_PVC) + reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT; + break; + default: + XE_MISSING_CASE(reg_name); + break; + } + + return reg.raw; +} + +static int process_hwmon_reg(struct xe_hwmon_data *ddat, enum hwmon_reg_name reg_name, + enum hwmon_reg_operation operation, u32 *value, + u32 clr, u32 set) +{ + struct xe_reg reg; + int ret = 0; + + reg.raw = hwmon_get_reg(ddat, reg_name); + + if (!reg.raw) + return -EOPNOTSUPP; + + switch (operation) { + case REG_READ: + *value = xe_mmio_read32(ddat->gt, reg); + break; + case REG_WRITE: + xe_mmio_write32(ddat->gt, reg, *value); + break; + case REG_RMW: + *value = xe_mmio_rmw32(ddat->gt, reg, clr, set); + break; + default: + XE_MISSING_CASE(operation); + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +int process_hwmon_reg_read64(struct xe_hwmon_data *ddat, enum hwmon_reg_name reg_name, u64 *value) +{ + struct xe_reg reg; + + 
reg.raw = hwmon_get_reg(ddat, reg_name); + + if (!reg.raw) + return -EOPNOTSUPP; + + *value = xe_mmio_read64(ddat->gt, reg); + + return 0; +} + +#define PL1_DISABLE 0 + +/* + * HW allows arbitrary PL1 limits to be set but silently clamps these values to + * "typical but not guaranteed" min/max values in rg.PKG_POWER_SKU. Follow the + * same pattern for sysfs, allow arbitrary PL1 limits to be set but display + * clamped values when read. + */ +static int hwmon_power_max_read(struct xe_hwmon_data *ddat, long *value) +{ + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); + u32 reg_val; + u64 r, min, max; + + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, ®_val, 0, 0); + /* Check if PL1 limit is disabled */ + if (!(reg_val & PKG_PWR_LIM_1_EN)) { + *value = PL1_DISABLE; + return 0; + } + + reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + + process_hwmon_reg_read64(ddat, REG_PKG_POWER_SKU, &r); + min = REG_FIELD_GET(PKG_MIN_PWR, r); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = REG_FIELD_GET(PKG_MAX_PWR, r); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + + if (min && max) + *value = clamp_t(u64, *value, min, max); + + return 0; +} + +static inline bool check_reset_in_progress(struct xe_hwmon *hwmon) +{ + mutex_lock(&hwmon->hwmon_lock); + if (!hwmon->reset_in_progress) + return true; + mutex_unlock(&hwmon->hwmon_lock); + return false; +} + +static int hwmon_power_max_write(struct xe_hwmon_data *ddat, long value) +{ + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); + DEFINE_WAIT(wait); + int ret = 0; + u32 nval; + + /* hwmon->hwmon_lock remain held till rmw operation is over */ + wait_event(hwmon->waitq, check_reset_in_progress(hwmon)); + + /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ + if (value == PL1_DISABLE) { + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval, + PKG_PWR_LIM_1_EN, 0); + 
process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, &nval, + PKG_PWR_LIM_1_EN, 0); + + if (nval & PKG_PWR_LIM_1_EN) + ret = -ENODEV; + goto unlock; + } + + /* Computation in 64-bits to avoid overflow. Round to nearest. */ + nval = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); + nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); + + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval, + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); +unlock: + mutex_unlock(&hwmon->hwmon_lock); + + return 0; +} + +static int hwmon_power_rated_max_read(struct xe_hwmon_data *ddat, long *value) +{ + struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat); + u32 reg_val; + + process_hwmon_reg(ddat, REG_PKG_POWER_SKU, REG_READ, ®_val, 0, 0); + reg_val = REG_FIELD_GET(PKG_PKG_TDP, reg_val); + *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + + return 0; +} + static const struct hwmon_channel_info *hwmon_info[] = { + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX), NULL }; +static umode_t +hwmon_power_is_visible(struct xe_hwmon_data *ddat, u32 attr, int chan) +{ + switch (attr) { + case hwmon_power_max: + return hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT) ? 0664 : 0; + case hwmon_power_rated_max: + return hwmon_get_reg(ddat, REG_PKG_POWER_SKU) ? 
0444 : 0; + default: + return 0; + } +} + +static int +hwmon_power_read(struct xe_hwmon_data *ddat, u32 attr, int chan, long *val) +{ + switch (attr) { + case hwmon_power_max: + return hwmon_power_max_read(ddat, val); + case hwmon_power_rated_max: + return hwmon_power_rated_max_read(ddat, val); + default: + return -EOPNOTSUPP; + } +} + +static int +hwmon_power_write(struct xe_hwmon_data *ddat, u32 attr, int chan, long val) +{ + switch (attr) { + case hwmon_power_max: + return hwmon_power_max_write(ddat, val); + default: + return -EOPNOTSUPP; + } +} + +void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old) +{ + struct xe_hwmon *hwmon = xe->hwmon; + struct xe_hwmon_data *ddat = &hwmon->ddat; + u32 r; + + if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT))) + return; + + xe_device_assert_mem_access(gt_to_xe(ddat->gt)); + + mutex_lock(&hwmon->hwmon_lock); + + hwmon->reset_in_progress = true; + + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r, + PKG_PWR_LIM_1_EN, 0); + *old = !!(r & PKG_PWR_LIM_1_EN); + + mutex_unlock(&hwmon->hwmon_lock); +} + +void xe_hwmon_power_max_restore(struct xe_device *xe, bool old) +{ + struct xe_hwmon *hwmon = xe->hwmon; + struct xe_hwmon_data *ddat = &hwmon->ddat; + u32 r; + + if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT))) + return; + + xe_device_assert_mem_access(gt_to_xe(ddat->gt)); + + mutex_lock(&hwmon->hwmon_lock); + + process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r, + PKG_PWR_LIM_1_EN, old ? 
PKG_PWR_LIM_1_EN : 0); + + hwmon->reset_in_progress = false; + wake_up_all(&hwmon->waitq); + + mutex_unlock(&hwmon->hwmon_lock); +} + static umode_t hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) @@ -35,6 +311,9 @@ hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, xe_device_mem_access_get(gt_to_xe(ddat->gt)); switch (type) { + case hwmon_power: + ret = hwmon_power_is_visible(ddat, attr, channel); + break; default: ret = 0; break; @@ -55,6 +334,9 @@ hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, xe_device_mem_access_get(gt_to_xe(ddat->gt)); switch (type) { + case hwmon_power: + ret = hwmon_power_read(ddat, attr, channel, val); + break; default: ret = -EOPNOTSUPP; break; @@ -75,6 +357,9 @@ hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, xe_device_mem_access_get(gt_to_xe(ddat->gt)); switch (type) { + case hwmon_power: + ret = hwmon_power_write(ddat, attr, channel, val); + break; default: ret = -EOPNOTSUPP; break; @@ -99,6 +384,18 @@ static const struct hwmon_chip_info hwmon_chip_info = { static void hwmon_get_preregistration_info(struct xe_device *xe) { + struct xe_hwmon *hwmon = xe->hwmon; + struct xe_hwmon_data *ddat = &hwmon->ddat; + u32 val_sku_unit = 0; + int ret; + + ret = process_hwmon_reg(ddat, REG_PKG_POWER_SKU_UNIT, REG_READ, &val_sku_unit, 0, 0); + /* + * The contents of register PKG_POWER_SKU_UNIT do not change, + * so read it once and store the shift values. 
+ */ + if (!ret) + hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); } void xe_hwmon_register(struct xe_device *xe) @@ -128,13 +425,16 @@ void xe_hwmon_register(struct xe_device *xe) hwmon_get_preregistration_info(xe); + init_waitqueue_head(&hwmon->waitq); + drm_dbg(&xe->drm, "Register xe hwmon interface\n"); - /* hwmon_dev points to device hwmon<i> */ + /* hwmon_dev points to device hwmon<i> */ hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, ddat, &hwmon_chip_info, NULL); + if (IS_ERR(hwmon_dev)) { drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev)); xe->hwmon = NULL; diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h index a078eeb0a68b..a5dc693569c5 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.h +++ b/drivers/gpu/drm/xe/xe_hwmon.h @@ -14,9 +14,13 @@ struct xe_device; #if IS_REACHABLE(CONFIG_HWMON) void xe_hwmon_register(struct xe_device *xe); void xe_hwmon_unregister(struct xe_device *xe); +void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old); +void xe_hwmon_power_max_restore(struct xe_device *xe, bool old); #else static inline void xe_hwmon_register(struct xe_device *xe) { }; static inline void xe_hwmon_unregister(struct xe_device *xe) { }; +static inline void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old) { }; +static inline void xe_hwmon_power_max_restore(struct xe_device *xe, bool old) { }; #endif #endif /* __XE_HWMON_H__ */ diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h index daf56c846d03..030296f8f863 100644 --- a/drivers/gpu/drm/xe/xe_macros.h +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -15,4 +15,7 @@ "Ioctl argument check failed at %s:%d: %s", \ __FILE__, __LINE__, #cond), 1)) +#define XE_MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ + __stringify(x), (long)(x)) + #endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index addd6f2681b9..2e9c915ac707 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ 
b/drivers/gpu/drm/xe/xe_uc.c @@ -13,6 +13,7 @@ #include "xe_huc.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" +#include "xe_hwmon.h" static struct xe_gt * uc_to_gt(struct xe_uc *uc) @@ -127,11 +128,15 @@ int xe_uc_init_hwconfig(struct xe_uc *uc) int xe_uc_init_hw(struct xe_uc *uc) { int ret; + bool pl1en; /* GuC submission not enabled, nothing to do */ if (!xe_device_guc_submission_enabled(uc_to_xe(uc))) return 0; + /* Disable a potentially low PL1 power limit to allow freq to be raised */ + xe_hwmon_power_max_disable(uc_to_xe(uc), &pl1en); + ret = xe_uc_sanitize_reset(uc); if (ret) return ret; @@ -160,6 +165,7 @@ int xe_uc_init_hw(struct xe_uc *uc) if (ret) return ret; + xe_hwmon_power_max_restore(uc_to_xe(uc), pl1en); /* We don't fail the driver load if HuC fails to auth, but let's warn */ ret = xe_huc_auth(&uc->huc); XE_WARN_ON(ret);
Expose power_max (pl1) and power_rated_max (tdp) attributes. v2: - Fix review comments (Riana) v3: - Convert enums to uppercase (Matt Brost) - Avoid extra reg read in hwmon_is_visible function (Riana) - Add XE_MISSING_CASE macro to warn on default case (Andi) - Serialize locking (Matt Brost, Andi) - Use xe_device_assert_mem_access when applicable (Matt Brost) - Add intel-xe@lists.freedesktop.org in Documentation (Matt Brost) Signed-off-by: Badal Nilawar <badal.nilawar@intel.com> --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 22 ++ drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 + drivers/gpu/drm/xe/regs/xe_mchbar_regs.h | 34 ++ drivers/gpu/drm/xe/xe_hwmon.c | 304 +++++++++++++++++- drivers/gpu/drm/xe/xe_hwmon.h | 4 + drivers/gpu/drm/xe/xe_macros.h | 3 + drivers/gpu/drm/xe/xe_uc.c | 6 + 7 files changed, 375 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon create mode 100644 drivers/gpu/drm/xe/regs/xe_mchbar_regs.h