Message ID | 20241030-feature_poe_port_prio-v2-15-9559622ee47a@bootlin.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add support for PSE port priority | expand |
> struct pse_control_status { > u32 pse_id; > @@ -74,6 +83,10 @@ struct pse_control_status { > u32 c33_avail_pw_limit; > struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges; > u32 c33_pw_limit_nb_ranges; > + u32 c33_prio_supp_modes; > + enum pse_port_prio_modes c33_prio_mode; > + u32 c33_prio_max; > + u32 c33_prio; > }; > > /** > @@ -93,6 +106,8 @@ struct pse_control_status { > * set_current_limit regulator callback. > * Should not return an error in case of MAX_PI_CURRENT > * current value set. > + * @pi_set_prio: Configure the PSE PI priority. > + * @pi_get_pw_req: Get the power requested by a PD before enabling the PSE PI > */ > struct pse_controller_ops { > int (*ethtool_get_status)(struct pse_controller_dev *pcdev, > @@ -107,6 +122,9 @@ struct pse_controller_ops { > int id); > int (*pi_set_current_limit)(struct pse_controller_dev *pcdev, > int id, int max_uA); > + int (*pi_set_prio)(struct pse_controller_dev *pcdev, int id, > + unsigned int prio); > + int (*pi_get_pw_req)(struct pse_controller_dev *pcdev, int id); > }; > > struct module; > @@ -141,6 +159,10 @@ struct pse_pi_pairset { > * @rdev: regulator represented by the PSE PI > * @admin_state_enabled: PI enabled state > * @pw_d: Power domain of the PSE PI > + * @prio: Priority of the PSE PI. 
Used in static port priority mode > + * @pw_enabled: PSE PI power status in static port priority mode > + * @pw_allocated: Power allocated to a PSE PI to manage power budget in > + * static port priority mode > */ > struct pse_pi { > struct pse_pi_pairset pairset[2]; > @@ -148,6 +170,9 @@ struct pse_pi { > struct regulator_dev *rdev; > bool admin_state_enabled; > struct pse_power_domain *pw_d; > + int prio; > + bool pw_enabled; > + int pw_allocated; > }; > > /** > @@ -165,6 +190,9 @@ struct pse_pi { > * @pi: table of PSE PIs described in this controller device > * @no_of_pse_pi: flag set if the pse_pis devicetree node is not used > * @id: Index of the PSE > + * @pis_prio_max: Maximum value allowed for the PSE PIs priority > + * @port_prio_supp_modes: Bitfield of port priority mode supported by the PSE > + * @port_prio_mode: Current port priority mode of the PSE > */ > struct pse_controller_dev { > const struct pse_controller_ops *ops; > @@ -179,6 +207,9 @@ struct pse_controller_dev { > struct pse_pi *pi; > bool no_of_pse_pi; > int id; > + unsigned int pis_prio_max; > + u32 port_prio_supp_modes; > + enum pse_port_prio_modes port_prio_mode; > }; > diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h > index a1ad257b1ec1..22664b1ea4a2 100644 > --- a/include/uapi/linux/ethtool.h > +++ b/include/uapi/linux/ethtool.h > @@ -1002,11 +1002,35 @@ enum ethtool_c33_pse_pw_d_status { > * enum ethtool_c33_pse_events - event list of the C33 PSE controller. > * @ETHTOOL_C33_PSE_EVENT_OVER_CURRENT: PSE output current is too high. > * @ETHTOOL_C33_PSE_EVENT_OVER_TEMP: PSE in over temperature state. > + * @ETHTOOL_C33_PSE_EVENT_CONNECTED: PD detected on the PSE. > + * @ETHTOOL_C33_PSE_EVENT_DISCONNECTED: PD has been disconnected on the PSE. > + * @ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR: PSE faced an error in static > + * port priority management mode. 
> */ > > enum ethtool_c33_pse_events { > - ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, > - ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, > + ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, > + ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, > + ETHTOOL_C33_PSE_EVENT_CONNECTED = 1 << 2, > + ETHTOOL_C33_PSE_EVENT_DISCONNECTED = 1 << 3, > + ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR = 1 << 4, > +}; Same here, priority concept is not part of the spec, so the C33 prefix should be removed. > + > +/** > + * enum pse_port_prio_modes - PSE port priority modes. > + * @ETHTOOL_PSE_PORT_PRIO_DISABLED: Port priority disabled. > + * @ETHTOOL_PSE_PORT_PRIO_STATIC: PSE static port priority. Port priority > + * based on the power requested during PD classification. This mode > + * is managed by the PSE core. > + * @ETHTOOL_PSE_PORT_PRIO_DYNAMIC: PSE dynamic port priority. Port priority > + * based on the current consumption per ports compared to the total > + * power budget. This mode is managed by the PSE controller. > + */ This part will need some clarification about behavior with mixed port configurations. Here is my proposal: * Expected behaviors in mixed port priority configurations: * - When ports are configured with a mix of disabled, static, and dynamic * priority modes, the following behaviors are expected: * - Ports with priority disabled (ETHTOOL_PSE_PORT_PRIO_DISABLED) are * treated with lowest priority, receiving power only if the budget * remains after static and dynamic ports have been served. * - Static-priority ports are allocated power up to their requested * levels during PD classification, provided the budget allows. * - Dynamic-priority ports receive power based on real-time consumption, * as monitored by the PSE controller, relative to the remaining budget * after static ports. 
* * Handling scenarios where power budget is exceeded: * - Hot-plug behavior: If a new device is added that causes the total power * demand to exceed the PSE budget, the newly added device is de-prioritized * and shut down to maintain stability for previously connected devices. * This behavior ensures that existing connections are not disrupted, though * it may lead to inconsistent behavior if the device is disconnected and * reconnected (hot-plugged). * * - Startup behavior (boot): When the system initializes with attached devices, * the PSE allocates power based on a predefined order (e.g., by port index) * until the budget is exhausted. Devices connected later in this order may * not be enabled if they would exceed the power budget, resulting in consistent * behavior during startup but potentially differing from runtime behavior * (hot-plug). * * - Consistency challenge: These two scenarios—hot-plug vs. system boot—may lead * to different handling of devices. During system boot, power is allocated * sequentially, potentially leaving out high-priority devices added later due to * a first-come-first-serve approach. In contrast, hot-plug behavior favors the * status quo, maintaining stability for initially connected devices, which * might not align with the system's prioritization policy. * > +enum pse_port_prio_modes { > + ETHTOOL_PSE_PORT_PRIO_DISABLED, > + ETHTOOL_PSE_PORT_PRIO_STATIC, > + ETHTOOL_PSE_PORT_PRIO_DYNAMIC, > };
On Thu, 31 Oct 2024 07:54:08 +0100 Oleksij Rempel <o.rempel@pengutronix.de> wrote: > > diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h > > index a1ad257b1ec1..22664b1ea4a2 100644 > > --- a/include/uapi/linux/ethtool.h > > +++ b/include/uapi/linux/ethtool.h > > @@ -1002,11 +1002,35 @@ enum ethtool_c33_pse_pw_d_status { > > * enum ethtool_c33_pse_events - event list of the C33 PSE controller. > > * @ETHTOOL_C33_PSE_EVENT_OVER_CURRENT: PSE output current is too high. > > * @ETHTOOL_C33_PSE_EVENT_OVER_TEMP: PSE in over temperature state. > > + * @ETHTOOL_C33_PSE_EVENT_CONNECTED: PD detected on the PSE. > > + * @ETHTOOL_C33_PSE_EVENT_DISCONNECTED: PD has been disconnected on the > > PSE. > > + * @ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR: PSE faced an error in > > static > > + * port priority management mode. > > */ > > > > enum ethtool_c33_pse_events { > > - ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, > > - ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, > > + ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, > > + ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, > > + ETHTOOL_C33_PSE_EVENT_CONNECTED = 1 << 2, > > + ETHTOOL_C33_PSE_EVENT_DISCONNECTED = 1 << 3, > > + ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR = 1 << 4, > > +}; > > Same here, priority concept is not part of the spec, so the C33 prefix > should be removed. Ack. So we assume PoDL could have the same interruption events. > > +/** > > + * enum pse_port_prio_modes - PSE port priority modes. > > + * @ETHTOOL_PSE_PORT_PRIO_DISABLED: Port priority disabled. > > + * @ETHTOOL_PSE_PORT_PRIO_STATIC: PSE static port priority. Port priority > > + * based on the power requested during PD classification. This mode > > + * is managed by the PSE core. > > + * @ETHTOOL_PSE_PORT_PRIO_DYNAMIC: PSE dynamic port priority. Port priority > > + * based on the current consumption per ports compared to the total > > + * power budget. This mode is managed by the PSE controller. 
> > + */ > > This part will need some clarification about behavior with mixed port > configurations. Here is my proposal: > > * Expected behaviors in mixed port priority configurations: > * - When ports are configured with a mix of disabled, static, and dynamic > * priority modes, the following behaviors are expected: > * - Ports with priority disabled (ETHTOOL_PSE_PORT_PRIO_DISABLED) are > * treated with lowest priority, receiving power only if the budget > * remains after static and dynamic ports have been served. > * - Static-priority ports are allocated power up to their requested > * levels during PD classification, provided the budget allows. > * - Dynamic-priority ports receive power based on real-time consumption, > * as monitored by the PSE controller, relative to the remaining budget > * after static ports. I was not thinking of supporting mixed configuration but indeed why not. The thing is the Microchip PSE does not support static priority. I didn't find a way to have only detection and classification enabled without auto activation. Mixed priority could not be tested for now. "Requested Power: The requested power of the logical port, related to the requested class. In case of DSPD, it is the sum of the related class power for each pair-set. The value is in steps of 0.1 W. Assigned Class: The assigned classification depends on the requested class and the available power. An 0xC value means that classification was not assigned and power was not allocated to this port." We could set the current limit to all unconnected ports if the budget limit goes under 100W. This will add complexity as the PD692x0 can set current limit only inside specific ranges. Maybe it is a bit too specific to Microchip. Microchip PSE should only support dynamic mode. 
> * > * Handling scenarios where power budget is exceeded: > * - Hot-plug behavior: If a new device is added that causes the total power > * demand to exceed the PSE budget, the newly added device is de-prioritized > * and shut down to maintain stability for previously connected devices. > * This behavior ensures that existing connections are not disrupted, though > * it may lead to inconsistent behavior if the device is disconnected and > * reconnected (hot-plugged). Do we want this behavior even if the new device has a higher priority than other previously connected devices? > * - Startup behavior (boot): When the system initializes with attached > devices, > * the PSE allocates power based on a predefined order (e.g., by port index) > * until the budget is exhausted. Devices connected later in this order may > * not be enabled if they would exceed the power budget, resulting in > consistent > * behavior during startup but potentially differing from runtime behavior > * (hot-plug). > * > * - Consistency challenge: These two scenarios—hot-plug vs. system boot—may > lead > * to different handling of devices. During system boot, power is allocated > * sequentially, potentially leaving out high-priority devices added later > due to > * a first-come-first-serve approach. In contrast, hot-plug behavior favors > the > * status quo, maintaining stability for initially connected devices, which > * might not align with the system's prioritization policy. This could be solved by the future support of persistent configuration. Indeed the Microchip controller has a non-volatile memory to save the current configuration (3.1.3) and we could hope future PSE controllers could do the same as there is indeed a consistency challenge. This support will be added in a later patch series. Regards,
On Thu, Oct 31, 2024 at 12:11:04PM +0100, Kory Maincent wrote: > > This part will need some clarification about behavior with mixed port > > configurations. Here is my proposal: > > > > * Expected behaviors in mixed port priority configurations: > > * - When ports are configured with a mix of disabled, static, and dynamic > > * priority modes, the following behaviors are expected: > > * - Ports with priority disabled (ETHTOOL_PSE_PORT_PRIO_DISABLED) are > > * treated with lowest priority, receiving power only if the budget > > * remains after static and dynamic ports have been served. > > * - Static-priority ports are allocated power up to their requested > > * levels during PD classification, provided the budget allows. > > * - Dynamic-priority ports receive power based on real-time consumption, > > * as monitored by the PSE controller, relative to the remaining budget > > * after static ports. > > I was not thinking of supporting mixed configuration but indeed why not. > The thing is the Microchip PSE does not support static priority. I didn't find a > way to have only detection and classification enabled without auto activation. > Mixed priority could not be tested for now. No, problem. > "Requested Power: The requested power of the logical port, related to the > requested class. In case of DSPD, it is the sum of the related class power for > each pair-set. The value is in steps of 0.1 W. > Assigned Class: The assigned classification depends on the requested class and > the available power. An 0xC value means that classification was not assigned > and power was not allocated to this port." > > We could set the current limit to all unconnected ports if the budget limit goes > under 100W. This will add complexity as the PD692x0 can set current limit only > inside specific ranges. Maybe it is a bit too specific to Microchip. > Microchip PSE should only support dynamic mode. 
No need to fake it right now; you came up with a nice idea to have a "configurable" method, which in the case of PD692x0 is only a single built-in method. Since user space will be able to see the available and used prioritization methods, I'm happy with it. > > * > > * Handling scenarios where power budget is exceeded: > > * - Hot-plug behavior: If a new device is added that causes the total power > > * demand to exceed the PSE budget, the newly added device is de-prioritized > > * and shut down to maintain stability for previously connected devices. > > * This behavior ensures that existing connections are not disrupted, though > > * it may lead to inconsistent behavior if the device is disconnected and > > * reconnected (hot-plugged). > > Do we want this behavior even if the new device has an highest priority than > other previously connected devices? Huh... good question. I assume, if we go with policy in the kernel, then it is OK to implement just one of them. But I assume we will need this kind of interface sooner or later: Warning! This is a discussion; I'm in the process of understanding :D /** * enum pse_disconnection_policy - Disconnection strategies for same-priority * devices when power budget is exceeded, tailored to specific priority modes. * * Each device can have multiple disconnection policies set as an array of * priorities. When the power budget is exceeded, the policies are executed * in the order defined by the user. This allows for a more nuanced and * flexible approach to handling power constraints across a range of devices * with similar priorities or attributes. * * Example Usage: * - Users can specify an ordered list of policies, such as starting with * `PSE_DISCON_STATIC_CLASS_HIGHEST_FIRST` to prioritize based on class, * followed by `PSE_DISCON_LRC` to break ties based on connection time. * This ordered execution ensures that power disconnections align closely * with the system’s operational requirements and priorities.
* * @PSE_DISCON_LRC: Disconnect most recently connected device. * - Relevant for: ETHTOOL_PSE_PORT_PRIO_STATIC * - Behavior: When multiple devices share the same priority level, the * system disconnects the device that was most recently connected. * - Rationale: This strategy favors stability for longer-standing connections, * assuming that established devices may be more critical. * - Use Case: Suitable for systems prioritizing stable power allocation for * existing static-priority connections, making newer devices suitable * candidates for disconnection if limits are exceeded. * @PSE_DISCON_ROUND_ROBIN_IDX_LOWEST_FIRST: Disconnect based on port index in a * round-robin manner, starting with the lowest index. * - Relevant for: ETHTOOL_PSE_PORT_PRIO_STATIC * - Behavior: Disconnects devices sequentially based on port index, starting * with the lowest. If multiple disconnections are required, the process * continues in ascending order. * - Rationale: Provides a predictable, systematic approach for static-priority * devices, making it clear which device will be disconnected next if power * limits are reached. * - Use Case: Appropriate for systems where static-priority devices are equal * in role, and fairness in disconnections is prioritized. * @PSE_DISCON_ROUND_ROBIN_IDX_HIGHEST_FIRST: Disconnect based on port index in a * round-robin manner, starting with the highest index. * - Relevant for: ETHTOOL_PSE_PORT_PRIO_STATIC * - Behavior: Disconnects devices sequentially based on port index, starting * with the highest. If multiple disconnections are required, the process * continues in descending order. * - Rationale: Provides a predictable, systematic approach for static-priority * devices, prioritizing disconnection from the highest port number downwards. * - Use Case: Suitable for scenarios where higher port numbers are less critical, * or where devices connected to higher ports can be sacrificed first.
* @PSE_DISCON_STATIC_CLASS_HIGHEST_FIRST: Disconnect based on static allocation * class, disconnecting higher-class devices first. * - Relevant for: ETHTOOL_PSE_PORT_PRIO_STATIC * - Behavior: Disconnects devices in order of their assigned power class, * with higher-class devices being disconnected first. * - Rationale: This strategy can be useful in scenarios where the goal is to * preserve lower-class devices for minimal essential services, possibly * sacrificing higher-class, power-intensive devices. * - Use Case: Fits scenarios where power classes represent power-hungry or * non-essential devices, allowing essential services to continue under * constrained power conditions. * @PSE_DISCON_STATIC_CLASS_LOWEST_FIRST: Disconnect based on static allocation * class, disconnecting lower-class devices first. * - Relevant for: ETHTOOL_PSE_PORT_PRIO_STATIC * - Behavior: Disconnects devices in order of their assigned power class, * with lower-class devices being disconnected first. * - Rationale: Ensures that higher-class, more critical devices retain power * during constrained conditions. * - Use Case: Fits scenarios where power classes represent priority, allowing * the system to maintain higher-class static devices under constrained * conditions. * @PSE_DISCON_STATIC_CLASS_BUDGET_MATCH: Disconnect based on static allocation * class, targeting devices that release enough allocated power to meet the * current power requirement. * - Relevant for: ETHTOOL_PSE_PORT_PRIO_STATIC * - Behavior: Searches for the lowest-priority device that can release * sufficient allocated power to meet the current budget requirement. * Ensures that disconnection occurs only when enough power is freed. * - Rationale: This strategy is useful when the goal is to balance power * budget requirements while minimizing the number of disconnected devices. * It ensures that the system does not needlessly disconnect multiple * devices if a single disconnection is sufficient to meet the power needs. 
* - Use Case: Ideal for systems where precise power budget management is * necessary, and disconnections must be efficient in terms of freeing * enough power with minimal impact on the system. * @PSE_DISCON_LOWEST_AVG_POWER: Disconnect device with the lowest average * power draw, minimizing impact on dynamic power allocation. * - Relevant for: ETHTOOL_PSE_PORT_PRIO_DYNAMIC * - Behavior: Among devices with the same priority level, the system * disconnects the device with the lowest average power draw. * - If multiple devices have the same average power draw and priority, * further tie-breaking mechanisms can be applied, such as disconnecting * the least recently connected device. * - Rationale: Minimizes disruption across dynamic devices, keeping as many * active as possible by removing the lowest-power ones. * - Use Case: Suitable for dynamic-priority systems where maximizing the * number of connected devices is more important than individual device * power requirements. * @PSE_DISCON_LONGEST_IDLE: Disconnect device with the longest idle time * (low or no recent active power usage). * - Relevant for: ETHTOOL_PSE_PORT_PRIO_DYNAMIC * - Behavior: Disconnects the device with the longest period of inactivity, * where "idle" is defined as low current draw or absence of recent data * transmission. * - If multiple devices have the same idle time and priority, a tie-breaking * mechanism, such as round-robin based on port index, can be used. * - Rationale: Optimizes resource allocation in dynamic-priority setups by * maintaining active devices while deprioritizing those with minimal * recent usage. * - Use Case: Ideal for dynamic environments, like sensor networks, where * devices may be intermittently active and can be deprioritized during * idle periods. 
* * These disconnection policies provide flexibility in handling cases where * multiple devices with the same priority exceed the PSE budget, aligning * with either static or dynamic port priority modes: * - `ETHTOOL_PSE_PORT_PRIO_STATIC` benefits from policies that maintain * stable power allocation, favoring longer-standing or higher-class * devices (e.g., `PSE_DISCON_LRC`, `PSE_DISCON_ROUND_ROBIN_IDX_LOWEST_FIRST`, * `PSE_DISCON_ROUND_ROBIN_IDX_HIGHEST_FIRST`, * `PSE_DISCON_STATIC_CLASS_HIGHEST_FIRST`, `PSE_DISCON_STATIC_CLASS_LOWEST_FIRST`, * `PSE_DISCON_STATIC_CLASS_BUDGET_MATCH`). * - `ETHTOOL_PSE_PORT_PRIO_DYNAMIC` supports policies that dynamically * adjust based on real-time metrics (e.g., `PSE_DISCON_LOWEST_AVG_POWER`, * `PSE_DISCON_LONGEST_IDLE`), ideal for setups where usage fluctuates * frequently. * - Users can define an ordered array of disconnection policies, allowing * the system to apply each policy in sequence, providing nuanced control * over how power disconnections are handled. */ PD692x0 seems to use @PSE_DISCON_ROUND_ROBIN_IDX_HIGHEST_FIRST disconnection policy. ETHTOOL_PSE_PORT_PRIO_DYNAMIC and ETHTOOL_PSE_PORT_PRIO_STATIC seem to be the source of information which should be used to trigger the disconnection policy. Correct?
On Thu, Oct 31, 2024 at 12:11:04PM +0100, Kory Maincent wrote: > On Thu, 31 Oct 2024 07:54:08 +0100 > Oleksij Rempel <o.rempel@pengutronix.de> wrote: > > > > diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h > > > index a1ad257b1ec1..22664b1ea4a2 100644 > > > --- a/include/uapi/linux/ethtool.h > > > +++ b/include/uapi/linux/ethtool.h > > > @@ -1002,11 +1002,35 @@ enum ethtool_c33_pse_pw_d_status { > > > * enum ethtool_c33_pse_events - event list of the C33 PSE controller. > > > * @ETHTOOL_C33_PSE_EVENT_OVER_CURRENT: PSE output current is too high. > > > * @ETHTOOL_C33_PSE_EVENT_OVER_TEMP: PSE in over temperature state. > > > + * @ETHTOOL_C33_PSE_EVENT_CONNECTED: PD detected on the PSE. > > > + * @ETHTOOL_C33_PSE_EVENT_DISCONNECTED: PD has been disconnected on the > > > PSE. > > > + * @ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR: PSE faced an error in > > > static > > > + * port priority management mode. > > > */ > > > > > > enum ethtool_c33_pse_events { > > > - ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, > > > - ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, > > > + ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, > > > + ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, > > > + ETHTOOL_C33_PSE_EVENT_CONNECTED = 1 << 2, > > > + ETHTOOL_C33_PSE_EVENT_DISCONNECTED = 1 << 3, > > > + ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR = 1 << 4, > > > +}; > > > > Same here, priority concept is not part of the spec, so the C33 prefix > > should be removed. > > Ack. So we assume PoDL could have the same interruption events. > > > > +/** > > > + * enum pse_port_prio_modes - PSE port priority modes. > > > + * @ETHTOOL_PSE_PORT_PRIO_DISABLED: Port priority disabled. > > > + * @ETHTOOL_PSE_PORT_PRIO_STATIC: PSE static port priority. Port priority > > > + * based on the power requested during PD classification. This mode > > > + * is managed by the PSE core. > > > + * @ETHTOOL_PSE_PORT_PRIO_DYNAMIC: PSE dynamic port priority. 
Port priority > > > + * based on the current consumption per ports compared to the total > > > + * power budget. This mode is managed by the PSE controller. > > > + */ After thinking about it more overnight, I wanted to revisit the idea of having a priority strategy per port. Right now, if one port is set to static or dynamic mode, all disabled ports seem to have to follow it somehow too. This makes it feel like we should have a strategy for the whole power domain, not just for each port. I'm having trouble imagining how a per-port priority strategy would work in this setup. Another point that came to mind is that we might have two different components here, and we need to keep these two parts separate in follow-up discussions: - **Budget Evaluation Strategy**: The static approach seems straightforward—if a class requests more than available, appropriate actions are taken. However, the dynamic approach has more complexity, such as determining the threshold, how long violations can be tolerated, and whether a safety margin should be maintained before exceeding maximum load. - **Disconnection Policy**: Once a budget violation is detected, this decides how to react, like which ports should be disconnected and in what order. Would it make more sense to have a unified strategy for power domains, where we apply the same budget evaluation mode (static or dynamic) and disconnection policy to all ports in that domain? This could make the configuration simpler and the power management more predictable.
On Fri, Nov 01, 2024 at 09:31:43AM +0100, Oleksij Rempel wrote: > On Thu, Oct 31, 2024 at 12:11:04PM +0100, Kory Maincent wrote: > > On Thu, 31 Oct 2024 07:54:08 +0100 > > Oleksij Rempel <o.rempel@pengutronix.de> wrote: > > > > > > diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h > > > > index a1ad257b1ec1..22664b1ea4a2 100644 > > > > --- a/include/uapi/linux/ethtool.h > > > > +++ b/include/uapi/linux/ethtool.h > > > > @@ -1002,11 +1002,35 @@ enum ethtool_c33_pse_pw_d_status { > > > > * enum ethtool_c33_pse_events - event list of the C33 PSE controller. > > > > * @ETHTOOL_C33_PSE_EVENT_OVER_CURRENT: PSE output current is too high. > > > > * @ETHTOOL_C33_PSE_EVENT_OVER_TEMP: PSE in over temperature state. > > > > + * @ETHTOOL_C33_PSE_EVENT_CONNECTED: PD detected on the PSE. > > > > + * @ETHTOOL_C33_PSE_EVENT_DISCONNECTED: PD has been disconnected on the > > > > PSE. > > > > + * @ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR: PSE faced an error in > > > > static > > > > + * port priority management mode. > > > > */ > > > > > > > > enum ethtool_c33_pse_events { > > > > - ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, > > > > - ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, > > > > + ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, > > > > + ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, > > > > + ETHTOOL_C33_PSE_EVENT_CONNECTED = 1 << 2, > > > > + ETHTOOL_C33_PSE_EVENT_DISCONNECTED = 1 << 3, > > > > + ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR = 1 << 4, > > > > +}; > > > > > > Same here, priority concept is not part of the spec, so the C33 prefix > > > should be removed. > > > > Ack. So we assume PoDL could have the same interruption events. > > > > > > +/** > > > > + * enum pse_port_prio_modes - PSE port priority modes. > > > > + * @ETHTOOL_PSE_PORT_PRIO_DISABLED: Port priority disabled. > > > > + * @ETHTOOL_PSE_PORT_PRIO_STATIC: PSE static port priority. Port priority > > > > + * based on the power requested during PD classification. 
This mode > > > > + * is managed by the PSE core. > > > > + * @ETHTOOL_PSE_PORT_PRIO_DYNAMIC: PSE dynamic port priority. Port priority > > > > + * based on the current consumption per ports compared to the total > > > > + * power budget. This mode is managed by the PSE controller. > > > > + */ > > After thinking about it more overnight, I wanted to revisit the idea of having > a priority strategy per port. Right now, if one port is set to static or > dynamic mode, all disabled ports seem to have to follow it somehow too. This > makes it feel like we should have a strategy for the whole power domain, not > just for each port. > > I'm having trouble imagining how a per-port priority strategy would work in > this setup. > > Another point that came to mind is that we might have two different components > here, and we need to keep these two parts separate in follow-up discussions: > > - **Budget Evaluation Strategy**: The static approach seems straightforward—if > a class requests more than available, appropriate actions are taken. However, > the dynamic approach has more complexity, such as determining the threshold, > how long violations can be tolerated, and whether a safety margin should be > maintained before exceeding maximum load. > > - **Disconnection Policy**: Once a budget violation is detected, this decides > how to react, like which ports should be disconnected and in what order. > > Would it make more sense to have a unified strategy for power domains, where we > apply the same budget evaluation mode (static or dynamic) and disconnection > policy to all ports in that domain? This could make the configuration simpler > and the power management more predictable. Apart from user reports, do we have documented confirmation of the dynamic Budget Evaluation Strategy in the PD692x0 firmware? Are these configuration bits what I called Budget Evaluation Strategy?
Version 3.55: Bits [3..0]—BT port PM mode 0x0: The port power that is used for power management purposes is dynamic (Iport x Vmain). 0x1: The port power that is used for power management purposes is port TPPL_BT. 0x2: The port power that is used for power management purposes is dynamic for non LLDP/CDP/Autoclass ports and TPPL_BT for LLDP/CDP/Autoclass ports.
On Fri, 1 Nov 2024 11:23:06 +0100 Oleksij Rempel <o.rempel@pengutronix.de> wrote: > On Fri, Nov 01, 2024 at 09:31:43AM +0100, Oleksij Rempel wrote: > > On Thu, Oct 31, 2024 at 12:11:04PM +0100, Kory Maincent wrote: > > > On Thu, 31 Oct 2024 07:54:08 +0100 > > > Oleksij Rempel <o.rempel@pengutronix.de> wrote: > > > > [...] > [...] > > > > > > Ack. So we assume PoDL could have the same interruption events. > > > > [...] > > > > After thinking about it more overnight, I wanted to revisit the idea of > > having a priority strategy per port. Right now, if one port is set to > > static or dynamic mode, all disabled ports seem to have to follow it > > somehow too. This makes it feel like we should have a strategy for the > > whole power domain, not just for each port. > > > > I'm having trouble imagining how a per-port priority strategy would work in > > this setup. Indeed you are right. I was first thinking of using the same port priority for all the ports of a PSE but it seems indeed better to have it by Power domain. > > Another point that came to mind is that we might have two different > > components here, and we need to keep these two parts separate in follow-up > > discussions: > > > > - **Budget Evaluation Strategy**: The static approach seems > > straightforward—if a class requests more than available, appropriate > > actions are taken. However, the dynamic approach has more complexity, such > > as determining the threshold, how long violations can be tolerated, and > > whether a safety margin should be maintained before exceeding maximum load. > > > > - **Disconnection Policy**: Once a budget violation is detected, this > > decides how to react, like which ports should be disconnected and in what > > order. > > > > Would it make more sense to have a unified strategy for power domains, > > where we apply the same budget evaluation mode (static or dynamic) and > > disconnection policy to all ports in that domain? 
This could make the > > configuration simpler and the power management more predictable. Yes, these policies and the port priority mode should be per power domain. > Except of user reports, do we have documented confirmation about dynamic > Budget Evaluation Strategy in PD692x0 firmware? > > Do this configuration bits are what I called Budget Evaluation Strategy? > Version 3.55: > Bits [3..0]—BT port PM mode > 0x0: The port power that is used for power management purposes is > dynamic (Iport x Vmain). Yes, it seems so. I can't find any configuration for the budget evaluation strategy other than the power limit. Regards,
On Thu, 31 Oct 2024 18:32:39 +0100 Oleksij Rempel <o.rempel@pengutronix.de> wrote: > > > * > > > * Handling scenarios where power budget is exceeded: > > > * - Hot-plug behavior: If a new device is added that causes the total > > > power > > > * demand to exceed the PSE budget, the newly added device is > > > de-prioritized > > > * and shut down to maintain stability for previously connected devices. > > > * This behavior ensures that existing connections are not disrupted, > > > though > > > * it may lead to inconsistent behavior if the device is disconnected > > > and > > > * reconnected (hot-plugged). > > > > Do we want this behavior even if the new device has an highest priority than > > other previously connected devices? > > Huh... good question. I assume, if we go with policy in kernel, then it > is ok to implement just some one. But, I assume, we will need this kind of > interface soon or later: > > Warning! this is discussion, i'm in process of understanding :D > > /** > * enum pse_disconnection_policy - Disconnection strategies for same-priority > * devices when power budget is exceeded, tailored to specific priority > modes. * > * Each device can have multiple disconnection policies set as an array of > * priorities. When the power budget is exceeded, the policies are executed > * in the order defined by the user. This allows for a more nuanced and > * flexible approach to handling power constraints across a range of devices > * with similar priorities or attributes. > * > * Example Usage: > * - Users can specify an ordered list of policies, such as starting with > * `PSE_DISCON_STATIC_CLASS_HIGHEST_FIRST` to prioritize based on class, > * followed by `PSE_DISCON_LRC` to break ties based on connection time. > * This ordered execution ensures that power disconnections align closely > * with the system’s operational requirements and priorities. ... 
> * @PSE_DISCON_STATIC_CLASS_BUDGET_MATCH: Disconnect based on static > allocation > * class, targeting devices that release enough allocated power to meet the > * current power requirement. > * - Relevant for: ETHTOOL_PSE_PORT_PRIO_STATIC > * - Behavior: Searches for the lowest-priority device that can release > * sufficient allocated power to meet the current budget requirement. > * Ensures that disconnection occurs only when enough power is freed. > * - Rationale: This strategy is useful when the goal is to balance power > * budget requirements while minimizing the number of disconnected > devices. > * It ensures that the system does not needlessly disconnect multiple > * devices if a single disconnection is sufficient to meet the power > needs. > * - Use Case: Ideal for systems where precise power budget management is > * necessary, and disconnections must be efficient in terms of freeing > * enough power with minimal impact on the system. Not sure about this one. PSE_DISCON_STATIC_CLASS_HIGHEST_FIRST would be sufficient for that case. > * @PSE_DISCON_LOWEST_AVG_POWER: Disconnect device with the lowest average > * power draw, minimizing impact on dynamic power allocation. > * - Relevant for: ETHTOOL_PSE_PORT_PRIO_DYNAMIC > * - Behavior: Among devices with the same priority level, the system > * disconnects the device with the lowest average power draw. > * - If multiple devices have the same average power draw and priority, > * further tie-breaking mechanisms can be applied, such as disconnecting > * the least recently connected device. > * - Rationale: Minimizes disruption across dynamic devices, keeping as many > * active as possible by removing the lowest-power ones. > * - Use Case: Suitable for dynamic-priority systems where maximizing the > * number of connected devices is more important than individual device > * power requirements. > > * @PSE_DISCON_LONGEST_IDLE: Disconnect device with the longest idle time > * (low or no recent active power usage). 
> * - Relevant for: ETHTOOL_PSE_PORT_PRIO_DYNAMIC > * - Behavior: Disconnects the device with the longest period of inactivity, > * where "idle" is defined as low current draw or absence of recent data > * transmission. > * - If multiple devices have the same idle time and priority, a > tie-breaking > * mechanism, such as round-robin based on port index, can be used. > * - Rationale: Optimizes resource allocation in dynamic-priority setups by > * maintaining active devices while deprioritizing those with minimal > * recent usage. > * - Use Case: Ideal for dynamic environments, like sensor networks, where > * devices may be intermittently active and can be deprioritized during > * idle periods. > * > * These disconnection policies provide flexibility in handling cases where > * multiple devices with the same priority exceed the PSE budget, aligning > * with either static or dynamic port priority modes: > * - `ETHTOOL_PSE_PORT_PRIO_STATIC` benefits from policies that maintain > * stable power allocation, favoring longer-standing or higher-class > * devices (e.g., `PSE_DISCON_LRC`, `PSE_DISCON_ROUND_ROBIN_IDX`, > * `PSE_DISCON_STATIC_CLASS_HIGHEST_FIRST`, > `PSE_DISCON_STATIC_CLASS_LOWEST_FIRST`, > * `PSE_DISCON_STATIC_CLASS_BUDGET_MATCH`). > * - `ETHTOOL_PSE_PORT_PRIO_DYNAMIC` supports policies that dynamically > * adjust based on real-time metrics (e.g., `PSE_DISCON_LOWEST_AVG_POWER`, > * `PSE_DISCON_LONGEST_IDLE`), ideal for setups where usage fluctuates > * frequently. > * - Users can define an ordered array of disconnection policies, allowing > * the system to apply each policy in sequence, providing nuanced control > * over how power disconnections are handled. > */ I think I can add support for one or two of these modes in this patch series. Modes relevant for dynamic port priority can't be used for now as nothing support them. Do you think I should add this full enumeration in ethtool UAPI even if not all of them are supported yet? 
> PD692x0 seems to use @PSE_DISCON_ROUND_ROBIN_IDX_HIGHEST_FIRST disconnection > policy. Yes. > ETHTOOL_PSE_PORT_PRIO_DYNAMIC and ETHTOOL_PSE_PORT_PRIO_STATIC seems to be the > source of information which should be used to trigger the disconnection > policy. Correct? Yes. In the ETHTOOL_PSE_PORT_PRIO_DYNAMIC case, disconnection is managed directly by the PSE firmware on the PD692x0. Regards,
On Tue, Nov 05, 2024 at 02:49:13PM +0100, Kory Maincent wrote: > On Thu, 31 Oct 2024 18:32:39 +0100 > Oleksij Rempel <o.rempel@pengutronix.de> wrote: > > > * @PSE_DISCON_STATIC_CLASS_BUDGET_MATCH: Disconnect based on static > > allocation > > * class, targeting devices that release enough allocated power to meet the > > * current power requirement. > > * - Relevant for: ETHTOOL_PSE_PORT_PRIO_STATIC > > * - Behavior: Searches for the lowest-priority device that can release > > * sufficient allocated power to meet the current budget requirement. > > * Ensures that disconnection occurs only when enough power is freed. > > * - Rationale: This strategy is useful when the goal is to balance power > > * budget requirements while minimizing the number of disconnected > > devices. > > * It ensures that the system does not needlessly disconnect multiple > > * devices if a single disconnection is sufficient to meet the power > > needs. > > * - Use Case: Ideal for systems where precise power budget management is > > * necessary, and disconnections must be efficient in terms of freeing > > * enough power with minimal impact on the system. > > Not sure about this one. PSE_DISCON_STATIC_CLASS_HIGHEST_FIRST would be > sufficient for that case. ack > > * @PSE_DISCON_LOWEST_AVG_POWER: Disconnect device with the lowest average > > * power draw, minimizing impact on dynamic power allocation. > > * - Relevant for: ETHTOOL_PSE_PORT_PRIO_DYNAMIC > > * - Behavior: Among devices with the same priority level, the system > > * disconnects the device with the lowest average power draw. > > * - If multiple devices have the same average power draw and priority, > > * further tie-breaking mechanisms can be applied, such as disconnecting > > * the least recently connected device. > > * - Rationale: Minimizes disruption across dynamic devices, keeping as many > > * active as possible by removing the lowest-power ones. 
> > * - Use Case: Suitable for dynamic-priority systems where maximizing the > > * number of connected devices is more important than individual device > > * power requirements. > > > > * @PSE_DISCON_LONGEST_IDLE: Disconnect device with the longest idle time > > * (low or no recent active power usage). > > * - Relevant for: ETHTOOL_PSE_PORT_PRIO_DYNAMIC > > * - Behavior: Disconnects the device with the longest period of inactivity, > > * where "idle" is defined as low current draw or absence of recent data > > * transmission. > > * - If multiple devices have the same idle time and priority, a > > tie-breaking > > * mechanism, such as round-robin based on port index, can be used. > > * - Rationale: Optimizes resource allocation in dynamic-priority setups by > > * maintaining active devices while deprioritizing those with minimal > > * recent usage. > > * - Use Case: Ideal for dynamic environments, like sensor networks, where > > * devices may be intermittently active and can be deprioritized during > > * idle periods. > > * > > * These disconnection policies provide flexibility in handling cases where > > * multiple devices with the same priority exceed the PSE budget, aligning > > * with either static or dynamic port priority modes: > > * - `ETHTOOL_PSE_PORT_PRIO_STATIC` benefits from policies that maintain > > * stable power allocation, favoring longer-standing or higher-class > > * devices (e.g., `PSE_DISCON_LRC`, `PSE_DISCON_ROUND_ROBIN_IDX`, > > * `PSE_DISCON_STATIC_CLASS_HIGHEST_FIRST`, > > `PSE_DISCON_STATIC_CLASS_LOWEST_FIRST`, > > * `PSE_DISCON_STATIC_CLASS_BUDGET_MATCH`). > > * - `ETHTOOL_PSE_PORT_PRIO_DYNAMIC` supports policies that dynamically > > * adjust based on real-time metrics (e.g., `PSE_DISCON_LOWEST_AVG_POWER`, > > * `PSE_DISCON_LONGEST_IDLE`), ideal for setups where usage fluctuates > > * frequently. 
> > * - Users can define an ordered array of disconnection policies, allowing > > * the system to apply each policy in sequence, providing nuanced control > > * over how power disconnections are handled. > > */ > > I think I can add support for one or two of these modes in this patch series. > Modes relevant for dynamic port priority can't be used for now as nothing > support them. ack > Do you think I should add this full enumeration in ethtool UAPI even if not all > of them are supported yet? No, do not worry, it was just my brain dump. Care only about the variants that are actually used. If someone needs something different, we will already know how to address it.
diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 29374b1ce378..25911083ff3b 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -229,6 +229,9 @@ static int pse_pi_is_enabled(struct regulator_dev *rdev) return -EOPNOTSUPP; id = rdev_get_id(rdev); + if (pcdev->port_prio_mode == ETHTOOL_PSE_PORT_PRIO_STATIC) + return pcdev->pi[id].pw_enabled; + mutex_lock(&pcdev->lock); ret = ops->pi_is_enabled(pcdev, id); mutex_unlock(&pcdev->lock); @@ -248,6 +251,16 @@ static int pse_pi_enable(struct regulator_dev *rdev) id = rdev_get_id(rdev); mutex_lock(&pcdev->lock); + if (pcdev->port_prio_mode == ETHTOOL_PSE_PORT_PRIO_STATIC) { + /* Manage enabled status by software. + * Real enable process will happen after a port connected + * event. + */ + pcdev->pi[id].admin_state_enabled = 1; + mutex_unlock(&pcdev->lock); + return 0; + } + ret = ops->pi_enable(pcdev, id); if (!ret) pcdev->pi[id].admin_state_enabled = 1; @@ -268,9 +281,12 @@ static int pse_pi_disable(struct regulator_dev *rdev) id = rdev_get_id(rdev); mutex_lock(&pcdev->lock); + ret = ops->pi_disable(pcdev, id); - if (!ret) + if (!ret) { pcdev->pi[id].admin_state_enabled = 0; + pcdev->pi[id].pw_enabled = 0; + } mutex_unlock(&pcdev->lock); return ret; @@ -564,6 +580,7 @@ int pse_controller_register(struct pse_controller_dev *pcdev) if (ret < 0) return ret; pcdev->id = ret; + pcdev->port_prio_supp_modes |= BIT(ETHTOOL_PSE_PORT_PRIO_DISABLED); if (!pcdev->nr_lines) pcdev->nr_lines = 1; @@ -704,10 +721,166 @@ pse_control_find_phy_by_id(struct pse_controller_dev *pcdev, int id) return psec->attached_phydev; } mutex_unlock(&pse_list_mutex); - return NULL; } +static void pse_deallocate_pw_budget(struct pse_controller_dev *pcdev, int id) +{ + struct pse_power_domain *pw_d = pcdev->pi[id].pw_d; + + if (!pw_d) + return; + + pw_d->pw_budget += pcdev->pi[id].pw_allocated; +} + +static int pse_pi_disable_isr(struct pse_controller_dev *pcdev, int id, + struct netlink_ext_ack 
*extack) +{ + const struct pse_controller_ops *ops = pcdev->ops; + int ret; + + if (!ops->pi_disable) { + NL_SET_ERR_MSG(extack, "PSE does not support disable control"); + return -EOPNOTSUPP; + } + + if (!pcdev->pi[id].admin_state_enabled || + !pcdev->pi[id].pw_enabled) + return 0; + + ret = ops->pi_disable(pcdev, id); + if (ret) { + NL_SET_ERR_MSG_FMT(extack, + "PI %d: disable error %d", + id, ret); + return ret; + } + + pse_deallocate_pw_budget(pcdev, id); + pcdev->pi[id].pw_enabled = 0; + return 0; +} + +static int pse_disable_pis_prio(struct pse_controller_dev *pcdev, int prio) +{ + int i, ret; + + for (i = 0; i < pcdev->nr_lines; i++) { + struct netlink_ext_ack extack = {}; + struct phy_device *phydev; + + if (pcdev->pi[i].prio != prio) + continue; + + dev_dbg(pcdev->dev, + "Disabling PI %d to free power budget\n", + i); + + NL_SET_ERR_MSG_FMT(&extack, + "Disabling PI %d to free power budget", + i); + + ret = pse_pi_disable_isr(pcdev, i, &extack); + phydev = pse_control_find_phy_by_id(pcdev, i); + if (phydev) + ethnl_pse_send_ntf(phydev, + ETHTOOL_C33_PSE_EVENT_DISCONNECTED, + &extack); + if (ret) + return ret; + } + + return 0; +} + +static int pse_allocate_pw_budget(struct pse_controller_dev *pcdev, int id, + int pw_req, struct netlink_ext_ack *extack) +{ + struct pse_power_domain *pw_d = pcdev->pi[id].pw_d; + int ret, _prio; + + if (!pw_d) + return 0; + + _prio = pcdev->nr_lines; + while (pw_req > pw_d->pw_budget && _prio > pcdev->pi[id].prio) { + ret = pse_disable_pis_prio(pcdev, _prio--); + if (ret) + return ret; + } + + if (pw_req > pw_d->pw_budget) { + NL_SET_ERR_MSG_FMT(extack, + "PI %d: not enough power budget available", + id); + return -ERANGE; + } + + pw_d->pw_budget -= pw_req; + pcdev->pi[id].pw_allocated = pw_req; + return 0; +} + +static int pse_pi_enable_isr(struct pse_controller_dev *pcdev, int id, + struct netlink_ext_ack *extack) +{ + const struct pse_controller_ops *ops = pcdev->ops; + int ret, pw_req; + + if (!ops->pi_enable || 
!ops->pi_get_pw_req) { + NL_SET_ERR_MSG(extack, "PSE does not support enable control"); + return -EOPNOTSUPP; + } + + if (!pcdev->pi[id].admin_state_enabled || + pcdev->pi[id].pw_enabled) + return 0; + + ret = ops->pi_get_pw_req(pcdev, id); + if (ret < 0) + return ret; + + pw_req = ret; + ret = pse_allocate_pw_budget(pcdev, id, pw_req, extack); + if (ret) + return ret; + + ret = ops->pi_enable(pcdev, id); + if (ret) { + pse_deallocate_pw_budget(pcdev, id); + NL_SET_ERR_MSG_FMT(extack, + "PI %d: enable error %d", + id, ret); + return ret; + } + + pcdev->pi[id].pw_enabled = 1; + return 0; +} + +static int pse_set_config_isr(struct pse_controller_dev *pcdev, int id, + unsigned long notifs, + struct netlink_ext_ack *extack) +{ + int ret = 0; + + if (notifs & ETHTOOL_C33_PSE_EVENT_CONNECTED && + notifs & ETHTOOL_C33_PSE_EVENT_DISCONNECTED) { + NL_SET_ERR_MSG_FMT(extack, + "PI %d: error, connection and disconnection reported simultaneously", + id); + return -EINVAL; + } + + if (notifs & ETHTOOL_C33_PSE_EVENT_CONNECTED) + ret = pse_pi_enable_isr(pcdev, id, extack); + else if (notifs & ETHTOOL_C33_PSE_EVENT_DISCONNECTED) + ret = pse_pi_disable_isr(pcdev, id, extack); + + return ret; +} + static irqreturn_t pse_notifier_isr(int irq, void *data) { struct netlink_ext_ack extack = {}; @@ -724,7 +897,6 @@ static irqreturn_t pse_notifier_isr(int irq, void *data) memset(h->notifs, 0, pcdev->nr_lines * sizeof(*h->notifs)); mutex_lock(&pcdev->lock); ret = desc->map_event(irq, pcdev, h->notifs, &notifs_mask); - mutex_unlock(&pcdev->lock); if (ret || !notifs_mask) return IRQ_NONE; @@ -737,6 +909,12 @@ static irqreturn_t pse_notifier_isr(int irq, void *data) continue; notifs = h->notifs[i]; + if (pcdev->port_prio_mode == ETHTOOL_PSE_PORT_PRIO_STATIC) { + ret = pse_set_config_isr(pcdev, i, notifs, &extack); + if (ret) + notifs |= ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR; + } + dev_dbg(h->pcdev->dev, "Sending PSE notification EVT 0x%lx\n", notifs); @@ -748,6 +926,8 @@ static
irqreturn_t pse_notifier_isr(int irq, void *data) NULL); } + mutex_unlock(&pcdev->lock); + return IRQ_HANDLED; } @@ -1001,6 +1181,20 @@ static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev, status->pse_id = pcdev->id; status->pw_d_id = pcdev->pi[id].pw_d->id; + status->c33_prio_supp_modes = pcdev->port_prio_supp_modes; + status->c33_prio_mode = pcdev->port_prio_mode; + switch (pcdev->port_prio_mode) { + case ETHTOOL_PSE_PORT_PRIO_STATIC: + status->c33_prio_max = pcdev->nr_lines; + status->c33_prio = pcdev->pi[id].prio; + break; + case ETHTOOL_PSE_PORT_PRIO_DYNAMIC: + status->c33_prio_max = pcdev->pis_prio_max; + break; + default: + break; + } + return ops->ethtool_get_status(pcdev, id, extack, status); } @@ -1038,11 +1232,12 @@ static int pse_ethtool_c33_set_config(struct pse_control *psec, case ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED: /* We could have mismatch between admin_state_enabled and * state reported by regulator_is_enabled. This can occur when - * the PI is forcibly turn off by the controller. Call - * regulator_disable on that case to fix the counters state. + * the PI is forcibly turn off by the controller or by the + * interrupt context. Call regulator_disable on that case + * to fix the counters state. 
*/ - if (psec->pcdev->pi[psec->id].admin_state_enabled && - !regulator_is_enabled(psec->ps)) { + if (!regulator_is_enabled(psec->ps) && + psec->pcdev->pi[psec->id].admin_state_enabled) { err = regulator_disable(psec->ps); if (err) break; @@ -1149,6 +1344,102 @@ int pse_ethtool_set_pw_limit(struct pse_control *psec, } EXPORT_SYMBOL_GPL(pse_ethtool_set_pw_limit); +int pse_ethtool_set_prio(struct pse_control *psec, + struct netlink_ext_ack *extack, + unsigned int prio) +{ + struct pse_controller_dev *pcdev = psec->pcdev; + const struct pse_controller_ops *ops; + int ret = 0; + + switch (pcdev->port_prio_mode) { + case ETHTOOL_PSE_PORT_PRIO_STATIC: + if (prio > pcdev->nr_lines) { + NL_SET_ERR_MSG_FMT(extack, + "priority %d exceed priority max %d", + prio, pcdev->nr_lines); + return -ERANGE; + } + + /* We don't want priority change in the middle of an + * enable/disable call + */ + mutex_lock(&pcdev->lock); + pcdev->pi[psec->id].prio = prio; + mutex_unlock(&pcdev->lock); + break; + + case ETHTOOL_PSE_PORT_PRIO_DYNAMIC: + ops = psec->pcdev->ops; + if (!ops->pi_set_prio) { + NL_SET_ERR_MSG(extack, + "pse driver does not support setting port priority"); + return -EOPNOTSUPP; + } + + if (prio > pcdev->pis_prio_max) { + NL_SET_ERR_MSG_FMT(extack, + "priority %d exceed priority max %d", + prio, pcdev->pis_prio_max); + return -ERANGE; + } + + mutex_lock(&pcdev->lock); + ret = ops->pi_set_prio(pcdev, psec->id, prio); + mutex_unlock(&pcdev->lock); + break; + + default: + ret = -EOPNOTSUPP; + } + + return ret; +} +EXPORT_SYMBOL_GPL(pse_ethtool_set_prio); + +int pse_ethtool_set_prio_mode(struct pse_control *psec, + struct netlink_ext_ack *extack, + enum pse_port_prio_modes prio_mode) +{ + struct pse_controller_dev *pcdev = psec->pcdev; + const struct pse_controller_ops *ops; + int ret = 0, i; + + if (!(BIT(prio_mode) & pcdev->port_prio_supp_modes)) { + NL_SET_ERR_MSG(extack, "priority mode not supported"); + return -EOPNOTSUPP; + } + + ops = psec->pcdev->ops; + + /* We don't want 
priority mode change in the middle of an + * enable/disable call + */ + mutex_lock(&pcdev->lock); + pcdev->port_prio_mode = prio_mode; + + /* Reset all priorities */ + for (i = 0; i < psec->pcdev->nr_lines; i++) { + /* PI not described */ + if (!pcdev->pi[i].rdev) + continue; + + pcdev->pi[i].prio = 0; + + if (!ops->pi_set_prio) + continue; + + if (pcdev->port_prio_supp_modes & + BIT(ETHTOOL_PSE_PORT_PRIO_DYNAMIC)) + ret = ops->pi_set_prio(pcdev, psec->id, 0); + } + + mutex_unlock(&psec->pcdev->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pse_ethtool_set_prio_mode); + bool pse_has_podl(struct pse_control *psec) { return psec->pcdev->types & ETHTOOL_PSE_PODL; diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index e275ef7e1eb0..653f9e3634bb 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -9,9 +9,12 @@ #include <linux/list.h> #include <uapi/linux/ethtool.h> #include <linux/regulator/driver.h> +#include <linux/workqueue.h> /* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */ #define MAX_PI_CURRENT 1920000 +/* Maximum power in mW according to IEEE 802.3-2022 Table 145-16 */ +#define MAX_PI_PW 99900 struct phy_device; struct pse_controller_dev; @@ -60,6 +63,12 @@ struct pse_control_config { * is in charge of the memory allocation. * @c33_pw_limit_nb_ranges: number of supported power limit configuration * ranges + * @c33_prio_supp_modes: PSE port priority modes supported. Set by PSE core. + * @c33_prio_mode: PSE port priority mode selected. Set by PSE core. + * @c33_prio_max: max priority allowed for the c33_prio variable value. Set + * by PSE core. + * @c33_prio: priority of the PSE. Set by PSE core in case of static port + * priority mode. 
*/ struct pse_control_status { u32 pse_id; @@ -74,6 +83,10 @@ struct pse_control_status { u32 c33_avail_pw_limit; struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges; u32 c33_pw_limit_nb_ranges; + u32 c33_prio_supp_modes; + enum pse_port_prio_modes c33_prio_mode; + u32 c33_prio_max; + u32 c33_prio; }; /** @@ -93,6 +106,8 @@ struct pse_control_status { * set_current_limit regulator callback. * Should not return an error in case of MAX_PI_CURRENT * current value set. + * @pi_set_prio: Configure the PSE PI priority. + * @pi_get_pw_req: Get the power requested by a PD before enabling the PSE PI */ struct pse_controller_ops { int (*ethtool_get_status)(struct pse_controller_dev *pcdev, @@ -107,6 +122,9 @@ struct pse_controller_ops { int id); int (*pi_set_current_limit)(struct pse_controller_dev *pcdev, int id, int max_uA); + int (*pi_set_prio)(struct pse_controller_dev *pcdev, int id, + unsigned int prio); + int (*pi_get_pw_req)(struct pse_controller_dev *pcdev, int id); }; struct module; @@ -141,6 +159,10 @@ struct pse_pi_pairset { * @rdev: regulator represented by the PSE PI * @admin_state_enabled: PI enabled state * @pw_d: Power domain of the PSE PI + * @prio: Priority of the PSE PI. 
Used in static port priority mode + * @pw_enabled: PSE PI power status in static port priority mode + * @pw_allocated: Power allocated to a PSE PI to manage power budget in + * static port priority mode */ struct pse_pi { struct pse_pi_pairset pairset[2]; @@ -148,6 +170,9 @@ struct pse_pi { struct regulator_dev *rdev; bool admin_state_enabled; struct pse_power_domain *pw_d; + int prio; + bool pw_enabled; + int pw_allocated; }; /** @@ -165,6 +190,9 @@ struct pse_pi { * @pi: table of PSE PIs described in this controller device * @no_of_pse_pi: flag set if the pse_pis devicetree node is not used * @id: Index of the PSE + * @pis_prio_max: Maximum value allowed for the PSE PIs priority + * @port_prio_supp_modes: Bitfield of port priority mode supported by the PSE + * @port_prio_mode: Current port priority mode of the PSE */ struct pse_controller_dev { const struct pse_controller_ops *ops; @@ -179,6 +207,9 @@ struct pse_controller_dev { struct pse_pi *pi; bool no_of_pse_pi; int id; + unsigned int pis_prio_max; + u32 port_prio_supp_modes; + enum pse_port_prio_modes port_prio_mode; }; #if IS_ENABLED(CONFIG_PSE_CONTROLLER) @@ -203,6 +234,12 @@ int pse_ethtool_set_config(struct pse_control *psec, int pse_ethtool_set_pw_limit(struct pse_control *psec, struct netlink_ext_ack *extack, const unsigned int pw_limit); +int pse_ethtool_set_prio(struct pse_control *psec, + struct netlink_ext_ack *extack, + unsigned int prio); +int pse_ethtool_set_prio_mode(struct pse_control *psec, + struct netlink_ext_ack *extack, + enum pse_port_prio_modes prio_mode); bool pse_has_podl(struct pse_control *psec); bool pse_has_c33(struct pse_control *psec); @@ -240,6 +277,20 @@ static inline int pse_ethtool_set_pw_limit(struct pse_control *psec, return -EOPNOTSUPP; } +static inline int pse_ethtool_set_prio(struct pse_control *psec, + struct netlink_ext_ack *extack, + unsigned int prio) +{ + return -EOPNOTSUPP; +} + +static inline int pse_ethtool_set_prio_mode(struct pse_control *psec, + struct 
netlink_ext_ack *extack, + enum pse_port_prio_modes prio_mode) +{ + return -EOPNOTSUPP; +} + static inline bool pse_has_podl(struct pse_control *psec) { return false; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index a1ad257b1ec1..22664b1ea4a2 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1002,11 +1002,35 @@ enum ethtool_c33_pse_pw_d_status { * enum ethtool_c33_pse_events - event list of the C33 PSE controller. * @ETHTOOL_C33_PSE_EVENT_OVER_CURRENT: PSE output current is too high. * @ETHTOOL_C33_PSE_EVENT_OVER_TEMP: PSE in over temperature state. + * @ETHTOOL_C33_PSE_EVENT_CONNECTED: PD detected on the PSE. + * @ETHTOOL_C33_PSE_EVENT_DISCONNECTED: PD has been disconnected on the PSE. + * @ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR: PSE faced an error in static + * port priority management mode. */ enum ethtool_c33_pse_events { - ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, - ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, + ETHTOOL_C33_PSE_EVENT_OVER_CURRENT = 1 << 0, + ETHTOOL_C33_PSE_EVENT_OVER_TEMP = 1 << 1, + ETHTOOL_C33_PSE_EVENT_CONNECTED = 1 << 2, + ETHTOOL_C33_PSE_EVENT_DISCONNECTED = 1 << 3, + ETHTOOL_C33_PSE_EVENT_PORT_PRIO_STATIC_ERROR = 1 << 4, +}; + +/** + * enum pse_port_prio_modes - PSE port priority modes. + * @ETHTOOL_PSE_PORT_PRIO_DISABLED: Port priority disabled. + * @ETHTOOL_PSE_PORT_PRIO_STATIC: PSE static port priority. Port priority + * based on the power requested during PD classification. This mode + * is managed by the PSE core. + * @ETHTOOL_PSE_PORT_PRIO_DYNAMIC: PSE dynamic port priority. Port priority + * based on the current consumption per ports compared to the total + * power budget. This mode is managed by the PSE controller. + */ + +enum pse_port_prio_modes { + ETHTOOL_PSE_PORT_PRIO_DISABLED, + ETHTOOL_PSE_PORT_PRIO_STATIC, + ETHTOOL_PSE_PORT_PRIO_DYNAMIC, }; /**