diff mbox series

[v12,1/2] pwm: add microchip soft ip corePWM driver

Message ID 20221110093512.333881-2-conor.dooley@microchip.com (mailing list archive)
State Not Applicable
Headers show
Series Hey Uwe, all, | expand

Checks

Context Check Description
conchuod/patch_count success Link
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/module_param success Was 0 now: 0
conchuod/build_rv32_defconfig success Build OK
conchuod/build_warn_rv64 success Errors and warnings before: 0 this patch: 0
conchuod/dtb_warn_rv64 success Errors and warnings before: 0 this patch: 0
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: please write a help paragraph that fully describes the config symbol
conchuod/source_inline warning Was 0 now: 2
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Conor Dooley Nov. 10, 2022, 9:35 a.m. UTC
Add a driver that supports the Microchip FPGA "soft" PWM IP core.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
---
 drivers/pwm/Kconfig              |  10 +
 drivers/pwm/Makefile             |   1 +
 drivers/pwm/pwm-microchip-core.c | 389 +++++++++++++++++++++++++++++++
 3 files changed, 400 insertions(+)
 create mode 100644 drivers/pwm/pwm-microchip-core.c

Comments

Uwe Kleine-König Nov. 17, 2022, 4:49 p.m. UTC | #1
Hello Conor,

On Thu, Nov 10, 2022 at 09:35:12AM +0000, Conor Dooley wrote:
> [...]
> +
> +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
> +				 bool enable, u64 period)
> +{
> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> +	u8 channel_enable, reg_offset, shift;
> +
> +	/*
> +	 * There are two adjacent 8 bit control regs, the lower reg controls
> +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> +	 * and if so, offset by the bus width.
> +	 */
> +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> +	shift = pwm->hwpwm & 7;
> +
> +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> +	channel_enable &= ~(1 << shift);
> +	channel_enable |= (enable << shift);
> +
> +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> +	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
> +	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
> +
> +	/*
> +	 * Notify the block to update the waveform from the shadow registers.
> +	 * The updated values will not appear on the bus until they have been
> +	 * applied to the waveform at the beginning of the next period. We must
> +	 * write these registers and wait for them to be applied before
> +	 * considering the channel enabled.
> +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> +	 */
> +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> +		u64 delay;
> +
> +		delay = div_u64(period, 1000u) ? : 1u;
> +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> +		usleep_range(delay, delay * 2);
> +	}

In some cases the delay could be prevented. e.g. when going from one
disabled state to another. If you don't want to complicate the driver
here, maybe point it out in a comment at least?

It's not well defined if pwm_apply should only return when the new
setting is actually active. (e.g. mxs doesn't wait)
So I wonder: Are there any hardware restrictions between setting the
SYNC_UPD flag and modifying the registers for duty and period? (I assume
writing a new duty and period might then result in a glitch if the
period just ends between the two writes.) Can you check if the hardware
waits on such a completion, e.g. by reading that register?

> +}
> +
> [...]
> +
> +static int mchp_core_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
> +				      const struct pwm_state *state)
> +{
> +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> +	struct pwm_state current_state = pwm->state;

You're doing a copy of pwm->state just to use one of the members to pass
it to mchp_core_pwm_enable.

> +	bool period_locked;
> +	u64 duty_steps, clk_rate;

I think using unsigned long for clk_rate would be beneficial. The
comparison against NSEC_PER_SEC might get cheaper (depending on how
clever the compiler is), and calling mchp_core_pwm_calc_period
should get cheaper, too. (At least on 32 bit archs.)

> +	u16 prescale;
> +	u8 period_steps;
> +
> +	if (!state->enabled) {
> +		mchp_core_pwm_enable(chip, pwm, false, current_state.period);
> +		return 0;
> +	}
> +
> +	/*
> +	 * If clk_rate is too big, the following multiplication might overflow.
> +	 * However this is implausible, as the fabric of current FPGAs cannot
> +	 * provide clocks at a rate high enough.
> +	 */
> +	clk_rate = clk_get_rate(mchp_core_pwm->clk);
> +	if (clk_rate >= NSEC_PER_SEC)
> +		return -EINVAL;
> +
> +	mchp_core_pwm_calc_period(state, clk_rate, &prescale, &period_steps);
> +
> +	/*
> +	 * If the only thing that has changed is the duty cycle or the polarity,
> +	 * we can shortcut the calculations and just compute/apply the new duty
> +	 * cycle pos & neg edges
> +	 * As all the channels share the same period, do not allow it to be
> +	 * changed if any other channels are enabled.
> +	 * If the period is locked, it may not be possible to use a period
> +	 * less than that requested. In that case, we just abort.
> +	 */
> +	period_locked = mchp_core_pwm->channel_enabled & ~(1 << pwm->hwpwm);
> +
> +	if (period_locked) {
> +		u16 hw_prescale;
> +		u8 hw_period_steps;
> +
> +		hw_prescale = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
> +		hw_period_steps = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
> +
> +		if ((period_steps + 1) * (prescale + 1) <
> +		    (hw_period_steps + 1) * (hw_prescale + 1))
> +			return -EINVAL;
> +
> +		/*
> +		 * It is possible that something could have set the period_steps
> +		 * register to 0xff, which would prevent us from setting a 100%
> +		 * or 0% relative duty cycle, as explained above in
> +		 * mchp_core_pwm_calc_period().
> +		 * The period is locked and we cannot change this, so we abort.
> +		 */
> +		if (hw_period_steps == MCHPCOREPWM_PERIOD_STEPS_MAX)
> +			return -EINVAL;
> +
> +		prescale = hw_prescale;
> +		period_steps = hw_period_steps;
> +	} else {
> +		mchp_core_pwm_apply_period(mchp_core_pwm, prescale, period_steps);
> +	}
> +
> +	duty_steps = mchp_core_pwm_calc_duty(state, clk_rate, prescale, period_steps);
> +
> +	/*
> +	 * Because the period is per channel, it is possible that the requested
> +	 * duty cycle is longer than the period, in which case cap it to the
> +	 * period, IOW a 100% duty cycle.
> +	 */
> +	if (duty_steps > period_steps)
> +		duty_steps = period_steps + 1;
> +
> +	mchp_core_pwm_apply_duty(chip, pwm, state, duty_steps, period_steps);
> +
> +	mchp_core_pwm_enable(chip, pwm, true, state->period);

Don't you need to pass the previously configured period here?

> +
> +	return 0;
> +}
> [...]

Best regards
Uwe
Conor Dooley Nov. 17, 2022, 5:38 p.m. UTC | #2
On Thu, Nov 17, 2022 at 05:49:50PM +0100, Uwe Kleine-König wrote:
> Hello Conor,

Hello Uwe,

> On Thu, Nov 10, 2022 at 09:35:12AM +0000, Conor Dooley wrote:
> > [...]
> > +
> > +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
> > +				 bool enable, u64 period)
> > +{
> > +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> > +	u8 channel_enable, reg_offset, shift;
> > +
> > +	/*
> > +	 * There are two adjacent 8 bit control regs, the lower reg controls
> > +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> > +	 * and if so, offset by the bus width.
> > +	 */
> > +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> > +	shift = pwm->hwpwm & 7;
> > +
> > +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> > +	channel_enable &= ~(1 << shift);
> > +	channel_enable |= (enable << shift);
> > +
> > +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> > +	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
> > +	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
> > +
> > +	/*
> > +	 * Notify the block to update the waveform from the shadow registers.
> > +	 * The updated values will not appear on the bus until they have been
> > +	 * applied to the waveform at the beginning of the next period. We must
> > +	 * write these registers and wait for them to be applied before
> > +	 * considering the channel enabled.
> > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > +	 */
> > +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > +		u64 delay;
> > +
> > +		delay = div_u64(period, 1000u) ? : 1u;
> > +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > +		usleep_range(delay, delay * 2);
> > +	}
> 
> In some cases the delay could be prevented. e.g. when going from one
> disabled state to another. If you don't want to complicate the driver
> here, maybe point it out in a comment at least?

Maybe this is my naivity talking, but I'd rather wait. Is there not the
chance that we re-enter pwm_apply() before the update has actually gone
through?
IIRC, but I'll have to confirm it, when the "shadow registers" are
enabled reads show the values that the hardware is using rather than the
values that are queued in the shadow registers. I'd rather avoid that
sort of mess and always sleep.

Now that I think of it, the reason I moved to unconditionally sleeping
was that if I turned on the PWM debugging it'd get tripped up. When it
tried to read the state, it got the old one rather than what'd just been
written.

Pasting my comment from above:
> > +	/*
> > +	 * Notify the block to update the waveform from the shadow registers.
> > +	 * The updated values will not appear on the bus until they have been

By "bus" in this statement, I meant on the AXI/AHB etc bus that the IP
core is connected to the CPUs on rather than the output. Perhaps my
wording of the comment could be improved and replace the word "bus" with
some wording containing "CPU" instead. "The updated values will not
appear to the CPU until" maybe.

I can also add some words relating to unconditionally sleeping w.r.t to
disabled states.

> > +	 * applied to the waveform at the beginning of the next period. We must
> > +	 * write these registers and wait for them to be applied before
> > +	 * considering the channel enabled.
> > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > +	 */

> It's not well defined if pwm_apply should only return when the new
> setting is actually active. (e.g. mxs doesn't wait)
> So I wonder: Are there any hardware restrictions between setting the
> SYNC_UPD flag and modifying the registers for duty and period? (I assume
> writing a new duty and period might then result in a glitch if the
> period just ends between the two writes.) Can you check if the hardware
> waits on such a completion, e.g. by reading that register?

Not entirely sure by what you mean: "waits on such a completion".
The hardware updates the registers at the first end-of-period after
SYNC_UPD is set. Don't write the bit, nothing happens. From the docs:

> > A shadow register holds all values and writes them when the SYNC_UPDATE
> > register is set to 1. In other words, for all channel synchronous
> > updates, write a "1" to the SYNC_UPDATE register after writing to all
> > the channel registers.

The docs also say:
> > SYNC_UPDATE: When this bit is set to "1" and SHADOW_REG_EN
> > is selected, all POSEDGE and NEGEDGE registers are updated
> > synchronously. Synchronous updates to the PWM waveform occur only
> > when SHADOW_REG_EN is asserted and SYNC_UPDATE is set to “1”.
> >
> > When this bit is set to "0", all the POSEDGE and NEGEDGE registers
> > are updated asynchronously

The second statement is at best vague (if the this bit in "when this
bit" refers to the bit in SHADOW_REG_EN) or contradictory at worse.
I suspect it's the former meaning, as shadow registers are a per-channel
thing. I suppose I have to go get some docs changed, **sigh**. It
doesn't make all that much sense to me, SHADOW_REG_EN is a RTL parameter
not a register that can be accessed from the AXI interface.

Anyways, back to the topic at hand.. if you were to do the following
(in really pseudocode form..):
	write(SYNC_UPD)
	write(period)
	<end-of-period>
	write(duty)

Then the duty cycle would not get updated, ever. At least, per doc
comment #1 & my "experimental" data. The RTL is rather dumb, since
AFAICT, this is meant to be cheap to implement in FPGA fabric.
Hence the default core configuration option is no shadow registers
& just immediately updates the output, waveform glitches be damned.

Hopefully that all helps?

> > +}
> > +
> > [...]
> > +
> > +static int mchp_core_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
> > +				      const struct pwm_state *state)
> > +{
> > +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> > +	struct pwm_state current_state = pwm->state;
> 
> You're doing a copy of pwm->state just to use one of the members to pass
> it to mchp_core_pwm_enable.

Fallout from refactoring I assume. I'll drop it.

> > +	bool period_locked;
> > +	u64 duty_steps, clk_rate;
> 
> I think using unsigned long for clk_rate would be beneficial. The
> comparison against NSEC_PER_SEC might get cheaper (depending on how
> clever the compiler is), and calling mchp_core_pwm_calc_period
> should get cheaper, too. (At least on 32 bit archs.)

Sure.

> > +	u16 prescale;
> > +	u8 period_steps;
> > +
> > +	if (!state->enabled) {
> > +		mchp_core_pwm_enable(chip, pwm, false, current_state.period);
> > +		return 0;
> > +	}
> > +
> > +	/*
> > +	 * If clk_rate is too big, the following multiplication might overflow.
> > +	 * However this is implausible, as the fabric of current FPGAs cannot
> > +	 * provide clocks at a rate high enough.
> > +	 */
> > +	clk_rate = clk_get_rate(mchp_core_pwm->clk);
> > +	if (clk_rate >= NSEC_PER_SEC)
> > +		return -EINVAL;
> > +
> > +	mchp_core_pwm_calc_period(state, clk_rate, &prescale, &period_steps);
> > +
> > +	/*
> > +	 * If the only thing that has changed is the duty cycle or the polarity,
> > +	 * we can shortcut the calculations and just compute/apply the new duty
> > +	 * cycle pos & neg edges
> > +	 * As all the channels share the same period, do not allow it to be
> > +	 * changed if any other channels are enabled.
> > +	 * If the period is locked, it may not be possible to use a period
> > +	 * less than that requested. In that case, we just abort.
> > +	 */
> > +	period_locked = mchp_core_pwm->channel_enabled & ~(1 << pwm->hwpwm);
> > +
> > +	if (period_locked) {
> > +		u16 hw_prescale;
> > +		u8 hw_period_steps;
> > +
> > +		hw_prescale = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
> > +		hw_period_steps = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
> > +
> > +		if ((period_steps + 1) * (prescale + 1) <
> > +		    (hw_period_steps + 1) * (hw_prescale + 1))
> > +			return -EINVAL;
> > +
> > +		/*
> > +		 * It is possible that something could have set the period_steps
> > +		 * register to 0xff, which would prevent us from setting a 100%
> > +		 * or 0% relative duty cycle, as explained above in
> > +		 * mchp_core_pwm_calc_period().
> > +		 * The period is locked and we cannot change this, so we abort.
> > +		 */
> > +		if (hw_period_steps == MCHPCOREPWM_PERIOD_STEPS_MAX)
> > +			return -EINVAL;
> > +
> > +		prescale = hw_prescale;
> > +		period_steps = hw_period_steps;
> > +	} else {
> > +		mchp_core_pwm_apply_period(mchp_core_pwm, prescale, period_steps);
> > +	}
> > +
> > +	duty_steps = mchp_core_pwm_calc_duty(state, clk_rate, prescale, period_steps);
> > +
> > +	/*
> > +	 * Because the period is per channel, it is possible that the requested
> > +	 * duty cycle is longer than the period, in which case cap it to the
> > +	 * period, IOW a 100% duty cycle.
> > +	 */
> > +	if (duty_steps > period_steps)
> > +		duty_steps = period_steps + 1;
> > +
> > +	mchp_core_pwm_apply_duty(chip, pwm, state, duty_steps, period_steps);
> > +
> > +	mchp_core_pwm_enable(chip, pwm, true, state->period);
> 
> Don't you need to pass the previously configured period here?

Yeah, should be current_state. Thanks.

Conor.
Uwe Kleine-König Nov. 17, 2022, 9:04 p.m. UTC | #3
On Thu, Nov 17, 2022 at 05:38:26PM +0000, Conor Dooley wrote:
> On Thu, Nov 17, 2022 at 05:49:50PM +0100, Uwe Kleine-König wrote:
> > Hello Conor,
> 
> Hello Uwe,
> 
> > On Thu, Nov 10, 2022 at 09:35:12AM +0000, Conor Dooley wrote:
> > > [...]
> > > +
> > > +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
> > > +				 bool enable, u64 period)
> > > +{
> > > +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> > > +	u8 channel_enable, reg_offset, shift;
> > > +
> > > +	/*
> > > +	 * There are two adjacent 8 bit control regs, the lower reg controls
> > > +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> > > +	 * and if so, offset by the bus width.
> > > +	 */
> > > +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> > > +	shift = pwm->hwpwm & 7;
> > > +
> > > +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> > > +	channel_enable &= ~(1 << shift);
> > > +	channel_enable |= (enable << shift);
> > > +
> > > +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> > > +	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
> > > +	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
> > > +
> > > +	/*
> > > +	 * Notify the block to update the waveform from the shadow registers.
> > > +	 * The updated values will not appear on the bus until they have been
> > > +	 * applied to the waveform at the beginning of the next period. We must
> > > +	 * write these registers and wait for them to be applied before
> > > +	 * considering the channel enabled.
> > > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > > +	 */
> > > +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > +		u64 delay;
> > > +
> > > +		delay = div_u64(period, 1000u) ? : 1u;
> > > +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > +		usleep_range(delay, delay * 2);
> > > +	}
> > 
> > In some cases the delay could be prevented. e.g. when going from one
> > disabled state to another. If you don't want to complicate the driver
> > here, maybe point it out in a comment at least?
> 
> Maybe this is my naivity talking, but I'd rather wait. Is there not the
> chance that we re-enter pwm_apply() before the update has actually gone
> through?

My idea was to do something like that:

	int mchp_core_pwm_apply(....)
	{
		if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
			/*
			 * We're still waiting for an update, don't
			 * interfer until it's completed.
			 */
			while (readl_relaxed(mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD)) {
				cpu_relax();
				if (waited_unreasonably_long())
					return -ETIMEOUT;
			}
		}

		update_period_and_duty(...);
		return 0;
	}

This way you don't have to wait at all if the calls to pwm_apply() are
infrequent. Of course this only works this way, if you can determine if
there is a pending update.

From a simplicity POV always waiting is fine. But if you consider
periods of say 5s, letting a call to pwm_apply() do a sleep between 5
and 10 seconds at the end is quite long and blocks the caller
considerably.

> IIRC, but I'll have to confirm it, when the "shadow registers" are
> enabled reads show the values that the hardware is using rather than the
> values that are queued in the shadow registers. I'd rather avoid that
> sort of mess and always sleep.
> 
> Now that I think of it, the reason I moved to unconditionally sleeping
> was that if I turned on the PWM debugging it'd get tripped up. When it
> tried to read the state, it got the old one rather than what'd just been
> written.
> 
> Pasting my comment from above:
> > > +	/*
> > > +	 * Notify the block to update the waveform from the shadow registers.
> > > +	 * The updated values will not appear on the bus until they have been
> 
> By "bus" in this statement, I meant on the AXI/AHB etc bus that the IP
> core is connected to the CPUs on rather than the output. Perhaps my
> wording of the comment could be improved and replace the word "bus" with
> some wording containing "CPU" instead. "The updated values will not
> appear to the CPU until" maybe.

I'd write: Reading the registers yields the currently implemented
settings, the new ones are only readable once the current period ended.

> I can also add some words relating to unconditionally sleeping w.r.t to
> disabled states.
> 
> > > +	 * applied to the waveform at the beginning of the next period. We must
> > > +	 * write these registers and wait for them to be applied before
> > > +	 * considering the channel enabled.
> > > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > > +	 */
> 
> > It's not well defined if pwm_apply should only return when the new
> > setting is actually active. (e.g. mxs doesn't wait)
> > So I wonder: Are there any hardware restrictions between setting the
> > SYNC_UPD flag and modifying the registers for duty and period? (I assume
> > writing a new duty and period might then result in a glitch if the
> > period just ends between the two writes.) Can you check if the hardware
> > waits on such a completion, e.g. by reading that register?
> 
> Not entirely sure by what you mean: "waits on such a completion".

I wanted to say that it's okish to return from .apply() without the
sleep and so return to the caller before the requested setting appears
on the output. At least your driver wouldn't be the first to do it that
way.

> The hardware updates the registers at the first end-of-period after
> SYNC_UPD is set. Don't write the bit, nothing happens. From the docs:
> 
> > > A shadow register holds all values and writes them when the SYNC_UPDATE
> > > register is set to 1. In other words, for all channel synchronous
> > > updates, write a "1" to the SYNC_UPDATE register after writing to all
> > > the channel registers.
> 
> The docs also say:
> > > SYNC_UPDATE: When this bit is set to "1" and SHADOW_REG_EN
> > > is selected, all POSEDGE and NEGEDGE registers are updated
> > > synchronously. Synchronous updates to the PWM waveform occur only
> > > when SHADOW_REG_EN is asserted and SYNC_UPDATE is set to “1”.
> > >
> > > When this bit is set to "0", all the POSEDGE and NEGEDGE registers
> > > are updated asynchronously
> 
> The second statement is at best vague (if the this bit in "when this
> bit" refers to the bit in SHADOW_REG_EN) or contradictory at worse.
> I suspect it's the former meaning, as shadow registers are a per-channel
> thing. I suppose I have to go get some docs changed, **sigh**. It
> doesn't make all that much sense to me, SHADOW_REG_EN is a RTL parameter
> not a register that can be accessed from the AXI interface.
> 
> Anyways, back to the topic at hand.. if you were to do the following
> (in really pseudocode form..):
> 	write(SYNC_UPD)
> 	write(period)
> 	<end-of-period>
> 	write(duty)
> 
> Then the duty cycle would not get updated, ever. At least, per doc
> comment #1 & my "experimental" data. The RTL is rather dumb, since
> AFAICT, this is meant to be cheap to implement in FPGA fabric.
> Hence the default core configuration option is no shadow registers
> & just immediately updates the output, waveform glitches be damned.
> 
> Hopefully that all helps?

I already understood it that way. I hope I was able to clarify my
thoughts above.

Best regards
Uwe
Conor Dooley Nov. 17, 2022, 10:03 p.m. UTC | #4
On Thu, Nov 17, 2022 at 10:04:33PM +0100, Uwe Kleine-König wrote:
> On Thu, Nov 17, 2022 at 05:38:26PM +0000, Conor Dooley wrote:
> > On Thu, Nov 17, 2022 at 05:49:50PM +0100, Uwe Kleine-König wrote:
> > > Hello Conor,
> > 
> > Hello Uwe,
> > 
> > > On Thu, Nov 10, 2022 at 09:35:12AM +0000, Conor Dooley wrote:
> > > > [...]
> > > > +
> > > > +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
> > > > +				 bool enable, u64 period)
> > > > +{
> > > > +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> > > > +	u8 channel_enable, reg_offset, shift;
> > > > +
> > > > +	/*
> > > > +	 * There are two adjacent 8 bit control regs, the lower reg controls
> > > > +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> > > > +	 * and if so, offset by the bus width.
> > > > +	 */
> > > > +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> > > > +	shift = pwm->hwpwm & 7;
> > > > +
> > > > +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> > > > +	channel_enable &= ~(1 << shift);
> > > > +	channel_enable |= (enable << shift);
> > > > +
> > > > +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> > > > +	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
> > > > +	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
> > > > +
> > > > +	/*
> > > > +	 * Notify the block to update the waveform from the shadow registers.
> > > > +	 * The updated values will not appear on the bus until they have been
> > > > +	 * applied to the waveform at the beginning of the next period. We must
> > > > +	 * write these registers and wait for them to be applied before
> > > > +	 * considering the channel enabled.
> > > > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > > > +	 */
> > > > +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > > +		u64 delay;
> > > > +
> > > > +		delay = div_u64(period, 1000u) ? : 1u;
> > > > +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > > +		usleep_range(delay, delay * 2);
> > > > +	}
> > > 
> > > In some cases the delay could be prevented. e.g. when going from one
> > > disabled state to another. If you don't want to complicate the driver
> > > here, maybe point it out in a comment at least?
> > 
> > Maybe this is my naivity talking, but I'd rather wait. Is there not the
> > chance that we re-enter pwm_apply() before the update has actually gone
> > through?
> 
> My idea was to do something like that:
> 
> 	int mchp_core_pwm_apply(....)
> 	{
> 		if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> 			/*
> 			 * We're still waiting for an update, don't
> 			 * interfer until it's completed.
> 			 */
> 			while (readl_relaxed(mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD)) {
> 				cpu_relax();
> 				if (waited_unreasonably_long())
> 					return -ETIMEOUT;
> 			}
> 		}
> 
> 		update_period_and_duty(...);
> 		return 0;
> 	}
> 
> This way you don't have to wait at all if the calls to pwm_apply() are
> infrequent. Of course this only works this way, if you can determine if
> there is a pending update.

Ah I think I get what you mean now about waiting for completion &
reading the bit. I don't know off the top of my head if that bit is
readable. Docs say that they're R/W but I don't know if that means that
an AXI read works or if the value is actually readable. I'll try
something like this if I can.

> From a simplicity POV always waiting is fine. But if you consider
> periods of say 5s, letting a call to pwm_apply() do a sleep between 5
> and 10 seconds at the end is quite long and blocks the caller
> considerably.

Yeah, I know. At the end of the day, you're the one familiar with what
PWM consumers expect. If things go the wait-but-maybe-exit-early
direction I think I'll add something to the limitations to cover that.

> > IIRC, but I'll have to confirm it, when the "shadow registers" are
> > enabled reads show the values that the hardware is using rather than the
> > values that are queued in the shadow registers. I'd rather avoid that
> > sort of mess and always sleep.
> > 
> > Now that I think of it, the reason I moved to unconditionally sleeping
> > was that if I turned on the PWM debugging it'd get tripped up. When it
> > tried to read the state, it got the old one rather than what'd just been
> > written.
> > 
> > Pasting my comment from above:
> > > > +	/*
> > > > +	 * Notify the block to update the waveform from the shadow registers.
> > > > +	 * The updated values will not appear on the bus until they have been
> > 
> > By "bus" in this statement, I meant on the AXI/AHB etc bus that the IP
> > core is connected to the CPUs on rather than the output. Perhaps my
> > wording of the comment could be improved and replace the word "bus" with
> > some wording containing "CPU" instead. "The updated values will not
> > appear to the CPU until" maybe.
> 
> I'd write: Reading the registers yields the currently implemented
> settings, the new ones are only readable once the current period ended.

Cool, will use that so.

> > I can also add some words relating to unconditionally sleeping w.r.t to
> > disabled states.
> > 
> > > > +	 * applied to the waveform at the beginning of the next period. We must
> > > > +	 * write these registers and wait for them to be applied before
> > > > +	 * considering the channel enabled.
> > > > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > > > +	 */
> > 
> > > It's not well defined if pwm_apply should only return when the new
> > > setting is actually active. (e.g. mxs doesn't wait)
> > > So I wonder: Are there any hardware restrictions between setting the
> > > SYNC_UPD flag and modifying the registers for duty and period? (I assume
> > > writing a new duty and period might then result in a glitch if the
> > > period just ends between the two writes.) Can you check if the hardware
> > > waits on such a completion, e.g. by reading that register?
> > 
> > Not entirely sure by what you mean: "waits on such a completion".
> 
> I wanted to say that it's okish to return from .apply() without the
> sleep and so return to the caller before the requested setting appears
> on the output. At least your driver wouldn't be the first to do it that
> way.

I'd be more comfortable with it if the readable registers didn't hold
the old value. 

> > The hardware updates the registers at the first end-of-period after
> > SYNC_UPD is set. Don't write the bit, nothing happens. From the docs:
> > 
> > > > A shadow register holds all values and writes them when the SYNC_UPDATE
> > > > register is set to 1. In other words, for all channel synchronous
> > > > updates, write a "1" to the SYNC_UPDATE register after writing to all
> > > > the channel registers.
> > 
> > The docs also say:
> > > > SYNC_UPDATE: When this bit is set to "1" and SHADOW_REG_EN
> > > > is selected, all POSEDGE and NEGEDGE registers are updated
> > > > synchronously. Synchronous updates to the PWM waveform occur only
> > > > when SHADOW_REG_EN is asserted and SYNC_UPDATE is set to “1”.
> > > >
> > > > When this bit is set to "0", all the POSEDGE and NEGEDGE registers
> > > > are updated asynchronously
> > 
> > The second statement is at best vague (if the this bit in "when this
> > bit" refers to the bit in SHADOW_REG_EN) or contradictory at worse.
> > I suspect it's the former meaning, as shadow registers are a per-channel
> > thing. I suppose I have to go get some docs changed, **sigh**. It
> > doesn't make all that much sense to me, SHADOW_REG_EN is a RTL parameter
> > not a register that can be accessed from the AXI interface.
> > 
> > Anyways, back to the topic at hand.. if you were to do the following
> > (in really pseudocode form..):
> > 	write(SYNC_UPD)
> > 	write(period)
> > 	<end-of-period>
> > 	write(duty)
> > 
> > Then the duty cycle would not get updated, ever. At least, per doc
> > comment #1 & my "experimental" data. The RTL is rather dumb, since
> > AFAICT, this is meant to be cheap to implement in FPGA fabric.
> > Hence the default core configuration option is no shadow registers
> > & just immediately updates the output, waveform glitches be damned.
> > 
> > Hopefully that all helps?
> 
> I already understood it that way. I hope I was able to clarify my
> thoughts above.

Yeah, I think you did!

Thanks again,
Conor.
Conor Dooley Nov. 21, 2022, 3:29 p.m. UTC | #5
On Thu, Nov 17, 2022 at 10:03:13PM +0000, Conor Dooley wrote:
> On Thu, Nov 17, 2022 at 10:04:33PM +0100, Uwe Kleine-König wrote:
> > On Thu, Nov 17, 2022 at 05:38:26PM +0000, Conor Dooley wrote:
> > > On Thu, Nov 17, 2022 at 05:49:50PM +0100, Uwe Kleine-König wrote:
> > > > Hello Conor,
> > > 
> > > Hello Uwe,
> > > 
> > > > On Thu, Nov 10, 2022 at 09:35:12AM +0000, Conor Dooley wrote:
> > > > > [...]
> > > > > +
> > > > > +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
> > > > > +				 bool enable, u64 period)
> > > > > +{
> > > > > +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> > > > > +	u8 channel_enable, reg_offset, shift;
> > > > > +
> > > > > +	/*
> > > > > +	 * There are two adjacent 8 bit control regs, the lower reg controls
> > > > > +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> > > > > +	 * and if so, offset by the bus width.
> > > > > +	 */
> > > > > +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> > > > > +	shift = pwm->hwpwm & 7;
> > > > > +
> > > > > +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> > > > > +	channel_enable &= ~(1 << shift);
> > > > > +	channel_enable |= (enable << shift);
> > > > > +
> > > > > +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> > > > > +	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
> > > > > +	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
> > > > > +
> > > > > +	/*
> > > > > +	 * Notify the block to update the waveform from the shadow registers.
> > > > > +	 * The updated values will not appear on the bus until they have been
> > > > > +	 * applied to the waveform at the beginning of the next period. We must
> > > > > +	 * write these registers and wait for them to be applied before
> > > > > +	 * considering the channel enabled.
> > > > > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > > > > +	 */
> > > > > +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > > > +		u64 delay;
> > > > > +
> > > > > +		delay = div_u64(period, 1000u) ? : 1u;
> > > > > +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > > > +		usleep_range(delay, delay * 2);
> > > > > +	}
> > > > 
> > > > In some cases the delay could be prevented. e.g. when going from one
> > > > disabled state to another. If you don't want to complicate the driver
> > > > here, maybe point it out in a comment at least?
> > > 
> > > Maybe this is my naivity talking, but I'd rather wait. Is there not the
> > > chance that we re-enter pwm_apply() before the update has actually gone
> > > through?
> > 
> > My idea was to do something like that:
> > 
> > 	int mchp_core_pwm_apply(....)
> > 	{
> > 		if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > 			/*
> > 			 * We're still waiting for an update, don't
> > 			 * interfer until it's completed.
> > 			 */
> > 			while (readl_relaxed(mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD)) {
> > 				cpu_relax();
> > 				if (waited_unreasonably_long())
> > 					return -ETIMEOUT;
> > 			}
> > 		}
> > 
> > 		update_period_and_duty(...);
> > 		return 0;
> > 	}

So I was doing some fiddling, and the following works reasonably well:
	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
		u32 delay = MCHPCOREPWM_TIMEOUT_US;
		u32 sync_upd;
		int ret;

		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);

		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
		if (ret)
			dev_dbg(mchp_core_pwm->chip.dev,
				"timed out waiting for shadow register sync\n");
	}

but...

> > This way you don't have to wait at all if the calls to pwm_apply() are
> > infrequent. Of course this only works this way, if you can determine if
> > there is a pending update.
> 
> Ah I think I get what you mean now about waiting for completion &
> reading the bit. I don't know off the top of my head if that bit is
> readable. Docs say that they're R/W but I don't know if that means that
> an AXI read works or if the value is actually readable. I'll try
> something like this if I can.

...it does not implement what I think you suggested & comes with the
drawback of inconsistent behaviour depending on whether the timeout is
hit or not.

Instead, waiting in apply(), as you suggested, & get_state() looks to be the
better option, using the same sort of logic as above, say:
static int mchp_core_pwm_wait_for_sync_update(struct mchp_core_pwm_chip *mchp_core_pwm,
					      unsigned int channel)
{
	int ret;

	/*
	 * If a shadow register is used for this PWM channel, and iff there is
	 * a pending update to the waveform, we must wait for it to be applied
	 * before attempting to read its state, as reading the registers yields
	 * the currently implemented settings, the new ones are only readable
	 * once the current period has ended.
	 *
	 * Rather large delays are possible, in the seconds, so to avoid waiting
	 * around for **too** long - cap the wait at 100 ms.
	 */
	if (mchp_core_pwm->sync_update_mask & (1 << channel)) {
		u32 delay = MCHPCOREPWM_TIMEOUT_US;
		u32 sync_upd;

		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);

		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
		if (ret)
			return -ETIMEDOUT;
	}

	return 0;
}

I think that strikes a good balance? We return quickly & don't blocker
the caller, but simultaneously try to prevent them from either trying to
apply new settings or get the current settings until the last request
has gone though?

get_state() returns void though, is it valid behaviour to wait for the
timeout there?
I had a check in the core code and found some places where the call in
looks like:
	struct pwm_state s1, s2; 
	chip->ops->get_state(chip, pwm, &s1);
In this case, exiting early would leave us with a completely wrong
idead of the state, if it was to time out.

Either way, it seems like either way we would be misleading the caller
of get_state() - perhaps the way around that is to do the wait & then
just carry on with get_state()?
In that scenario, you'd get the new settings where possible and the old ones
otherwise.
Returning if the timeout is hit would give you the new settings where possible
& otherwise you'd get whatever was passed to get_state().
I'm not really sure which of those two situations would be preferred?

Thanks,
Conor.
Conor Dooley Nov. 30, 2022, 9:53 a.m. UTC | #6
Hey Uwe,

On Mon, Nov 21, 2022 at 03:29:39PM +0000, Conor Dooley wrote:
> On Thu, Nov 17, 2022 at 10:03:13PM +0000, Conor Dooley wrote:
> > On Thu, Nov 17, 2022 at 10:04:33PM +0100, Uwe Kleine-König wrote:
> > > On Thu, Nov 17, 2022 at 05:38:26PM +0000, Conor Dooley wrote:
> > > > On Thu, Nov 17, 2022 at 05:49:50PM +0100, Uwe Kleine-König wrote:
> > > > > Hello Conor,
> > > > 
> > > > Hello Uwe,
> > > > 
> > > > > On Thu, Nov 10, 2022 at 09:35:12AM +0000, Conor Dooley wrote:
> > > > > > [...]
> > > > > > +
> > > > > > +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
> > > > > > +				 bool enable, u64 period)
> > > > > > +{
> > > > > > +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> > > > > > +	u8 channel_enable, reg_offset, shift;
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * There are two adjacent 8 bit control regs, the lower reg controls
> > > > > > +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> > > > > > +	 * and if so, offset by the bus width.
> > > > > > +	 */
> > > > > > +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> > > > > > +	shift = pwm->hwpwm & 7;
> > > > > > +
> > > > > > +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> > > > > > +	channel_enable &= ~(1 << shift);
> > > > > > +	channel_enable |= (enable << shift);
> > > > > > +
> > > > > > +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> > > > > > +	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
> > > > > > +	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * Notify the block to update the waveform from the shadow registers.
> > > > > > +	 * The updated values will not appear on the bus until they have been
> > > > > > +	 * applied to the waveform at the beginning of the next period. We must
> > > > > > +	 * write these registers and wait for them to be applied before
> > > > > > +	 * considering the channel enabled.
> > > > > > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > > > > > +	 */
> > > > > > +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > > > > +		u64 delay;
> > > > > > +
> > > > > > +		delay = div_u64(period, 1000u) ? : 1u;
> > > > > > +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > > > > +		usleep_range(delay, delay * 2);
> > > > > > +	}
> > > > > 
> > > > > In some cases the delay could be prevented. e.g. when going from one
> > > > > disabled state to another. If you don't want to complicate the driver
> > > > > here, maybe point it out in a comment at least?
> > > > 
> > > > Maybe this is my naivity talking, but I'd rather wait. Is there not the
> > > > chance that we re-enter pwm_apply() before the update has actually gone
> > > > through?
> > > 
> > > My idea was to do something like that:
> > > 
> > > 	int mchp_core_pwm_apply(....)
> > > 	{
> > > 		if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > 			/*
> > > 			 * We're still waiting for an update, don't
> > > 			 * interfer until it's completed.
> > > 			 */
> > > 			while (readl_relaxed(mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD)) {
> > > 				cpu_relax();
> > > 				if (waited_unreasonably_long())
> > > 					return -ETIMEOUT;
> > > 			}
> > > 		}
> > > 
> > > 		update_period_and_duty(...);
> > > 		return 0;
> > > 	}
> 
> So I was doing some fiddling, and the following works reasonably well:
> 	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> 		u32 delay = MCHPCOREPWM_TIMEOUT_US;
> 		u32 sync_upd;
> 		int ret;
> 
> 		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> 
> 		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
> 					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> 		if (ret)
> 			dev_dbg(mchp_core_pwm->chip.dev,
> 				"timed out waiting for shadow register sync\n");
> 	}
> 
> but...
> 
> > > This way you don't have to wait at all if the calls to pwm_apply() are
> > > infrequent. Of course this only works this way, if you can determine if
> > > there is a pending update.
> > 
> > Ah I think I get what you mean now about waiting for completion &
> > reading the bit. I don't know off the top of my head if that bit is
> > readable. Docs say that they're R/W but I don't know if that means that
> > an AXI read works or if the value is actually readable. I'll try
> > something like this if I can.
> 
> ...it does not implement what I think you suggested & comes with the
> drawback of inconsistent behaviour depending on whether the timeout is
> hit or not.
> 
> Instead, waiting in apply(), as you suggested, & get_state() looks to be the
> better option, using the same sort of logic as above, say:
> static int mchp_core_pwm_wait_for_sync_update(struct mchp_core_pwm_chip *mchp_core_pwm,
> 					      unsigned int channel)
> {
> 	int ret;
> 
> 	/*
> 	 * If a shadow register is used for this PWM channel, and iff there is
> 	 * a pending update to the waveform, we must wait for it to be applied
> 	 * before attempting to read its state, as reading the registers yields
> 	 * the currently implemented settings, the new ones are only readable
> 	 * once the current period has ended.
> 	 *
> 	 * Rather large delays are possible, in the seconds, so to avoid waiting
> 	 * around for **too** long - cap the wait at 100 ms.
> 	 */
> 	if (mchp_core_pwm->sync_update_mask & (1 << channel)) {
> 		u32 delay = MCHPCOREPWM_TIMEOUT_US;
> 		u32 sync_upd;
> 
> 		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> 
> 		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
> 					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> 		if (ret)
> 			return -ETIMEDOUT;
> 	}
> 
> 	return 0;
> }
> 
> I think that strikes a good balance? We return quickly & don't blocker
> the caller, but simultaneously try to prevent them from either trying to
> apply new settings or get the current settings until the last request
> has gone though?
> 
> get_state() returns void though, is it valid behaviour to wait for the
> timeout there?
> I had a check in the core code and found some places where the call in
> looks like:
> 	struct pwm_state s1, s2; 
> 	chip->ops->get_state(chip, pwm, &s1);
> In this case, exiting early would leave us with a completely wrong
> idead of the state, if it was to time out.
> 
> Either way, it seems like either way we would be misleading the caller
> of get_state() - perhaps the way around that is to do the wait & then
> just carry on with get_state()?
> In that scenario, you'd get the new settings where possible and the old ones
> otherwise.
> Returning if the timeout is hit would give you the new settings where possible
> & otherwise you'd get whatever was passed to get_state().
> I'm not really sure which of those two situations would be preferred?

Apologies for bumping this, I was wondering if any thoughts on the
above? I'm not sure which is the lesser evil here (or if I have
misunderstood something).

Thanks,
Conor.
Uwe Kleine-König Nov. 30, 2022, 10:37 a.m. UTC | #7
Hello Conor,

On Wed, Nov 30, 2022 at 09:53:51AM +0000, Conor Dooley wrote:
> On Mon, Nov 21, 2022 at 03:29:39PM +0000, Conor Dooley wrote:
> > On Thu, Nov 17, 2022 at 10:03:13PM +0000, Conor Dooley wrote:
> > > On Thu, Nov 17, 2022 at 10:04:33PM +0100, Uwe Kleine-König wrote:
> > > > On Thu, Nov 17, 2022 at 05:38:26PM +0000, Conor Dooley wrote:
> > > > > On Thu, Nov 17, 2022 at 05:49:50PM +0100, Uwe Kleine-König wrote:
> > > > > > Hello Conor,
> > > > > 
> > > > > Hello Uwe,
> > > > > 
> > > > > > On Thu, Nov 10, 2022 at 09:35:12AM +0000, Conor Dooley wrote:
> > > > > > > [...]
> > > > > > > +
> > > > > > > +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
> > > > > > > +				 bool enable, u64 period)
> > > > > > > +{
> > > > > > > +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> > > > > > > +	u8 channel_enable, reg_offset, shift;
> > > > > > > +
> > > > > > > +	/*
> > > > > > > +	 * There are two adjacent 8 bit control regs, the lower reg controls
> > > > > > > +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> > > > > > > +	 * and if so, offset by the bus width.
> > > > > > > +	 */
> > > > > > > +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> > > > > > > +	shift = pwm->hwpwm & 7;
> > > > > > > +
> > > > > > > +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> > > > > > > +	channel_enable &= ~(1 << shift);
> > > > > > > +	channel_enable |= (enable << shift);
> > > > > > > +
> > > > > > > +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> > > > > > > +	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
> > > > > > > +	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
> > > > > > > +
> > > > > > > +	/*
> > > > > > > +	 * Notify the block to update the waveform from the shadow registers.
> > > > > > > +	 * The updated values will not appear on the bus until they have been
> > > > > > > +	 * applied to the waveform at the beginning of the next period. We must
> > > > > > > +	 * write these registers and wait for them to be applied before
> > > > > > > +	 * considering the channel enabled.
> > > > > > > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > > > > > > +	 */
> > > > > > > +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > > > > > +		u64 delay;
> > > > > > > +
> > > > > > > +		delay = div_u64(period, 1000u) ? : 1u;
> > > > > > > +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > > > > > +		usleep_range(delay, delay * 2);
> > > > > > > +	}
> > > > > > 
> > > > > > In some cases the delay could be prevented. e.g. when going from one
> > > > > > disabled state to another. If you don't want to complicate the driver
> > > > > > here, maybe point it out in a comment at least?
> > > > > 
> > > > > Maybe this is my naivity talking, but I'd rather wait. Is there not the
> > > > > chance that we re-enter pwm_apply() before the update has actually gone
> > > > > through?
> > > > 
> > > > My idea was to do something like that:
> > > > 
> > > > 	int mchp_core_pwm_apply(....)
> > > > 	{
> > > > 		if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > > 			/*
> > > > 			 * We're still waiting for an update, don't
> > > > 			 * interfer until it's completed.
> > > > 			 */
> > > > 			while (readl_relaxed(mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD)) {
> > > > 				cpu_relax();
> > > > 				if (waited_unreasonably_long())
> > > > 					return -ETIMEOUT;
> > > > 			}
> > > > 		}
> > > > 
> > > > 		update_period_and_duty(...);
> > > > 		return 0;
> > > > 	}
> > 
> > So I was doing some fiddling, and the following works reasonably well:
> > 	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > 		u32 delay = MCHPCOREPWM_TIMEOUT_US;
> > 		u32 sync_upd;
> > 		int ret;
> > 
> > 		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > 
> > 		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
> > 					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > 		if (ret)
> > 			dev_dbg(mchp_core_pwm->chip.dev,
> > 				"timed out waiting for shadow register sync\n");
> > 	}
> > 
> > but...
> > 
> > > > This way you don't have to wait at all if the calls to pwm_apply() are
> > > > infrequent. Of course this only works this way, if you can determine if
> > > > there is a pending update.
> > > 
> > > Ah I think I get what you mean now about waiting for completion &
> > > reading the bit. I don't know off the top of my head if that bit is
> > > readable. Docs say that they're R/W but I don't know if that means that
> > > an AXI read works or if the value is actually readable. I'll try
> > > something like this if I can.
> > 
> > ...it does not implement what I think you suggested & comes with the
> > drawback of inconsistent behaviour depending on whether the timeout is
> > hit or not.
> > 
> > Instead, waiting in apply(), as you suggested, & get_state() looks to be the
> > better option, using the same sort of logic as above, say:
> > static int mchp_core_pwm_wait_for_sync_update(struct mchp_core_pwm_chip *mchp_core_pwm,
> > 					      unsigned int channel)
> > {
> > 	int ret;
> > 
> > 	/*
> > 	 * If a shadow register is used for this PWM channel, and iff there is
> > 	 * a pending update to the waveform, we must wait for it to be applied
> > 	 * before attempting to read its state, as reading the registers yields
> > 	 * the currently implemented settings, the new ones are only readable
> > 	 * once the current period has ended.
> > 	 *
> > 	 * Rather large delays are possible, in the seconds, so to avoid waiting
> > 	 * around for **too** long - cap the wait at 100 ms.
> > 	 */
> > 	if (mchp_core_pwm->sync_update_mask & (1 << channel)) {
> > 		u32 delay = MCHPCOREPWM_TIMEOUT_US;
> > 		u32 sync_upd;
> > 
> > 		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > 
> > 		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
> > 					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > 		if (ret)
> > 			return -ETIMEDOUT;
> > 	}
> > 
> > 	return 0;
> > }
> > 
> > I think that strikes a good balance? We return quickly & don't blocker
> > the caller, but simultaneously try to prevent them from either trying to
> > apply new settings or get the current settings until the last request
> > has gone though?
> > 
> > get_state() returns void though, is it valid behaviour to wait for the
> > timeout there?

There was an approach to change that, see
https://lore.kernel.org/linux-pwm/20220916151506.298488-1-u.kleine-koenig@pengutronix.de

I need to send a v2.

> > I had a check in the core code and found some places where the call in
> > looks like:
> > 	struct pwm_state s1, s2; 
> > 	chip->ops->get_state(chip, pwm, &s1);
> > In this case, exiting early would leave us with a completely wrong
> > idead of the state, if it was to time out.
> > 
> > Either way, it seems like either way we would be misleading the caller
> > of get_state() - perhaps the way around that is to do the wait & then
> > just carry on with get_state()?
> > In that scenario, you'd get the new settings where possible and the old ones
> > otherwise.
> > Returning if the timeout is hit would give you the new settings where possible
> > & otherwise you'd get whatever was passed to get_state().
> > I'm not really sure which of those two situations would be preferred?

Hmm, .get_state should not return the old state. We really want
.get_state to return an error code. Maybe postpone that question until
we have that?

> Apologies for bumping this, I was wondering if any thoughts on the
> above? I'm not sure which is the lesser evil here (or if I have
> misunderstood something).

That's fine. I'm sorry to be not more responsive. This development cycle
is somehow crazy and there are so many open mails in my inbox ... :-\

Best regards
Uwe
Conor Dooley Nov. 30, 2022, 11:15 a.m. UTC | #8
On Wed, Nov 30, 2022 at 11:37:55AM +0100, Uwe Kleine-König wrote:
> Hello Conor,

> > > get_state() returns void though, is it valid behaviour to wait for the
> > > timeout there?
> 
> There was an approach to change that, see
> https://lore.kernel.org/linux-pwm/20220916151506.298488-1-u.kleine-koenig@pengutronix.de
> 
> I need to send a v2.

Ahh, yeah. That looks like a better idea. I'd much rather be able to
return an actual error.

> > > I had a check in the core code and found some places where the call in
> > > looks like:
> > > 	struct pwm_state s1, s2; 
> > > 	chip->ops->get_state(chip, pwm, &s1);
> > > In this case, exiting early would leave us with a completely wrong
> > > idead of the state, if it was to time out.
> > > 
> > > Either way, it seems like either way we would be misleading the caller
> > > of get_state() - perhaps the way around that is to do the wait & then
> > > just carry on with get_state()?
> > > In that scenario, you'd get the new settings where possible and the old ones
> > > otherwise.
> > > Returning if the timeout is hit would give you the new settings where possible
> > > & otherwise you'd get whatever was passed to get_state().
> > > I'm not really sure which of those two situations would be preferred?
> 
> Hmm, .get_state should not return the old state. We really want
> .get_state to return an error code. Maybe postpone that question until
> we have that?

If get_state() can return an error, there's no need for the question I
think. I'd rather return what's in the shadow registers *and* on the bus
or an error than an inconsistent state.

I'll send a v(N+1) based on the non-void get_state() at some point
soon-ish.

> > Apologies for bumping this, I was wondering if any thoughts on the
> > above? I'm not sure which is the lesser evil here (or if I have
> > misunderstood something).
> 
> That's fine. I'm sorry to be not more responsive. This development cycle
> is somehow crazy and there are so many open mails in my inbox ... :-\

Oh nw about that at all. I feel bad pinging stuff since I know everyone
is busy.

Thanks,
Conor.
Conor Dooley Dec. 5, 2022, 3:21 p.m. UTC | #9
Hey Uwe,

Preserving the context..

On Wed, Nov 30, 2022 at 11:37:55AM +0100, Uwe Kleine-König wrote:
> On Wed, Nov 30, 2022 at 09:53:51AM +0000, Conor Dooley wrote:
> > On Mon, Nov 21, 2022 at 03:29:39PM +0000, Conor Dooley wrote:
> > > On Thu, Nov 17, 2022 at 10:03:13PM +0000, Conor Dooley wrote:
> > > > On Thu, Nov 17, 2022 at 10:04:33PM +0100, Uwe Kleine-König wrote:
> > > > > On Thu, Nov 17, 2022 at 05:38:26PM +0000, Conor Dooley wrote:
> > > > > > On Thu, Nov 17, 2022 at 05:49:50PM +0100, Uwe Kleine-König wrote:
> > > > > > > Hello Conor,
> > > > > > 
> > > > > > Hello Uwe,
> > > > > > 
> > > > > > > On Thu, Nov 10, 2022 at 09:35:12AM +0000, Conor Dooley wrote:
> > > > > > > > [...]
> > > > > > > > +
> > > > > > > > +static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
> > > > > > > > +				 bool enable, u64 period)
> > > > > > > > +{
> > > > > > > > +	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
> > > > > > > > +	u8 channel_enable, reg_offset, shift;
> > > > > > > > +
> > > > > > > > +	/*
> > > > > > > > +	 * There are two adjacent 8 bit control regs, the lower reg controls
> > > > > > > > +	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
> > > > > > > > +	 * and if so, offset by the bus width.
> > > > > > > > +	 */
> > > > > > > > +	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
> > > > > > > > +	shift = pwm->hwpwm & 7;
> > > > > > > > +
> > > > > > > > +	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
> > > > > > > > +	channel_enable &= ~(1 << shift);
> > > > > > > > +	channel_enable |= (enable << shift);
> > > > > > > > +
> > > > > > > > +	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
> > > > > > > > +	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
> > > > > > > > +	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
> > > > > > > > +
> > > > > > > > +	/*
> > > > > > > > +	 * Notify the block to update the waveform from the shadow registers.
> > > > > > > > +	 * The updated values will not appear on the bus until they have been
> > > > > > > > +	 * applied to the waveform at the beginning of the next period. We must
> > > > > > > > +	 * write these registers and wait for them to be applied before
> > > > > > > > +	 * considering the channel enabled.
> > > > > > > > +	 * If the delay is under 1 us, sleep for at least 1 us anyway.
> > > > > > > > +	 */
> > > > > > > > +	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > > > > > > +		u64 delay;
> > > > > > > > +
> > > > > > > > +		delay = div_u64(period, 1000u) ? : 1u;
> > > > > > > > +		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > > > > > > +		usleep_range(delay, delay * 2);
> > > > > > > > +	}
> > > > > > > 
> > > > > > > In some cases the delay could be prevented. e.g. when going from one
> > > > > > > disabled state to another. If you don't want to complicate the driver
> > > > > > > here, maybe point it out in a comment at least?
> > > > > > 
> > > > > > Maybe this is my naivity talking, but I'd rather wait. Is there not the
> > > > > > chance that we re-enter pwm_apply() before the update has actually gone
> > > > > > through?
> > > > > 
> > > > > My idea was to do something like that:
> > > > > 
> > > > > 	int mchp_core_pwm_apply(....)
> > > > > 	{
> > > > > 		if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > > > 			/*
> > > > > 			 * We're still waiting for an update, don't
> > > > > 			 * interfer until it's completed.
> > > > > 			 */
> > > > > 			while (readl_relaxed(mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD)) {
> > > > > 				cpu_relax();
> > > > > 				if (waited_unreasonably_long())
> > > > > 					return -ETIMEOUT;
> > > > > 			}
> > > > > 		}
> > > > > 
> > > > > 		update_period_and_duty(...);
> > > > > 		return 0;
> > > > > 	}
> > > 
> > > So I was doing some fiddling, and the following works reasonably well:
> > > 	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
> > > 		u32 delay = MCHPCOREPWM_TIMEOUT_US;
> > > 		u32 sync_upd;
> > > 		int ret;
> > > 
> > > 		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > 
> > > 		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
> > > 					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > 		if (ret)
> > > 			dev_dbg(mchp_core_pwm->chip.dev,
> > > 				"timed out waiting for shadow register sync\n");
> > > 	}
> > > 
> > > but...
> > > 
> > > > > This way you don't have to wait at all if the calls to pwm_apply() are
> > > > > infrequent. Of course this only works this way, if you can determine if
> > > > > there is a pending update.
> > > > 
> > > > Ah I think I get what you mean now about waiting for completion &
> > > > reading the bit. I don't know off the top of my head if that bit is
> > > > readable. Docs say that they're R/W but I don't know if that means that
> > > > an AXI read works or if the value is actually readable. I'll try
> > > > something like this if I can.
> > > 
> > > ...it does not implement what I think you suggested & comes with the
> > > drawback of inconsistent behaviour depending on whether the timeout is
> > > hit or not.
> > > 
> > > Instead, waiting in apply(), as you suggested, & get_state() looks to be the
> > > better option, using the same sort of logic as above, say:
> > > static int mchp_core_pwm_wait_for_sync_update(struct mchp_core_pwm_chip *mchp_core_pwm,
> > > 					      unsigned int channel)
> > > {
> > > 	int ret;
> > > 
> > > 	/*
> > > 	 * If a shadow register is used for this PWM channel, and iff there is
> > > 	 * a pending update to the waveform, we must wait for it to be applied
> > > 	 * before attempting to read its state, as reading the registers yields
> > > 	 * the currently implemented settings, the new ones are only readable
> > > 	 * once the current period has ended.
> > > 	 *
> > > 	 * Rather large delays are possible, in the seconds, so to avoid waiting
> > > 	 * around for **too** long - cap the wait at 100 ms.
> > > 	 */
> > > 	if (mchp_core_pwm->sync_update_mask & (1 << channel)) {
> > > 		u32 delay = MCHPCOREPWM_TIMEOUT_US;
> > > 		u32 sync_upd;
> > > 
> > > 		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > 
> > > 		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
> > > 					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
> > > 		if (ret)
> > > 			return -ETIMEDOUT;
> > > 	}
> > > 
> > > 	return 0;
> > > }
> > > 
> > > I think that strikes a good balance? We return quickly & don't blocker
> > > the caller, but simultaneously try to prevent them from either trying to
> > > apply new settings or get the current settings until the last request
> > > has gone though?
> > > 
> > > get_state() returns void though, is it valid behaviour to wait for the
> > > timeout there?
> 
> There was an approach to change that, see
> https://lore.kernel.org/linux-pwm/20220916151506.298488-1-u.kleine-koenig@pengutronix.de
> 
> I need to send a v2.
> 
> > > I had a check in the core code and found some places where the call in
> > > looks like:
> > > 	struct pwm_state s1, s2; 
> > > 	chip->ops->get_state(chip, pwm, &s1);
> > > In this case, exiting early would leave us with a completely wrong
> > > idead of the state, if it was to time out.
> > > 
> > > Either way, it seems like either way we would be misleading the caller
> > > of get_state() - perhaps the way around that is to do the wait & then
> > > just carry on with get_state()?
> > > In that scenario, you'd get the new settings where possible and the old ones
> > > otherwise.
> > > Returning if the timeout is hit would give you the new settings where possible
> > > & otherwise you'd get whatever was passed to get_state().
> > > I'm not really sure which of those two situations would be preferred?
> 
> Hmm, .get_state should not return the old state. We really want
> .get_state to return an error code. Maybe postpone that question until
> we have that?

I came into work today thinking that I could just rebase on top of your
patchset and send out a v13, but that was unfortunately not the case :/

So uh, it turns out that I was wrong about the behaviour of the
sync_update register's bit.
It turns out that that bit holds it's value until the IP block is reset,
and /does not/ get cleared at the start of the next period.
I'm really not sure how it worked when I tested the other week [0], so I
spent the first half of the day trying to figure out what on earth had
happened to my FPGA image. I must've picked the wrong image when I went
to test it the other week that had the wrong configuration somehow.

As a result, I've gone and hacked up another way of transferring the
burden of waiting - setting a timer for the period, backed by a
completion. get_state() and apply() now both check for the completion
and time out otherwise. I'm half tempted to tack RFC back onto the
series as I have not really messed with timers at all before and may
have done something off the wall.

I pushed it out (see [1] in case you'd like to look) so that the bots
can have a play with it, since it'll be a few weeks before I'll have a
chance to properly test that I've broken nothing with this.

It's not nearly as neat or contained, but still benefits from the
non-void get_state() & doesn't "confuse" a caller of get_state() with
some potential garbage.

The diff on top of the read_poll_timeout() approach from above is pasted
here. Hopefully I'll refine it a bit before sending a v13, checkpatch
may have an aneurysm with what's below. Or have a better idea and throw
it out..

Thanks again for sending that v2 ~immediately,
Conor.

0 - https://lore.kernel.org/linux-riscv/Y3uZY5mt%2FZIWk3sS@wendy/
1 - https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/tree/drivers/pwm/pwm-microchip-core.c?h=pwm-dev-v13&id=ddbd59fb5480b1be74645f0a84d934b1f91d833d

diff --git a/drivers/pwm/pwm-microchip-core.c b/drivers/pwm/pwm-microchip-core.c
index c88fa8f8d96d..f565e8be46b3 100644
--- a/drivers/pwm/pwm-microchip-core.c
+++ b/drivers/pwm/pwm-microchip-core.c
@@ -34,6 +34,7 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
+#include <linux/timer.h>
 
 #define PREG_TO_VAL(PREG) ((PREG) + 1)
 
@@ -47,12 +48,14 @@
 #define MCHPCOREPWM_POSEDGE(i)	(0x10 + 0x08 * (i)) /* 0x10, 0x18, ..., 0x88 */
 #define MCHPCOREPWM_NEGEDGE(i)	(0x14 + 0x08 * (i)) /* 0x14, 0x1c, ..., 0x8c */
 #define MCHPCOREPWM_SYNC_UPD	0xe4
-#define MCHPCOREPWM_TIMEOUT_US	100000u
+#define MCHPCOREPWM_TIMEOUT_MS	100u
 
 struct mchp_core_pwm_chip {
 	struct pwm_chip chip;
 	struct clk *clk;
 	struct mutex lock; /* protect the shared period */
+	struct completion sync_update_complete;
+	struct timer_list sync_update_timer;
 	void __iomem *base;
 	u32 sync_update_mask;
 	u16 channel_enabled;
@@ -91,9 +94,54 @@ static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
 	 * applied to the waveform at the beginning of the next period.
 	 * This is a NO-OP if the channel does not have shadow registers.
 	 */
+	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
+		u64 delay;
+
+		/* Have to convert to jiffies... */
+		delay = div_u64(period, 1000000u) ? : 1u;
+		reinit_completion(&mchp_core_pwm->sync_update_complete);
+		mchp_core_pwm->sync_update_timer.expires = jiffies + delay;
+		add_timer(&mchp_core_pwm->sync_update_timer);
+	}
+}
+
+static int mchp_core_pwm_wait_for_sync_update(struct mchp_core_pwm_chip *mchp_core_pwm,
+					      unsigned int channel)
+{
+	/*
+	 * If a shadow register is used for this PWM channel, and iff there is
+	 * a pending update to the waveform, we must wait for it to be applied
+	 * before attempting to read its state, as reading the registers yields
+	 * the currently implemented settings, the new ones are only readable
+	 * once the current period has ended.
+	 *
+	 * Rather large delays are possible, in the seconds, so to avoid waiting
+	 * around for **too** long - cap the wait at 100 ms.
+	 */
+	if (!timer_pending(&mchp_core_pwm->sync_update_timer))
+		return 0;
+
+	if (mchp_core_pwm->sync_update_mask & (1 << channel)) {
+		int ret;
+		ret = wait_for_completion_interruptible_timeout(&mchp_core_pwm->sync_update_complete,
+								msecs_to_jiffies(MCHPCOREPWM_TIMEOUT_MS));
+		if (!ret)
+			return -ETIMEDOUT;
+
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
 
-	writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
+static void mchp_core_pwm_complete_sync_update(struct timer_list *t)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = from_timer(mchp_core_pwm,
+							      t, sync_update_timer);
 
+	complete(&mchp_core_pwm->sync_update_complete);
+	del_timer(&mchp_core_pwm->sync_update_timer);
 }
 
 static u64 mchp_core_pwm_calc_duty(const struct pwm_state *state, u64 clk_rate,
@@ -181,35 +229,6 @@ static inline void mchp_core_pwm_apply_period(struct mchp_core_pwm_chip *mchp_co
 	writel_relaxed(period_steps, mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
 }
 
-static int mchp_core_pwm_wait_for_sync_update(struct mchp_core_pwm_chip *mchp_core_pwm,
-					      unsigned int channel)
-{
-	/*
-	 * If a shadow register is used for this PWM channel, and iff there is
-	 * a pending update to the waveform, we must wait for it to be applied
-	 * before attempting to read its state, as reading the registers yields
-	 * the currently implemented settings, the new ones are only readable
-	 * once the current period has ended.
-	 *
-	 * Rather large delays are possible, in the seconds, so to avoid waiting
-	 * around for **too** long - cap the wait at 100 ms.
-	 */
-	if (mchp_core_pwm->sync_update_mask & (1 << channel)) {
-		u32 delay = MCHPCOREPWM_TIMEOUT_US;
-		u32 sync_upd;
-		int ret;
-
-		writel_relaxed(1u, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
-
-		ret = read_poll_timeout(readl, sync_upd, !sync_upd, delay/100, delay,
-					false, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
-		if (ret)
-			return -ETIMEDOUT;
-	}
-
-	return 0;
-}
-
 static int mchp_core_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
 				      const struct pwm_state *state)
 {
@@ -297,31 +316,37 @@ static int mchp_core_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
 	int ret;
 
+	mutex_lock(&mchp_core_pwm->lock);
+
 	ret = mchp_core_pwm_wait_for_sync_update(mchp_core_pwm, pwm->hwpwm);
 	if (ret)
-		return ret;
-
-	mutex_lock(&mchp_core_pwm->lock);
+		goto exit_unlock;
 
 	ret = mchp_core_pwm_apply_locked(chip, pwm, state);
 
+exit_unlock:
 	mutex_unlock(&mchp_core_pwm->lock);
 
 	return ret;
 }
 
-static void mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
-				    struct pwm_state *state)
+static int mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+				   struct pwm_state *state)
 {
 	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
 	u64 rate;
 	u16 prescale;
 	u8 period_steps, duty_steps, posedge, negedge;
-
-	mchp_core_pwm_wait_for_sync_update(mchp_core_pwm, pwm->hwpwm);
+	int ret;
 
 	mutex_lock(&mchp_core_pwm->lock);
 
+	ret = mchp_core_pwm_wait_for_sync_update(mchp_core_pwm, pwm->hwpwm);
+	if (ret) {
+		mutex_unlock(&mchp_core_pwm->lock);
+		return ret;
+	}
+
 	if (mchp_core_pwm->channel_enabled & (1 << pwm->hwpwm))
 		state->enabled = true;
 	else
@@ -351,6 +376,8 @@ static void mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pw
 	}
 
 	state->polarity = negedge < posedge ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
+
+	return 0;
 }
 
 static const struct pwm_ops mchp_core_pwm_ops = {
@@ -403,6 +430,10 @@ static int mchp_core_pwm_probe(struct platform_device *pdev)
 	if (ret < 0)
 		return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
 
+	init_completion(&mchp_pwm->sync_update_complete);
+	timer_setup(&mchp_pwm->sync_update_timer, mchp_core_pwm_complete_sync_update, 0);
+	writel_relaxed(1U, mchp_pwm->base + MCHPCOREPWM_SYNC_UPD);
+
 	return 0;
 }
Uwe Kleine-König Dec. 5, 2022, 4:03 p.m. UTC | #10
Hello Conor,

On Mon, Dec 05, 2022 at 03:21:55PM +0000, Conor Dooley wrote:
> I came into work today thinking that I could just rebase on top of your
> patchset and send out a v13, but that was unfortunately not the case :/
> 
> So uh, it turns out that I was wrong about the behaviour of the
> sync_update register's bit.
> It turns out that that bit holds it's value until the IP block is reset,
> and /does not/ get cleared at the start of the next period.
> I'm really not sure how it worked when I tested the other week [0], so I
> spent the first half of the day trying to figure out what on earth had
> happened to my FPGA image. I must've picked the wrong image when I went
> to test it the other week that had the wrong configuration somehow.
> 
> As a result, I've gone and hacked up another way of transferring the
> burden of waiting - setting a timer for the period, backed by a
> completion. get_state() and apply() now both check for the completion
> and time out otherwise. I'm half tempted to tack RFC back onto the
> series as I have not really messed with timers at all before and may
> have done something off the wall.
> 
> I pushed it out (see [1] in case you'd like to look) so that the bots
> can have a play with it, since it'll be a few weeks before I'll have a
> chance to properly test that I've broken nothing with this.

I didn't look, but I'm convinced you don't need a timer. Something like
the following should work, shouldn't it?:

 - in .apply() check the current time, add the current period and store
   the result to ddata->updatetimestamp
 - in .get_state do:
     if (current_time >= ddata->updatetimestamp)
       process fine
     else:
       timeout (or wait until ddata->updatetimestamp?)

Actually I'd prefer to wait instead of -ETIMEOUT.
 
Best regards
Uwe
Conor Dooley Dec. 5, 2022, 5:13 p.m. UTC | #11
On Mon, Dec 05, 2022 at 05:03:28PM +0100, Uwe Kleine-König wrote:
> Hello Conor,
> 
> On Mon, Dec 05, 2022 at 03:21:55PM +0000, Conor Dooley wrote:
> > I came into work today thinking that I could just rebase on top of your
> > patchset and send out a v13, but that was unfortunately not the case :/
> > 
> > So uh, it turns out that I was wrong about the behaviour of the
> > sync_update register's bit.
> > It turns out that that bit holds it's value until the IP block is reset,
> > and /does not/ get cleared at the start of the next period.
> > I'm really not sure how it worked when I tested the other week [0], so I
> > spent the first half of the day trying to figure out what on earth had
> > happened to my FPGA image. I must've picked the wrong image when I went
> > to test it the other week that had the wrong configuration somehow.
> > 
> > As a result, I've gone and hacked up another way of transferring the
> > burden of waiting - setting a timer for the period, backed by a
> > completion. get_state() and apply() now both check for the completion
> > and time out otherwise. I'm half tempted to tack RFC back onto the
> > series as I have not really messed with timers at all before and may
> > have done something off the wall.
> > 
> > I pushed it out (see [1] in case you'd like to look) so that the bots
> > can have a play with it, since it'll be a few weeks before I'll have a
> > chance to properly test that I've broken nothing with this.
> 
> I didn't look, but I'm convinced you don't need a timer. Something like
> the following should work, shouldn't it?:

Yeah & I did think of something along these lines. I was torn between
something that seemed heavy handed (timers) and calculating if enough
time had elapsed, which seemed a bit hacky.

Figured I was better off doing something quickly & asking rather than
polishing only to find out it was disliked ;)

> 
>  - in .apply() check the current time, add the current period and store
>    the result to ddata->updatetimestamp
>  - in .get_state do:
>      if (current_time >= ddata->updatetimestamp)
>        process fine
>      else:
>        timeout (or wait until ddata->updatetimestamp?)
> 
> Actually I'd prefer to wait instead of -ETIMEOUT.

Prefer to wait in get_state() or in both it & apply()?
Depending on how far away updatetimestamp is, would we still not want to
time out if it is going to be a long time, no?

Thanks again Uwe,
Conor.
Uwe Kleine-König Dec. 5, 2022, 6:13 p.m. UTC | #12
Hello Conor,

On Mon, Dec 05, 2022 at 05:13:42PM +0000, Conor Dooley wrote:
> Figured I was better off doing something quickly & asking rather than
> polishing only to find out it was disliked ;)

:-)

> >  - in .apply() check the current time, add the current period and store
> >    the result to ddata->updatetimestamp
> >  - in .get_state do:
> >      if (current_time >= ddata->updatetimestamp)
> >        process fine
> >      else:
> >        timeout (or wait until ddata->updatetimestamp?)
> > 
> > Actually I'd prefer to wait instead of -ETIMEOUT.
> 
> Prefer to wait in get_state() or in both it & apply()?

Prefer to wait where it's necessary ...

> Depending on how far away updatetimestamp is, would we still not want to
> time out if it is going to be a long time, no?

I'd prefer a slow but right value over a quick error.

The sun4i driver has in its .apply():

        /* We need a full period to elapse before disabling the channel. */
        delay_us = DIV_ROUND_UP_ULL(cstate.period, NSEC_PER_USEC);
        if ((delay_us / 500) > MAX_UDELAY_MS)
                msleep(delay_us / 1000 + 1);
        else
                usleep_range(delay_us, delay_us * 2);

so it's at least not new in general to have a waiting time in O(period).

Best regards
Uwe
diff mbox series

Patch

diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 60d13a949bc5..e4de8c02c3c0 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -393,6 +393,16 @@  config PWM_MEDIATEK
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-mediatek.
 
+config PWM_MICROCHIP_CORE
+	tristate "Microchip corePWM PWM support"
+	depends on SOC_MICROCHIP_POLARFIRE || COMPILE_TEST
+	depends on HAS_IOMEM && OF
+	help
+	  PWM driver for Microchip FPGA soft IP core.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-microchip-core.
+
 config PWM_MXS
 	tristate "Freescale MXS PWM support"
 	depends on ARCH_MXS || COMPILE_TEST
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index 7bf1a29f02b8..a65625359ece 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -34,6 +34,7 @@  obj-$(CONFIG_PWM_LPSS_PCI)	+= pwm-lpss-pci.o
 obj-$(CONFIG_PWM_LPSS_PLATFORM)	+= pwm-lpss-platform.o
 obj-$(CONFIG_PWM_MESON)		+= pwm-meson.o
 obj-$(CONFIG_PWM_MEDIATEK)	+= pwm-mediatek.o
+obj-$(CONFIG_PWM_MICROCHIP_CORE)	+= pwm-microchip-core.o
 obj-$(CONFIG_PWM_MTK_DISP)	+= pwm-mtk-disp.o
 obj-$(CONFIG_PWM_MXS)		+= pwm-mxs.o
 obj-$(CONFIG_PWM_NTXEC)		+= pwm-ntxec.o
diff --git a/drivers/pwm/pwm-microchip-core.c b/drivers/pwm/pwm-microchip-core.c
new file mode 100644
index 000000000000..5dd2b04537e8
--- /dev/null
+++ b/drivers/pwm/pwm-microchip-core.c
@@ -0,0 +1,389 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * corePWM driver for Microchip "soft" FPGA IP cores.
+ *
+ * Copyright (c) 2021-2022 Microchip Corporation. All rights reserved.
+ * Author: Conor Dooley <conor.dooley@microchip.com>
+ * Documentation:
+ * https://www.microsemi.com/document-portal/doc_download/1245275-corepwm-hb
+ *
+ * Limitations:
+ * - If the IP block is configured without "shadow registers", all register
+ *   writes will take effect immediately, causing glitches on the output.
+ *   If shadow registers *are* enabled, a write to the "SYNC_UPDATE" register
+ *   notifies the core that it needs to update the registers defining the
+ *   waveform from the contents of the "shadow registers".
+ * - The IP block has no concept of a duty cycle, only rising/falling edges of
+ *   the waveform. Unfortunately, if the rising & falling edges registers have
+ *   the same value written to them the IP block will do whichever of a rising
+ *   or a falling edge is possible. I.E. a 50% waveform at twice the requested
+ *   period. Therefore to get a 0% waveform, the output is set the max high/low
+ *   time depending on polarity.
+ * - The PWM period is set for the whole IP block not per channel. The driver
+ *   will only change the period if no other PWM output is enabled.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/math.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+
+#define PREG_TO_VAL(PREG) ((PREG) + 1)
+
+#define MCHPCOREPWM_PRESCALE_MAX	0x100
+#define MCHPCOREPWM_PERIOD_STEPS_MAX	0xff
+#define MCHPCOREPWM_PERIOD_MAX		0xff00
+
+#define MCHPCOREPWM_PRESCALE	0x00
+#define MCHPCOREPWM_PERIOD	0x04
+#define MCHPCOREPWM_EN(i)	(0x08 + 0x04 * (i)) /* 0x08, 0x0c */
+#define MCHPCOREPWM_POSEDGE(i)	(0x10 + 0x08 * (i)) /* 0x10, 0x18, ..., 0x88 */
+#define MCHPCOREPWM_NEGEDGE(i)	(0x14 + 0x08 * (i)) /* 0x14, 0x1c, ..., 0x8c */
+#define MCHPCOREPWM_SYNC_UPD	0xe4
+
+struct mchp_core_pwm_chip {
+	struct pwm_chip chip;
+	struct clk *clk;
+	struct mutex lock; /* protect the shared period */
+	void __iomem *base;
+	u32 sync_update_mask;
+	u16 channel_enabled;
+};
+
+static inline struct mchp_core_pwm_chip *to_mchp_core_pwm(struct pwm_chip *chip)
+{
+	return container_of(chip, struct mchp_core_pwm_chip, chip);
+}
+
+static void mchp_core_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
+				 bool enable, u64 period)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	u8 channel_enable, reg_offset, shift;
+
+	/*
+	 * There are two adjacent 8 bit control regs, the lower reg controls
+	 * 0-7 and the upper reg 8-15. Check if the pwm is in the upper reg
+	 * and if so, offset by the bus width.
+	 */
+	reg_offset = MCHPCOREPWM_EN(pwm->hwpwm >> 3);
+	shift = pwm->hwpwm & 7;
+
+	channel_enable = readb_relaxed(mchp_core_pwm->base + reg_offset);
+	channel_enable &= ~(1 << shift);
+	channel_enable |= (enable << shift);
+
+	writel_relaxed(channel_enable, mchp_core_pwm->base + reg_offset);
+	mchp_core_pwm->channel_enabled &= ~BIT(pwm->hwpwm);
+	mchp_core_pwm->channel_enabled |= enable << pwm->hwpwm;
+
+	/*
+	 * Notify the block to update the waveform from the shadow registers.
+	 * The updated values will not appear on the bus until they have been
+	 * applied to the waveform at the beginning of the next period. We must
+	 * write these registers and wait for them to be applied before
+	 * considering the channel enabled.
+	 * If the delay is under 1 us, sleep for at least 1 us anyway.
+	 */
+	if (mchp_core_pwm->sync_update_mask & (1 << pwm->hwpwm)) {
+		u64 delay;
+
+		delay = div_u64(period, 1000u) ? : 1u;
+		writel_relaxed(1U, mchp_core_pwm->base + MCHPCOREPWM_SYNC_UPD);
+		usleep_range(delay, delay * 2);
+	}
+}
+
+static u64 mchp_core_pwm_calc_duty(const struct pwm_state *state, u64 clk_rate,
+				   u8 prescale, u8 period_steps)
+{
+	u64 duty_steps, tmp;
+	u16 prescale_val = PREG_TO_VAL(prescale);
+
+	/*
+	 * Calculate the duty cycle in multiples of the prescaled period:
+	 * duty_steps = duty_in_ns / step_in_ns
+	 * step_in_ns = (prescale * NSEC_PER_SEC) / clk_rate
+	 * The code below is rearranged slightly to only divide once.
+	 */
+	tmp = prescale_val * NSEC_PER_SEC;
+	duty_steps = mul_u64_u64_div_u64(state->duty_cycle, clk_rate, tmp);
+
+	return duty_steps;
+}
+
+static void mchp_core_pwm_apply_duty(struct pwm_chip *chip, struct pwm_device *pwm,
+				     const struct pwm_state *state, u64 duty_steps, u8 period_steps)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	u8 posedge, negedge;
+	u8 period_steps_val = PREG_TO_VAL(period_steps);
+
+	/*
+	 * Setting posedge == negedge doesn't yield a constant output,
+	 * so that's an unsuitable setting to model duty_steps = 0.
+	 * In that case set the unwanted edge to a value that never
+	 * triggers.
+	 */
+	if (state->polarity == PWM_POLARITY_INVERSED) {
+		negedge = !duty_steps ? period_steps_val : 0u;
+		posedge = duty_steps;
+	} else {
+		posedge = !duty_steps ? period_steps_val : 0u;
+		negedge = duty_steps;
+	}
+
+	writel_relaxed(posedge, mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
+	writel_relaxed(negedge, mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
+}
+
+static void mchp_core_pwm_calc_period(const struct pwm_state *state, u64 clk_rate,
+				      u16 *prescale, u8 *period_steps)
+{
+	u64 tmp;
+
+	/*
+	 * Calculate the period cycles and prescale values.
+	 * The registers are each 8 bits wide & multiplied to compute the period
+	 * using the formula:
+	 * (clock_period) * (prescale + 1) * (period_steps + 1)
+	 * so the maximum period that can be generated is 0x10000 times the
+	 * period of the input clock.
+	 * However, due to the design of the "hardware", it is not possible to
+	 * attain a 100% duty cycle if the full range of period_steps is used.
+	 * Therefore period_steps is restricted to 0xFE and the maximum multiple
+	 * of the clock period attainable is 0xFF00.
+	 */
+	tmp = mul_u64_u64_div_u64(state->period, clk_rate, NSEC_PER_SEC);
+
+	/*
+	 * The hardware adds one to the register value, so decrement by one to
+	 * account for the offset
+	 */
+	if (tmp >= MCHPCOREPWM_PERIOD_MAX) {
+		*prescale = MCHPCOREPWM_PRESCALE_MAX - 1;
+		*period_steps = MCHPCOREPWM_PERIOD_STEPS_MAX - 1;
+
+		return;
+	}
+
+	*prescale = div_u64(tmp, MCHPCOREPWM_PERIOD_STEPS_MAX);
+	/* PREG_TO_VAL() can produce a value larger than UINT8_MAX */
+	*period_steps = div_u64(tmp, PREG_TO_VAL(*prescale)) - 1;
+}
+
+static inline void mchp_core_pwm_apply_period(struct mchp_core_pwm_chip *mchp_core_pwm,
+					      u8 prescale, u8 period_steps)
+{
+	writel_relaxed(prescale, mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
+	writel_relaxed(period_steps, mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
+}
+
+static int mchp_core_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
+				      const struct pwm_state *state)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	struct pwm_state current_state = pwm->state;
+	bool period_locked;
+	u64 duty_steps, clk_rate;
+	u16 prescale;
+	u8 period_steps;
+
+	if (!state->enabled) {
+		mchp_core_pwm_enable(chip, pwm, false, current_state.period);
+		return 0;
+	}
+
+	/*
+	 * If clk_rate is too big, the following multiplication might overflow.
+	 * However this is implausible, as the fabric of current FPGAs cannot
+	 * provide clocks at a rate high enough.
+	 */
+	clk_rate = clk_get_rate(mchp_core_pwm->clk);
+	if (clk_rate >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	mchp_core_pwm_calc_period(state, clk_rate, &prescale, &period_steps);
+
+	/*
+	 * If the only thing that has changed is the duty cycle or the polarity,
+	 * we can shortcut the calculations and just compute/apply the new duty
+	 * cycle pos & neg edges
+	 * As all the channels share the same period, do not allow it to be
+	 * changed if any other channels are enabled.
+	 * If the period is locked, it may not be possible to use a period
+	 * less than that requested. In that case, we just abort.
+	 */
+	period_locked = mchp_core_pwm->channel_enabled & ~(1 << pwm->hwpwm);
+
+	if (period_locked) {
+		u16 hw_prescale;
+		u8 hw_period_steps;
+
+		hw_prescale = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE);
+		hw_period_steps = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD);
+
+		if ((period_steps + 1) * (prescale + 1) <
+		    (hw_period_steps + 1) * (hw_prescale + 1))
+			return -EINVAL;
+
+		/*
+		 * It is possible that something could have set the period_steps
+		 * register to 0xff, which would prevent us from setting a 100%
+		 * or 0% relative duty cycle, as explained above in
+		 * mchp_core_pwm_calc_period().
+		 * The period is locked and we cannot change this, so we abort.
+		 */
+		if (hw_period_steps == MCHPCOREPWM_PERIOD_STEPS_MAX)
+			return -EINVAL;
+
+		prescale = hw_prescale;
+		period_steps = hw_period_steps;
+	} else {
+		mchp_core_pwm_apply_period(mchp_core_pwm, prescale, period_steps);
+	}
+
+	duty_steps = mchp_core_pwm_calc_duty(state, clk_rate, prescale, period_steps);
+
+	/*
+	 * Because the period is per channel, it is possible that the requested
+	 * duty cycle is longer than the period, in which case cap it to the
+	 * period, IOW a 100% duty cycle.
+	 */
+	if (duty_steps > period_steps)
+		duty_steps = period_steps + 1;
+
+	mchp_core_pwm_apply_duty(chip, pwm, state, duty_steps, period_steps);
+
+	mchp_core_pwm_enable(chip, pwm, true, state->period);
+
+	return 0;
+}
+
+static int mchp_core_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			       const struct pwm_state *state)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	int ret;
+
+	mutex_lock(&mchp_core_pwm->lock);
+
+	ret = mchp_core_pwm_apply_locked(chip, pwm, state);
+
+	mutex_unlock(&mchp_core_pwm->lock);
+
+	return ret;
+}
+
+static void mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+				    struct pwm_state *state)
+{
+	struct mchp_core_pwm_chip *mchp_core_pwm = to_mchp_core_pwm(chip);
+	u64 rate;
+	u16 prescale;
+	u8 period_steps, duty_steps, posedge, negedge;
+
+	mutex_lock(&mchp_core_pwm->lock);
+
+	if (mchp_core_pwm->channel_enabled & (1 << pwm->hwpwm))
+		state->enabled = true;
+	else
+		state->enabled = false;
+
+	rate = clk_get_rate(mchp_core_pwm->clk);
+
+	prescale = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PRESCALE));
+
+	period_steps = PREG_TO_VAL(readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_PERIOD));
+	state->period = period_steps * prescale;
+	state->period *= NSEC_PER_SEC;
+	state->period = DIV64_U64_ROUND_UP(state->period, rate);
+
+	posedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_POSEDGE(pwm->hwpwm));
+	negedge = readb_relaxed(mchp_core_pwm->base + MCHPCOREPWM_NEGEDGE(pwm->hwpwm));
+
+	mutex_unlock(&mchp_core_pwm->lock);
+
+	if (negedge == posedge) {
+		state->duty_cycle = state->period;
+		state->period *= 2;
+	} else {
+		duty_steps = abs((s16)posedge - (s16)negedge);
+		state->duty_cycle = duty_steps * prescale * NSEC_PER_SEC;
+		state->duty_cycle = DIV64_U64_ROUND_UP(state->duty_cycle, rate);
+	}
+
+	state->polarity = negedge < posedge ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
+}
+
+static const struct pwm_ops mchp_core_pwm_ops = {
+	.apply = mchp_core_pwm_apply,
+	.get_state = mchp_core_pwm_get_state,
+	.owner = THIS_MODULE,
+};
+
+static const struct of_device_id mchp_core_of_match[] = {
+	{
+		.compatible = "microchip,corepwm-rtl-v4",
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mchp_core_of_match);
+
+static int mchp_core_pwm_probe(struct platform_device *pdev)
+{
+	struct mchp_core_pwm_chip *mchp_pwm;
+	struct resource *regs;
+	int ret;
+
+	mchp_pwm = devm_kzalloc(&pdev->dev, sizeof(*mchp_pwm), GFP_KERNEL);
+	if (!mchp_pwm)
+		return -ENOMEM;
+
+	mchp_pwm->base = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
+	if (IS_ERR(mchp_pwm->base))
+		return PTR_ERR(mchp_pwm->base);
+
+	mchp_pwm->clk = devm_clk_get_enabled(&pdev->dev, NULL);
+	if (IS_ERR(mchp_pwm->clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(mchp_pwm->clk),
+				     "failed to get PWM clock\n");
+
+	if (of_property_read_u32(pdev->dev.of_node, "microchip,sync-update-mask",
+				 &mchp_pwm->sync_update_mask))
+		mchp_pwm->sync_update_mask = 0;
+
+	mutex_init(&mchp_pwm->lock);
+
+	mchp_pwm->chip.dev = &pdev->dev;
+	mchp_pwm->chip.ops = &mchp_core_pwm_ops;
+	mchp_pwm->chip.npwm = 16;
+
+	mchp_pwm->channel_enabled = readb_relaxed(mchp_pwm->base + MCHPCOREPWM_EN(0));
+	mchp_pwm->channel_enabled |= readb_relaxed(mchp_pwm->base + MCHPCOREPWM_EN(1)) << 8;
+
+	ret = devm_pwmchip_add(&pdev->dev, &mchp_pwm->chip);
+	if (ret < 0)
+		return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
+
+	return 0;
+}
+
+static struct platform_driver mchp_core_pwm_driver = {
+	.driver = {
+		.name = "mchp-core-pwm",
+		.of_match_table = mchp_core_of_match,
+	},
+	.probe = mchp_core_pwm_probe,
+};
+module_platform_driver(mchp_core_pwm_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
+MODULE_DESCRIPTION("corePWM driver for Microchip FPGAs");