diff mbox

i2c: aspeed: Improve driver to support multi-master use cases stably

Message ID 20180626165812.4141-1-jae.hyun.yoo@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jae Hyun Yoo June 26, 2018, 4:58 p.m. UTC
BMC firmware should support some multi-master use cases such as multi-node,
IPMB, BMC-ME link and so on but the current ASPEED I2C driver is a bit
unstable for the multi-master use case. So this patch improves ASPEED I2C
driver to support the multi-master use case stably.

Changes:
* Added XFER_MODE status register checking logic into
  aspeed_i2c_master_xfer to improve the current bus busy checking logic.
* Changed the order of enum aspeed_i2c_master_state and
  enum aspeed_i2c_slave_state defines to make their initial values set to
  ASPEED_I2C_MASTER_INACTIVE and ASPEED_I2C_SLAVE_STOP respectively.
  In case of multi-master use with previous code, if a slave data comes
  ahead of the first master xfer, master_state starts from an invalid
  state. This change fixed the issue.
* Adjusted spin_lock scope to make it wrap the whole irq handler using
  a single lock and unlock pair covers both master and slave handlers.
* Added irq_status variable as a member of the struct aspeed_i2c_bus to
  collect handled interrupt bits throughout the master and the slave irq
  handlers.
* Added control logic to put an order on calling the master and the slave
  irq handlers based on their current states.

Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
---
 drivers/i2c/busses/i2c-aspeed.c | 200 +++++++++++++++++++-------------
 1 file changed, 118 insertions(+), 82 deletions(-)

Comments

Brendan Higgins June 27, 2018, 7:36 a.m. UTC | #1
On Tue, Jun 26, 2018 at 9:58 AM Jae Hyun Yoo
<jae.hyun.yoo@linux.intel.com> wrote:
>
> BMC firmware should support some multi-master use cases such as multi-node,
> IPMB, BMC-ME link and so on but the current ASPEED I2C driver is a bit
> unstable for the multi-master use case. So this patch improves ASPEED I2C
> driver to support the multi-master use case stably.
>
> Changes:
> * Added XFER_MODE status register checking logic into
>   aspeed_i2c_master_xfer to improve the current bus busy checking logic.
> * Changed the order of enum aspeed_i2c_master_state and
>   enum aspeed_i2c_slave_state defines to make their initial values set to
>   ASPEED_I2C_MASTER_INACTIVE and ASPEED_I2C_SLAVE_STOP respectively.
>   In case of multi-master use with previous code, if a slave data comes
>   ahead of the first master xfer, master_state starts from an invalid
>   state. This change fixed the issue.
> * Adjusted spin_lock scope to make it wrap the whole irq handler using
>   a single lock and unlock pair covers both master and slave handlers.
> * Added irq_status variable as a member of the struct aspeed_i2c_bus to
>   collect handled interrupt bits throughout the master and the slave irq
>   handlers.
> * Added control logic to put an order on calling the master and the slave
>   irq handlers based on their current states.
>
> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
> ---
>  drivers/i2c/busses/i2c-aspeed.c | 200 +++++++++++++++++++-------------
>  1 file changed, 118 insertions(+), 82 deletions(-)
>
> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
> index 60e4d0e939a3..ac3e17d9a365 100644
> --- a/drivers/i2c/busses/i2c-aspeed.c
> +++ b/drivers/i2c/busses/i2c-aspeed.c
> @@ -4,6 +4,7 @@
>   *  Copyright (C) 2012-2017 ASPEED Technology Inc.
>   *  Copyright 2017 IBM Corporation
>   *  Copyright 2017 Google, Inc.
> + *  Copyright (c) 2018 Intel Corporation
>   *
>   *  This program is free software; you can redistribute it and/or modify
>   *  it under the terms of the GNU General Public License version 2 as
> @@ -12,6 +13,7 @@
>
>  #include <linux/clk.h>
>  #include <linux/completion.h>
> +#include <linux/delay.h>
>  #include <linux/err.h>
>  #include <linux/errno.h>
>  #include <linux/i2c.h>
> @@ -82,6 +84,11 @@
>  #define ASPEED_I2CD_INTR_RX_DONE                       BIT(2)
>  #define ASPEED_I2CD_INTR_TX_NAK                                BIT(1)
>  #define ASPEED_I2CD_INTR_TX_ACK                                BIT(0)
> +#define ASPEED_I2CD_INTR_ERRORS                                                       \
> +               (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                             \
> +                ASPEED_I2CD_INTR_SCL_TIMEOUT |                                \
> +                ASPEED_I2CD_INTR_ABNORMAL |                                   \
> +                ASPEED_I2CD_INTR_ARBIT_LOSS)
>  #define ASPEED_I2CD_INTR_ALL                                                  \
>                 (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                             \
>                  ASPEED_I2CD_INTR_BUS_RECOVER_DONE |                           \
> @@ -94,6 +101,7 @@
>                  ASPEED_I2CD_INTR_TX_ACK)
>
>  /* 0x14 : I2CD Command/Status Register   */
> +#define ASPEED_I2CD_XFER_MODE_STS_MASK                 GENMASK(22, 19)
>  #define ASPEED_I2CD_SCL_LINE_STS                       BIT(18)
>  #define ASPEED_I2CD_SDA_LINE_STS                       BIT(17)
>  #define ASPEED_I2CD_BUS_BUSY_STS                       BIT(16)
> @@ -110,23 +118,27 @@
>  /* 0x18 : I2CD Slave Device Address Register   */
>  #define ASPEED_I2CD_DEV_ADDR_MASK                      GENMASK(6, 0)
>
> +/* Timeout for bus busy checking */
> +#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 250ms */
> +#define BUS_BUSY_CHECK_INTERVAL                                10000  /* 10ms */

Could you add a comment on where you got these values from?

Also, please use the same naming pattern as above.

> +
>  enum aspeed_i2c_master_state {
> +       ASPEED_I2C_MASTER_INACTIVE,
>         ASPEED_I2C_MASTER_START,
>         ASPEED_I2C_MASTER_TX_FIRST,
>         ASPEED_I2C_MASTER_TX,
>         ASPEED_I2C_MASTER_RX_FIRST,
>         ASPEED_I2C_MASTER_RX,
>         ASPEED_I2C_MASTER_STOP,
> -       ASPEED_I2C_MASTER_INACTIVE,
>  };
>
>  enum aspeed_i2c_slave_state {
> +       ASPEED_I2C_SLAVE_STOP,
>         ASPEED_I2C_SLAVE_START,
>         ASPEED_I2C_SLAVE_READ_REQUESTED,
>         ASPEED_I2C_SLAVE_READ_PROCESSED,
>         ASPEED_I2C_SLAVE_WRITE_REQUESTED,
>         ASPEED_I2C_SLAVE_WRITE_RECEIVED,
> -       ASPEED_I2C_SLAVE_STOP,
>  };
>
>  struct aspeed_i2c_bus {
> @@ -150,6 +162,7 @@ struct aspeed_i2c_bus {
>         int                             cmd_err;
>         /* Protected only by i2c_lock_bus */
>         int                             master_xfer_result;
> +       u32                             irq_status;
>  #if IS_ENABLED(CONFIG_I2C_SLAVE)
>         struct i2c_client               *slave;
>         enum aspeed_i2c_slave_state     slave_state;
> @@ -229,37 +242,30 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>  #if IS_ENABLED(CONFIG_I2C_SLAVE)
>  static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
>  {
> -       u32 command, irq_status, status_ack = 0;
> +       u32 command, status_ack = 0;
>         struct i2c_client *slave = bus->slave;
> -       bool irq_handled = true;
>         u8 value;
>
> -       spin_lock(&bus->lock);
> -       if (!slave) {
> -               irq_handled = false;
> -               goto out;
> -       }
> +       if (!slave)
> +               return false;
>
>         command = readl(bus->base + ASPEED_I2C_CMD_REG);
> -       irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
>
>         /* Slave was requested, restart state machine. */
> -       if (irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
> +       if (bus->irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
>                 status_ack |= ASPEED_I2CD_INTR_SLAVE_MATCH;
>                 bus->slave_state = ASPEED_I2C_SLAVE_START;
>         }
>
>         /* Slave is not currently active, irq was for someone else. */
> -       if (bus->slave_state == ASPEED_I2C_SLAVE_STOP) {
> -               irq_handled = false;
> -               goto out;
> -       }
> +       if (bus->slave_state == ASPEED_I2C_SLAVE_STOP)
> +               return false;
>
>         dev_dbg(bus->dev, "slave irq status 0x%08x, cmd 0x%08x\n",
> -               irq_status, command);
> +               bus->irq_status, command);
>
>         /* Slave was sent something. */
> -       if (irq_status & ASPEED_I2CD_INTR_RX_DONE) {
> +       if (bus->irq_status & ASPEED_I2CD_INTR_RX_DONE) {
>                 value = readl(bus->base + ASPEED_I2C_BYTE_BUF_REG) >> 8;
>                 /* Handle address frame. */
>                 if (bus->slave_state == ASPEED_I2C_SLAVE_START) {
> @@ -274,28 +280,29 @@ static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
>         }
>
>         /* Slave was asked to stop. */
> -       if (irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
> +       if (bus->irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
>                 status_ack |= ASPEED_I2CD_INTR_NORMAL_STOP;
>                 bus->slave_state = ASPEED_I2C_SLAVE_STOP;
>         }
> -       if (irq_status & ASPEED_I2CD_INTR_TX_NAK) {
> +       if (bus->irq_status & ASPEED_I2CD_INTR_TX_NAK) {
>                 status_ack |= ASPEED_I2CD_INTR_TX_NAK;
>                 bus->slave_state = ASPEED_I2C_SLAVE_STOP;
>         }
> +       if (bus->irq_status & ASPEED_I2CD_INTR_TX_ACK) {
> +               status_ack |= ASPEED_I2CD_INTR_TX_ACK;
> +       }
>
>         switch (bus->slave_state) {
>         case ASPEED_I2C_SLAVE_READ_REQUESTED:
> -               if (irq_status & ASPEED_I2CD_INTR_TX_ACK)
> +               if (bus->irq_status & ASPEED_I2CD_INTR_TX_ACK)
>                         dev_err(bus->dev, "Unexpected ACK on read request.\n");
>                 bus->slave_state = ASPEED_I2C_SLAVE_READ_PROCESSED;
> -
>                 i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value);
>                 writel(value, bus->base + ASPEED_I2C_BYTE_BUF_REG);
>                 writel(ASPEED_I2CD_S_TX_CMD, bus->base + ASPEED_I2C_CMD_REG);
>                 break;
>         case ASPEED_I2C_SLAVE_READ_PROCESSED:
> -               status_ack |= ASPEED_I2CD_INTR_TX_ACK;
> -               if (!(irq_status & ASPEED_I2CD_INTR_TX_ACK))
> +               if (!(bus->irq_status & ASPEED_I2CD_INTR_TX_ACK))
>                         dev_err(bus->dev,
>                                 "Expected ACK after processed read.\n");
>                 i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value);
> @@ -318,15 +325,8 @@ static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
>                 break;
>         }
>
> -       if (status_ack != irq_status)
> -               dev_err(bus->dev,
> -                       "irq handled != irq. expected %x, but was %x\n",
> -                       irq_status, status_ack);
> -       writel(status_ack, bus->base + ASPEED_I2C_INTR_STS_REG);
> -
> -out:
> -       spin_unlock(&bus->lock);
> -       return irq_handled;
> +       bus->irq_status ^= status_ack;
> +       return !bus->irq_status;
>  }
>  #endif /* CONFIG_I2C_SLAVE */
>
> @@ -384,20 +384,19 @@ static int aspeed_i2c_is_irq_error(u32 irq_status)
>
>  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>  {
> -       u32 irq_status, status_ack = 0, command = 0;
> +       u32 status_ack = 0, command = 0;
>         struct i2c_msg *msg;
>         u8 recv_byte;
>         int ret;
>
> -       spin_lock(&bus->lock);
> -       irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
> -       /* Ack all interrupt bits. */
> -       writel(irq_status, bus->base + ASPEED_I2C_INTR_STS_REG);
> -
> -       if (irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
> +       if (bus->irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
>                 bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
>                 status_ack |= ASPEED_I2CD_INTR_BUS_RECOVER_DONE;
>                 goto out_complete;
> +       } else {
> +               /* Master is not currently active, irq was for someone else. */
> +               if (bus->master_state == ASPEED_I2C_MASTER_INACTIVE)
> +                       goto out_no_complete;
>         }
>
>         /*
> @@ -405,20 +404,23 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>          * should clear the command queue effectively taking us back to the
>          * INACTIVE state.
>          */
> -       ret = aspeed_i2c_is_irq_error(irq_status);
> -       if (ret < 0) {
> -               dev_dbg(bus->dev, "received error interrupt: 0x%08x",
> -                       irq_status);
> +       ret = aspeed_i2c_is_irq_error(bus->irq_status);
> +       if (ret) {
> +               dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
> +                       bus->irq_status);
>                 bus->cmd_err = ret;
>                 bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
> +               status_ack |= (bus->irq_status & ASPEED_I2CD_INTR_ERRORS);
>                 goto out_complete;
>         }
>
>         /* We are in an invalid state; reset bus to a known state. */
>         if (!bus->msgs) {
> -               dev_err(bus->dev, "bus in unknown state");
> +               dev_err(bus->dev, "bus in unknown state irq_status: 0x%x\n",
> +                       bus->irq_status);
>                 bus->cmd_err = -EIO;
> -               if (bus->master_state != ASPEED_I2C_MASTER_STOP)
> +               if (bus->master_state != ASPEED_I2C_MASTER_STOP &&
> +                   bus->master_state != ASPEED_I2C_MASTER_INACTIVE)
>                         aspeed_i2c_do_stop(bus);
>                 goto out_no_complete;
>         }
> @@ -430,7 +432,13 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>          * then update the state and handle the new state below.
>          */
>         if (bus->master_state == ASPEED_I2C_MASTER_START) {
> -               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
> +               if (unlikely(!(bus->irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
> +                       if (unlikely(!(bus->irq_status &
> +                                    ASPEED_I2CD_INTR_TX_NAK))) {
> +                               bus->cmd_err = -ENXIO;
> +                               bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
> +                               goto out_complete;
> +                       }
>                         pr_devel("no slave present at %02x", msg->addr);
>                         status_ack |= ASPEED_I2CD_INTR_TX_NAK;
>                         bus->cmd_err = -ENXIO;
> @@ -450,12 +458,13 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>
>         switch (bus->master_state) {
>         case ASPEED_I2C_MASTER_TX:
> -               if (unlikely(irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
> -                       dev_dbg(bus->dev, "slave NACKed TX");
> +               if (unlikely(bus->irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
> +                       dev_dbg(bus->dev, "slave NACKed TX\n");
>                         status_ack |= ASPEED_I2CD_INTR_TX_NAK;
>                         goto error_and_stop;
> -               } else if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
> -                       dev_err(bus->dev, "slave failed to ACK TX");
> +               } else if (unlikely(!(bus->irq_status &
> +                                     ASPEED_I2CD_INTR_TX_ACK))) {
> +                       dev_err(bus->dev, "slave failed to ACK TX\n");
>                         goto error_and_stop;
>                 }
>                 status_ack |= ASPEED_I2CD_INTR_TX_ACK;
> @@ -473,12 +482,12 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>                 goto out_no_complete;
>         case ASPEED_I2C_MASTER_RX_FIRST:
>                 /* RX may not have completed yet (only address cycle) */
> -               if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE))
> +               if (!(bus->irq_status & ASPEED_I2CD_INTR_RX_DONE))
>                         goto out_no_complete;
>                 /* fallthrough intended */
>         case ASPEED_I2C_MASTER_RX:
> -               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
> -                       dev_err(bus->dev, "master failed to RX");
> +               if (unlikely(!(bus->irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
> +                       dev_err(bus->dev, "master failed to RX\n");
>                         goto error_and_stop;
>                 }
>                 status_ack |= ASPEED_I2CD_INTR_RX_DONE;
> @@ -508,8 +517,11 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>                 }
>                 goto out_no_complete;
>         case ASPEED_I2C_MASTER_STOP:
> -               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_NORMAL_STOP))) {
> -                       dev_err(bus->dev, "master failed to STOP");
> +               if (unlikely(!(bus->irq_status &
> +                              ASPEED_I2CD_INTR_NORMAL_STOP))) {
> +                       dev_err(bus->dev,
> +                               "master failed to STOP irq_status:0x%x\n",
> +                               bus->irq_status);
>                         bus->cmd_err = -EIO;
>                         /* Do not STOP as we have already tried. */
>                 } else {
> @@ -520,8 +532,8 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>                 goto out_complete;
>         case ASPEED_I2C_MASTER_INACTIVE:
>                 dev_err(bus->dev,
> -                       "master received interrupt 0x%08x, but is inactive",
> -                       irq_status);
> +                       "master received interrupt 0x%08x, but is inactive\n",
> +                       bus->irq_status);
>                 bus->cmd_err = -EIO;
>                 /* Do not STOP as we should be inactive. */
>                 goto out_complete;
> @@ -543,26 +555,61 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>                 bus->master_xfer_result = bus->msgs_index + 1;
>         complete(&bus->cmd_complete);
>  out_no_complete:
> -       if (irq_status != status_ack)
> -               dev_err(bus->dev,
> -                       "irq handled != irq. expected 0x%08x, but was 0x%08x\n",
> -                       irq_status, status_ack);
> -       spin_unlock(&bus->lock);
> -       return !!irq_status;
> +       bus->irq_status ^= status_ack;
> +       return !bus->irq_status;
>  }
>
>  static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
>  {
>         struct aspeed_i2c_bus *bus = dev_id;
> +       u32 irq_received;
> +
> +       spin_lock(&bus->lock);
> +       irq_received = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
> +       bus->irq_status = irq_received;
>
>  #if IS_ENABLED(CONFIG_I2C_SLAVE)
> -       if (aspeed_i2c_slave_irq(bus)) {
> -               dev_dbg(bus->dev, "irq handled by slave.\n");
> -               return IRQ_HANDLED;
> +       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
> +               if (!aspeed_i2c_master_irq(bus))

Why do you check the slave if master fails (or vice versa)? I
understand that there are some status bits that have not been handled,
but it doesn't seem reasonable to assume that there is state that the
other should do something with; the only way this would happen is if
the state that you think you are in does not match the status bits you
have been given, but if this is the case, you are already hosed; I
don't think trying the other handler is likely to make things better,
unless there is something that I am missing.

> +                       aspeed_i2c_slave_irq(bus);
> +       } else {
> +               if (!aspeed_i2c_slave_irq(bus))
> +                       aspeed_i2c_master_irq(bus);
>         }
> +#else
> +       aspeed_i2c_master_irq(bus);
>  #endif /* CONFIG_I2C_SLAVE */
>
> -       return aspeed_i2c_master_irq(bus) ? IRQ_HANDLED : IRQ_NONE;
> +       if (bus->irq_status)
> +               dev_err(bus->dev,
> +                       "irq handled != irq. expected 0x%08x, but was 0x%08x\n",
> +                       irq_received, irq_received ^ bus->irq_status);
> +
> +       /* Ack all interrupt bits. */
> +       writel(irq_received, bus->base + ASPEED_I2C_INTR_STS_REG);
> +       spin_unlock(&bus->lock);
> +       return bus->irq_status ? IRQ_NONE : IRQ_HANDLED;
> +}
> +
> +static int aspeed_i2c_check_bus_busy_timeout(struct aspeed_i2c_bus *bus)
> +{
> +       ktime_t timeout = ktime_add_us(ktime_get(), BUS_BUSY_CHECK_TIMEOUT);
> +
> +       might_sleep();
> +
> +       for (;;) {
> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
> +                     (ASPEED_I2CD_BUS_BUSY_STS |
> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))

Is using the Transfer Mode State Machine bits necessary? The
documentation marks it as "for debugging purpose only," so relying on
it makes me nervous.

> +                       return 0;
> +               if (ktime_compare(ktime_get(), timeout) > 0)
> +                       break;
> +               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,

Where did you get this minimum value?

> +                            BUS_BUSY_CHECK_INTERVAL);
> +       }
> +
> +       dev_err(bus->dev, "timeout waiting for idle. attempting recovery\n");
> +       return aspeed_i2c_recover_bus(bus);
>  }
>
>  static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
> @@ -570,22 +617,11 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
>  {
>         struct aspeed_i2c_bus *bus = i2c_get_adapdata(adap);
>         unsigned long time_left, flags;
> -       int ret = 0;
> -
> -       spin_lock_irqsave(&bus->lock, flags);
> -       bus->cmd_err = 0;
>
> -       /* If bus is busy, attempt recovery. We assume a single master
> -        * environment.
> -        */
> -       if (readl(bus->base + ASPEED_I2C_CMD_REG) & ASPEED_I2CD_BUS_BUSY_STS) {
> -               spin_unlock_irqrestore(&bus->lock, flags);
> -               ret = aspeed_i2c_recover_bus(bus);
> -               if (ret)
> -                       return ret;
> -               spin_lock_irqsave(&bus->lock, flags);
> -       }
> +       if (aspeed_i2c_check_bus_busy_timeout(bus))
> +               return -EAGAIN;
>
> +       spin_lock_irqsave(&bus->lock, flags);
>         bus->cmd_err = 0;
>         bus->msgs = msgs;
>         bus->msgs_index = 0;
> @@ -851,7 +887,7 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>         bus->rst = devm_reset_control_get_shared(&pdev->dev, NULL);
>         if (IS_ERR(bus->rst)) {
>                 dev_err(&pdev->dev,
> -                       "missing or invalid reset controller device tree entry");
> +                       "missing or invalid reset controller device tree entry\n");
>                 return PTR_ERR(bus->rst);
>         }
>         reset_control_deassert(bus->rst);
> --
> 2.17.1
>
Jarkko Nikula June 27, 2018, 7:48 a.m. UTC | #2
Hi

On 06/26/2018 07:58 PM, Jae Hyun Yoo wrote:
> BMC firmware should support some multi-master use cases such as multi-node,
> IPMB, BMC-ME link and so on but the current ASPEED I2C driver is a bit
> unstable for the multi-master use case. So this patch improves ASPEED I2C
> driver to support the multi-master use case stably.
> 
> Changes:
> * Added XFER_MODE status register checking logic into
>    aspeed_i2c_master_xfer to improve the current bus busy checking logic.
> * Changed the order of enum aspeed_i2c_master_state and
>    enum aspeed_i2c_slave_state defines to make their initial values set to
>    ASPEED_I2C_MASTER_INACTIVE and ASPEED_I2C_SLAVE_STOP respectively.
>    In case of multi-master use with previous code, if a slave data comes
>    ahead of the first master xfer, master_state starts from an invalid
>    state. This change fixed the issue.
> * Adjusted spin_lock scope to make it wrap the whole irq handler using
>    a single lock and unlock pair covers both master and slave handlers.
> * Added irq_status variable as a member of the struct aspeed_i2c_bus to
>    collect handled interrupt bits throughout the master and the slave irq
>    handlers.
> * Added control logic to put an order on calling the master and the slave
>    irq handlers based on their current states.
> 
This does many unrelated looking changes in one patch making it more 
vulnerable for potential multiple regressions. For instance busy 
checking goes from single read to loop with 250 ms timeout in this patch 
while changing also spin lock logic and interrupt handling.

Now if there is some regression it might be difficult to find what 
change in this patch is causing it and more over things goes more 
complicated if some other kind of regressions are found pointing into 
the same commit.

I suggest splitting this into multiple smaller patches. For instance 
having first simple conversions patches that are unlikely to cause a 
regression like one patch adding '\n' to error print, another moving 
irq_status variable into struct aspeed_i2c_bus and so on followed by 
patches that change logic like busy checking, spin lock change and then 
patch or more for multi-master support.
Jae Hyun Yoo June 27, 2018, 5:55 p.m. UTC | #3
Thanks Brendan for the review. Please see my answers inline.

On 6/27/2018 12:36 AM, Brendan Higgins wrote:
> On Tue, Jun 26, 2018 at 9:58 AM Jae Hyun Yoo
> <jae.hyun.yoo@linux.intel.com> wrote:
>>
>> BMC firmware should support some multi-master use cases such as multi-node,
>> IPMB, BMC-ME link and so on but the current ASPEED I2C driver is a bit
>> unstable for the multi-master use case. So this patch improves ASPEED I2C
>> driver to support the multi-master use case stably.
>>
>> Changes:
>> * Added XFER_MODE status register checking logic into
>>    aspeed_i2c_master_xfer to improve the current bus busy checking logic.
>> * Changed the order of enum aspeed_i2c_master_state and
>>    enum aspeed_i2c_slave_state defines to make their initial values set to
>>    ASPEED_I2C_MASTER_INACTIVE and ASPEED_I2C_SLAVE_STOP respectively.
>>    In case of multi-master use with previous code, if a slave data comes
>>    ahead of the first master xfer, master_state starts from an invalid
>>    state. This change fixed the issue.
>> * Adjusted spin_lock scope to make it wrap the whole irq handler using
>>    a single lock and unlock pair covers both master and slave handlers.
>> * Added irq_status variable as a member of the struct aspeed_i2c_bus to
>>    collect handled interrupt bits throughout the master and the slave irq
>>    handlers.
>> * Added control logic to put an order on calling the master and the slave
>>    irq handlers based on their current states.
>>
>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>> ---
>>   drivers/i2c/busses/i2c-aspeed.c | 200 +++++++++++++++++++-------------
>>   1 file changed, 118 insertions(+), 82 deletions(-)
>>
>> diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
>> index 60e4d0e939a3..ac3e17d9a365 100644
>> --- a/drivers/i2c/busses/i2c-aspeed.c
>> +++ b/drivers/i2c/busses/i2c-aspeed.c
>> @@ -4,6 +4,7 @@
>>    *  Copyright (C) 2012-2017 ASPEED Technology Inc.
>>    *  Copyright 2017 IBM Corporation
>>    *  Copyright 2017 Google, Inc.
>> + *  Copyright (c) 2018 Intel Corporation
>>    *
>>    *  This program is free software; you can redistribute it and/or modify
>>    *  it under the terms of the GNU General Public License version 2 as
>> @@ -12,6 +13,7 @@
>>
>>   #include <linux/clk.h>
>>   #include <linux/completion.h>
>> +#include <linux/delay.h>
>>   #include <linux/err.h>
>>   #include <linux/errno.h>
>>   #include <linux/i2c.h>
>> @@ -82,6 +84,11 @@
>>   #define ASPEED_I2CD_INTR_RX_DONE                       BIT(2)
>>   #define ASPEED_I2CD_INTR_TX_NAK                                BIT(1)
>>   #define ASPEED_I2CD_INTR_TX_ACK                                BIT(0)
>> +#define ASPEED_I2CD_INTR_ERRORS                                                       \
>> +               (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                             \
>> +                ASPEED_I2CD_INTR_SCL_TIMEOUT |                                \
>> +                ASPEED_I2CD_INTR_ABNORMAL |                                   \
>> +                ASPEED_I2CD_INTR_ARBIT_LOSS)
>>   #define ASPEED_I2CD_INTR_ALL                                                  \
>>                  (ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |                             \
>>                   ASPEED_I2CD_INTR_BUS_RECOVER_DONE |                           \
>> @@ -94,6 +101,7 @@
>>                   ASPEED_I2CD_INTR_TX_ACK)
>>
>>   /* 0x14 : I2CD Command/Status Register   */
>> +#define ASPEED_I2CD_XFER_MODE_STS_MASK                 GENMASK(22, 19)
>>   #define ASPEED_I2CD_SCL_LINE_STS                       BIT(18)
>>   #define ASPEED_I2CD_SDA_LINE_STS                       BIT(17)
>>   #define ASPEED_I2CD_BUS_BUSY_STS                       BIT(16)
>> @@ -110,23 +118,27 @@
>>   /* 0x18 : I2CD Slave Device Address Register   */
>>   #define ASPEED_I2CD_DEV_ADDR_MASK                      GENMASK(6, 0)
>>
>> +/* Timeout for bus busy checking */
>> +#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 250ms */
>> +#define BUS_BUSY_CHECK_INTERVAL                                10000  /* 10ms */
> 
> Could you add a comment on where you got these values from?
> 

These are coming from ASPEED SDK code. Actually, they use 100ms for
timeout and 10ms for interval but I increased the timeout value to
250ms so that it covers a various range of bus speed. I think, it
should be computed at run time based on the current bus speed, or
we could add these as device tree settings. How do you think about it?

 >
> Also, please use the same naming pattern as above.
> 

Sure. Will change it using the same naming pattern as above.

>> +
>>   enum aspeed_i2c_master_state {
>> +       ASPEED_I2C_MASTER_INACTIVE,
>>          ASPEED_I2C_MASTER_START,
>>          ASPEED_I2C_MASTER_TX_FIRST,
>>          ASPEED_I2C_MASTER_TX,
>>          ASPEED_I2C_MASTER_RX_FIRST,
>>          ASPEED_I2C_MASTER_RX,
>>          ASPEED_I2C_MASTER_STOP,
>> -       ASPEED_I2C_MASTER_INACTIVE,
>>   };
>>
>>   enum aspeed_i2c_slave_state {
>> +       ASPEED_I2C_SLAVE_STOP,
>>          ASPEED_I2C_SLAVE_START,
>>          ASPEED_I2C_SLAVE_READ_REQUESTED,
>>          ASPEED_I2C_SLAVE_READ_PROCESSED,
>>          ASPEED_I2C_SLAVE_WRITE_REQUESTED,
>>          ASPEED_I2C_SLAVE_WRITE_RECEIVED,
>> -       ASPEED_I2C_SLAVE_STOP,
>>   };
>>
>>   struct aspeed_i2c_bus {
>> @@ -150,6 +162,7 @@ struct aspeed_i2c_bus {
>>          int                             cmd_err;
>>          /* Protected only by i2c_lock_bus */
>>          int                             master_xfer_result;
>> +       u32                             irq_status;
>>   #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>          struct i2c_client               *slave;
>>          enum aspeed_i2c_slave_state     slave_state;
>> @@ -229,37 +242,30 @@ static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
>>   #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>   static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
>>   {
>> -       u32 command, irq_status, status_ack = 0;
>> +       u32 command, status_ack = 0;
>>          struct i2c_client *slave = bus->slave;
>> -       bool irq_handled = true;
>>          u8 value;
>>
>> -       spin_lock(&bus->lock);
>> -       if (!slave) {
>> -               irq_handled = false;
>> -               goto out;
>> -       }
>> +       if (!slave)
>> +               return false;
>>
>>          command = readl(bus->base + ASPEED_I2C_CMD_REG);
>> -       irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
>>
>>          /* Slave was requested, restart state machine. */
>> -       if (irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
>> +       if (bus->irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
>>                  status_ack |= ASPEED_I2CD_INTR_SLAVE_MATCH;
>>                  bus->slave_state = ASPEED_I2C_SLAVE_START;
>>          }
>>
>>          /* Slave is not currently active, irq was for someone else. */
>> -       if (bus->slave_state == ASPEED_I2C_SLAVE_STOP) {
>> -               irq_handled = false;
>> -               goto out;
>> -       }
>> +       if (bus->slave_state == ASPEED_I2C_SLAVE_STOP)
>> +               return false;
>>
>>          dev_dbg(bus->dev, "slave irq status 0x%08x, cmd 0x%08x\n",
>> -               irq_status, command);
>> +               bus->irq_status, command);
>>
>>          /* Slave was sent something. */
>> -       if (irq_status & ASPEED_I2CD_INTR_RX_DONE) {
>> +       if (bus->irq_status & ASPEED_I2CD_INTR_RX_DONE) {
>>                  value = readl(bus->base + ASPEED_I2C_BYTE_BUF_REG) >> 8;
>>                  /* Handle address frame. */
>>                  if (bus->slave_state == ASPEED_I2C_SLAVE_START) {
>> @@ -274,28 +280,29 @@ static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
>>          }
>>
>>          /* Slave was asked to stop. */
>> -       if (irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
>> +       if (bus->irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
>>                  status_ack |= ASPEED_I2CD_INTR_NORMAL_STOP;
>>                  bus->slave_state = ASPEED_I2C_SLAVE_STOP;
>>          }
>> -       if (irq_status & ASPEED_I2CD_INTR_TX_NAK) {
>> +       if (bus->irq_status & ASPEED_I2CD_INTR_TX_NAK) {
>>                  status_ack |= ASPEED_I2CD_INTR_TX_NAK;
>>                  bus->slave_state = ASPEED_I2C_SLAVE_STOP;
>>          }
>> +       if (bus->irq_status & ASPEED_I2CD_INTR_TX_ACK) {
>> +               status_ack |= ASPEED_I2CD_INTR_TX_ACK;
>> +       }
>>
>>          switch (bus->slave_state) {
>>          case ASPEED_I2C_SLAVE_READ_REQUESTED:
>> -               if (irq_status & ASPEED_I2CD_INTR_TX_ACK)
>> +               if (bus->irq_status & ASPEED_I2CD_INTR_TX_ACK)
>>                          dev_err(bus->dev, "Unexpected ACK on read request.\n");
>>                  bus->slave_state = ASPEED_I2C_SLAVE_READ_PROCESSED;
>> -
>>                  i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value);
>>                  writel(value, bus->base + ASPEED_I2C_BYTE_BUF_REG);
>>                  writel(ASPEED_I2CD_S_TX_CMD, bus->base + ASPEED_I2C_CMD_REG);
>>                  break;
>>          case ASPEED_I2C_SLAVE_READ_PROCESSED:
>> -               status_ack |= ASPEED_I2CD_INTR_TX_ACK;
>> -               if (!(irq_status & ASPEED_I2CD_INTR_TX_ACK))
>> +               if (!(bus->irq_status & ASPEED_I2CD_INTR_TX_ACK))
>>                          dev_err(bus->dev,
>>                                  "Expected ACK after processed read.\n");
>>                  i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value);
>> @@ -318,15 +325,8 @@ static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
>>                  break;
>>          }
>>
>> -       if (status_ack != irq_status)
>> -               dev_err(bus->dev,
>> -                       "irq handled != irq. expected %x, but was %x\n",
>> -                       irq_status, status_ack);
>> -       writel(status_ack, bus->base + ASPEED_I2C_INTR_STS_REG);
>> -
>> -out:
>> -       spin_unlock(&bus->lock);
>> -       return irq_handled;
>> +       bus->irq_status ^= status_ack;
>> +       return !bus->irq_status;
>>   }
>>   #endif /* CONFIG_I2C_SLAVE */
>>
>> @@ -384,20 +384,19 @@ static int aspeed_i2c_is_irq_error(u32 irq_status)
>>
>>   static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>>   {
>> -       u32 irq_status, status_ack = 0, command = 0;
>> +       u32 status_ack = 0, command = 0;
>>          struct i2c_msg *msg;
>>          u8 recv_byte;
>>          int ret;
>>
>> -       spin_lock(&bus->lock);
>> -       irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
>> -       /* Ack all interrupt bits. */
>> -       writel(irq_status, bus->base + ASPEED_I2C_INTR_STS_REG);
>> -
>> -       if (irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
>> +       if (bus->irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
>>                  bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
>>                  status_ack |= ASPEED_I2CD_INTR_BUS_RECOVER_DONE;
>>                  goto out_complete;
>> +       } else {
>> +               /* Master is not currently active, irq was for someone else. */
>> +               if (bus->master_state == ASPEED_I2C_MASTER_INACTIVE)
>> +                       goto out_no_complete;
>>          }
>>
>>          /*
>> @@ -405,20 +404,23 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>>           * should clear the command queue effectively taking us back to the
>>           * INACTIVE state.
>>           */
>> -       ret = aspeed_i2c_is_irq_error(irq_status);
>> -       if (ret < 0) {
>> -               dev_dbg(bus->dev, "received error interrupt: 0x%08x",
>> -                       irq_status);
>> +       ret = aspeed_i2c_is_irq_error(bus->irq_status);
>> +       if (ret) {
>> +               dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
>> +                       bus->irq_status);
>>                  bus->cmd_err = ret;
>>                  bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
>> +               status_ack |= (bus->irq_status & ASPEED_I2CD_INTR_ERRORS);
>>                  goto out_complete;
>>          }
>>
>>          /* We are in an invalid state; reset bus to a known state. */
>>          if (!bus->msgs) {
>> -               dev_err(bus->dev, "bus in unknown state");
>> +               dev_err(bus->dev, "bus in unknown state irq_status: 0x%x\n",
>> +                       bus->irq_status);
>>                  bus->cmd_err = -EIO;
>> -               if (bus->master_state != ASPEED_I2C_MASTER_STOP)
>> +               if (bus->master_state != ASPEED_I2C_MASTER_STOP &&
>> +                   bus->master_state != ASPEED_I2C_MASTER_INACTIVE)
>>                          aspeed_i2c_do_stop(bus);
>>                  goto out_no_complete;
>>          }
>> @@ -430,7 +432,13 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>>           * then update the state and handle the new state below.
>>           */
>>          if (bus->master_state == ASPEED_I2C_MASTER_START) {
>> -               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
>> +               if (unlikely(!(bus->irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
>> +                       if (unlikely(!(bus->irq_status &
>> +                                    ASPEED_I2CD_INTR_TX_NAK))) {
>> +                               bus->cmd_err = -ENXIO;
>> +                               bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
>> +                               goto out_complete;
>> +                       }
>>                          pr_devel("no slave present at %02x", msg->addr);
>>                          status_ack |= ASPEED_I2CD_INTR_TX_NAK;
>>                          bus->cmd_err = -ENXIO;
>> @@ -450,12 +458,13 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>>
>>          switch (bus->master_state) {
>>          case ASPEED_I2C_MASTER_TX:
>> -               if (unlikely(irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
>> -                       dev_dbg(bus->dev, "slave NACKed TX");
>> +               if (unlikely(bus->irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
>> +                       dev_dbg(bus->dev, "slave NACKed TX\n");
>>                          status_ack |= ASPEED_I2CD_INTR_TX_NAK;
>>                          goto error_and_stop;
>> -               } else if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
>> -                       dev_err(bus->dev, "slave failed to ACK TX");
>> +               } else if (unlikely(!(bus->irq_status &
>> +                                     ASPEED_I2CD_INTR_TX_ACK))) {
>> +                       dev_err(bus->dev, "slave failed to ACK TX\n");
>>                          goto error_and_stop;
>>                  }
>>                  status_ack |= ASPEED_I2CD_INTR_TX_ACK;
>> @@ -473,12 +482,12 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>>                  goto out_no_complete;
>>          case ASPEED_I2C_MASTER_RX_FIRST:
>>                  /* RX may not have completed yet (only address cycle) */
>> -               if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE))
>> +               if (!(bus->irq_status & ASPEED_I2CD_INTR_RX_DONE))
>>                          goto out_no_complete;
>>                  /* fallthrough intended */
>>          case ASPEED_I2C_MASTER_RX:
>> -               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
>> -                       dev_err(bus->dev, "master failed to RX");
>> +               if (unlikely(!(bus->irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
>> +                       dev_err(bus->dev, "master failed to RX\n");
>>                          goto error_and_stop;
>>                  }
>>                  status_ack |= ASPEED_I2CD_INTR_RX_DONE;
>> @@ -508,8 +517,11 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>>                  }
>>                  goto out_no_complete;
>>          case ASPEED_I2C_MASTER_STOP:
>> -               if (unlikely(!(irq_status & ASPEED_I2CD_INTR_NORMAL_STOP))) {
>> -                       dev_err(bus->dev, "master failed to STOP");
>> +               if (unlikely(!(bus->irq_status &
>> +                              ASPEED_I2CD_INTR_NORMAL_STOP))) {
>> +                       dev_err(bus->dev,
>> +                               "master failed to STOP irq_status:0x%x\n",
>> +                               bus->irq_status);
>>                          bus->cmd_err = -EIO;
>>                          /* Do not STOP as we have already tried. */
>>                  } else {
>> @@ -520,8 +532,8 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>>                  goto out_complete;
>>          case ASPEED_I2C_MASTER_INACTIVE:
>>                  dev_err(bus->dev,
>> -                       "master received interrupt 0x%08x, but is inactive",
>> -                       irq_status);
>> +                       "master received interrupt 0x%08x, but is inactive\n",
>> +                       bus->irq_status);
>>                  bus->cmd_err = -EIO;
>>                  /* Do not STOP as we should be inactive. */
>>                  goto out_complete;
>> @@ -543,26 +555,61 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
>>                  bus->master_xfer_result = bus->msgs_index + 1;
>>          complete(&bus->cmd_complete);
>>   out_no_complete:
>> -       if (irq_status != status_ack)
>> -               dev_err(bus->dev,
>> -                       "irq handled != irq. expected 0x%08x, but was 0x%08x\n",
>> -                       irq_status, status_ack);
>> -       spin_unlock(&bus->lock);
>> -       return !!irq_status;
>> +       bus->irq_status ^= status_ack;
>> +       return !bus->irq_status;
>>   }
>>
>>   static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
>>   {
>>          struct aspeed_i2c_bus *bus = dev_id;
>> +       u32 irq_received;
>> +
>> +       spin_lock(&bus->lock);
>> +       irq_received = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
>> +       bus->irq_status = irq_received;
>>
>>   #if IS_ENABLED(CONFIG_I2C_SLAVE)
>> -       if (aspeed_i2c_slave_irq(bus)) {
>> -               dev_dbg(bus->dev, "irq handled by slave.\n");
>> -               return IRQ_HANDLED;
>> +       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>> +               if (!aspeed_i2c_master_irq(bus))
> 
> Why do you check the slave if master fails (or vice versa)? I
> understand that there are some status bits that have not been handled,
> but it doesn't seem reasonable to assume that there is state that the
> other should do something with; the only way this would happen is if
> the state that you think you are in does not match the status bits you
> have been given, but if this is the case, you are already hosed; I
> don't think trying the other handler is likely to make things better,
> unless there is something that I am missing.
> 

In most of cases, interrupt bits are set one by one but there are also a
lot of other cases that ASPEED I2C H/W sends multiple interrupt bits
with combining master and slave events using a single interrupt call. It
happens much in multi-master environment than single-master. For
example, when master is waiting for a NORMAL_STOP interrupt in its
MASTER_STOP state, SLAVE_MATCH and RX_DONE interrupts could come along
with the NORMAL_STOP in case of an another master immediately sends data
just after acquiring the bus - it happens a lot in BMC-ME connection
practically. In this case, the NORMAL_STOP interrupt should be handled
by master_irq and the SLAVE_MATCH and RX_DONE interrupts should be
handled by slave_irq so it's the reason why this code is added.

>> +                       aspeed_i2c_slave_irq(bus);
>> +       } else {
>> +               if (!aspeed_i2c_slave_irq(bus))
>> +                       aspeed_i2c_master_irq(bus);
>>          }
>> +#else
>> +       aspeed_i2c_master_irq(bus);
>>   #endif /* CONFIG_I2C_SLAVE */
>>
>> -       return aspeed_i2c_master_irq(bus) ? IRQ_HANDLED : IRQ_NONE;
>> +       if (bus->irq_status)
>> +               dev_err(bus->dev,
>> +                       "irq handled != irq. expected 0x%08x, but was 0x%08x\n",
>> +                       irq_received, irq_received ^ bus->irq_status);
>> +
>> +       /* Ack all interrupt bits. */
>> +       writel(irq_received, bus->base + ASPEED_I2C_INTR_STS_REG);
>> +       spin_unlock(&bus->lock);
>> +       return bus->irq_status ? IRQ_NONE : IRQ_HANDLED;
>> +}
>> +
>> +static int aspeed_i2c_check_bus_busy_timeout(struct aspeed_i2c_bus *bus)
>> +{
>> +       ktime_t timeout = ktime_add_us(ktime_get(), BUS_BUSY_CHECK_TIMEOUT);
>> +
>> +       might_sleep();
>> +
>> +       for (;;) {
>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
> 
> Is using the Transfer Mode State Machine bits necessary? The
> documentation marks it as "for debugging purpose only," so relying on
> it makes me nervous.
> 

As you said, the documentation marks it as "for debugging purpose only."
but ASPEED also uses this way in their SDK code because it's the best
way for checking bus busy status which can cover both single and
multi-master use cases.

>> +                       return 0;
>> +               if (ktime_compare(ktime_get(), timeout) > 0)
>> +                       break;
>> +               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,
> 
> Where did you get this minimum value?
> 

No source for the minimum value. ASPEED uses mdelay(10) in their SDK
but I changed that code using usleep_range and the range value was set
with considering time stretching of usleep_range.
regmap_read_poll_timeout was a reference for this code.

Thanks,

Jae

>> +                            BUS_BUSY_CHECK_INTERVAL);
>> +       }
>> +
>> +       dev_err(bus->dev, "timeout waiting for idle. attempting recovery\n");
>> +       return aspeed_i2c_recover_bus(bus);
>>   }
>>
>>   static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
>> @@ -570,22 +617,11 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
>>   {
>>          struct aspeed_i2c_bus *bus = i2c_get_adapdata(adap);
>>          unsigned long time_left, flags;
>> -       int ret = 0;
>> -
>> -       spin_lock_irqsave(&bus->lock, flags);
>> -       bus->cmd_err = 0;
>>
>> -       /* If bus is busy, attempt recovery. We assume a single master
>> -        * environment.
>> -        */
>> -       if (readl(bus->base + ASPEED_I2C_CMD_REG) & ASPEED_I2CD_BUS_BUSY_STS) {
>> -               spin_unlock_irqrestore(&bus->lock, flags);
>> -               ret = aspeed_i2c_recover_bus(bus);
>> -               if (ret)
>> -                       return ret;
>> -               spin_lock_irqsave(&bus->lock, flags);
>> -       }
>> +       if (aspeed_i2c_check_bus_busy_timeout(bus))
>> +               return -EAGAIN;
>>
>> +       spin_lock_irqsave(&bus->lock, flags);
>>          bus->cmd_err = 0;
>>          bus->msgs = msgs;
>>          bus->msgs_index = 0;
>> @@ -851,7 +887,7 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev)
>>          bus->rst = devm_reset_control_get_shared(&pdev->dev, NULL);
>>          if (IS_ERR(bus->rst)) {
>>                  dev_err(&pdev->dev,
>> -                       "missing or invalid reset controller device tree entry");
>> +                       "missing or invalid reset controller device tree entry\n");
>>                  return PTR_ERR(bus->rst);
>>          }
>>          reset_control_deassert(bus->rst);
>> --
>> 2.17.1
>>
Jae Hyun Yoo June 27, 2018, 6:01 p.m. UTC | #4
Hi Jarkko,

Thanks for the review. Please see my answer below.

On 6/27/2018 12:48 AM, Jarkko Nikula wrote:
> Hi
> 
> On 06/26/2018 07:58 PM, Jae Hyun Yoo wrote:
>> BMC firmware should support some multi-master use cases such as 
>> multi-node,
>> IPMB, BMC-ME link and so on but the current ASPEED I2C driver is a bit
>> unstable for the multi-master use case. So this patch improves ASPEED I2C
>> driver to support the multi-master use case stably.
>>
>> Changes:
>> * Added XFER_MODE status register checking logic into
>>    aspeed_i2c_master_xfer to improve the current bus busy checking logic.
>> * Changed the order of enum aspeed_i2c_master_state and
>>    enum aspeed_i2c_slave_state defines to make their initial values 
>> set to
>>    ASPEED_I2C_MASTER_INACTIVE and ASPEED_I2C_SLAVE_STOP respectively.
>>    In case of multi-master use with previous code, if a slave data comes
>>    ahead of the first master xfer, master_state starts from an invalid
>>    state. This change fixed the issue.
>> * Adjusted spin_lock scope to make it wrap the whole irq handler using
>>    a single lock and unlock pair covers both master and slave handlers.
>> * Added irq_status variable as a member of the struct aspeed_i2c_bus to
>>    collect handled interrupt bits throughout the master and the slave irq
>>    handlers.
>> * Added control logic to put an order on calling the master and the slave
>>    irq handlers based on their current states.
>>
> This does many unrelated looking changes in one patch making it more 
> vulnerable for potential multiple regressions. For instance busy 
> checking goes from single read to loop with 250 ms timeout in this patch 
> while changing also spin lock logic and interrupt handling.
> 
> Now if there is some regression it might be difficult to find what 
> change in this patch is causing it and more over things goes more 
> complicated if some other kind of regressions are found pointing into 
> the same commit.
> 
> I suggest splitting this into multiple smaller patches. For instance 
> having first simple conversions patches that are unlikely to cause a 
> regression like one patch adding '\n' to error print, another moving 
> irq_status variable into struct aspeed_i2c_bus and so on followed by 
> patches that change logic like busy checking, spin lock change and then 
> patch or more for multi-master support.
> 

Yes, that makes sense and I agree with you. I'll split out this patch
into multiple smaller patches as you suggested.

Thanks,

Jae
Brendan Higgins July 12, 2018, 9:33 a.m. UTC | #5
On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
<jae.hyun.yoo@linux.intel.com> wrote:
>
<snip>
> >> +/* Timeout for bus busy checking */
> >> +#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 250ms */
> >> +#define BUS_BUSY_CHECK_INTERVAL                                10000  /* 10ms */
> >
> > Could you add a comment on where you got these values from?
> >
>
> These are coming from ASPEED SDK code. Actually, they use 100ms for
> timeout and 10ms for interval but I increased the timeout value to
> 250ms so that it covers a various range of bus speed. I think, it
> should be computed at run time based on the current bus speed, or
> we could add these as device tree settings. How do you think about it?
>

This should definitely be a device tree setting. If one of the busses is being
used as a regular I2C bus, it could hold the bus for an unlimited amount of
time before sending a STOP. As for a default, 100ms is probably fine given
that, a) the limit will only apply to multi-master mode, and b) multi-master
mode will probably almost always be used with IPMB, or MCTP (MCTP actually
recommends a 100ms timeout for this purpose, see
https://www.dmtf.org/sites/default/files/standards/documents/DSP0237_1.1.0.pdf,
symbol PT2a). That being said, if you actually want to implement IPMB, or MCTP
arbitration logic, it is much more complicated.

>  >
<snip>
> >>   #if IS_ENABLED(CONFIG_I2C_SLAVE)
> >> -       if (aspeed_i2c_slave_irq(bus)) {
> >> -               dev_dbg(bus->dev, "irq handled by slave.\n");
> >> -               return IRQ_HANDLED;
> >> +       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
> >> +               if (!aspeed_i2c_master_irq(bus))
> >
> > Why do you check the slave if master fails (or vice versa)? I
> > understand that there are some status bits that have not been handled,
> > but it doesn't seem reasonable to assume that there is state that the
> > other should do something with; the only way this would happen is if
> > the state that you think you are in does not match the status bits you
> > have been given, but if this is the case, you are already hosed; I
> > don't think trying the other handler is likely to make things better,
> > unless there is something that I am missing.
> >
>
> In most of cases, interrupt bits are set one by one but there are also a
> lot of other cases that ASPEED I2C H/W sends multiple interrupt bits
> with combining master and slave events using a single interrupt call. It
> happens much in multi-master environment than single-master. For
> example, when master is waiting for a NORMAL_STOP interrupt in its
> MASTER_STOP state, SLAVE_MATCH and RX_DONE interrupts could come along
> with the NORMAL_STOP in case of an another master immediately sends data
> just after acquiring the bus - it happens a lot in BMC-ME connection
> practically. In this case, the NORMAL_STOP interrupt should be handled
> by master_irq and the SLAVE_MATCH and RX_DONE interrupts should be
> handled by slave_irq so it's the reason why this code is added.

That sucks. Well, it sounds like there are only a handful of cases in which
this can happen. Maybe enumerate these cases and error out or at least warn if
it is not one of them?

>
<snip>
> >> +       for (;;) {
> >> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
> >> +                     (ASPEED_I2CD_BUS_BUSY_STS |
> >> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
> >
> > Is using the Transfer Mode State Machine bits necessary? The
> > documentation marks it as "for debugging purpose only," so relying on
> > it makes me nervous.
> >
>
> As you said, the documentation marks it as "for debugging purpose only."
> but ASPEED also uses this way in their SDK code because it's the best
> way for checking bus busy status which can cover both single and
> multi-master use cases.
>

Well, it would also be really nice to have access to this bit if someone wants
to implement MCTP. Could we maybe check with Aspeed what them meant by "for
debugging purposes only" and document it here? It makes me nervous to rely on
debugging functionality for normal usage.

> >> +                       return 0;
> >> +               if (ktime_compare(ktime_get(), timeout) > 0)
> >> +                       break;
> >> +               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,
> >
> > Where did you get this minimum value?
> >
>
> No source for the minimum value. ASPEED uses mdelay(10) in their SDK
> but I changed that code using usleep_range and the range value was set
> with considering time stretching of usleep_range.
> regmap_read_poll_timeout was a reference for this code.

What protocol are you trying to implement on top of this? You mentioned BMC-ME
above; that's IPMB, right? For most use cases, this should work, but if you
need arbitration, you will need to do quite a bit more work.

>
> Thanks,
>
> Jae
<snip>

Cheers
Jae Hyun Yoo July 12, 2018, 6:21 p.m. UTC | #6
On 7/12/2018 2:33 AM, Brendan Higgins wrote:
> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
> <jae.hyun.yoo@linux.intel.com> wrote:
>>
> <snip>
>>>> +/* Timeout for bus busy checking */
>>>> +#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 250ms */
>>>> +#define BUS_BUSY_CHECK_INTERVAL                                10000  /* 10ms */
>>>
>>> Could you add a comment on where you got these values from?
>>>
>>
>> These are coming from ASPEED SDK code. Actually, they use 100ms for
>> timeout and 10ms for interval but I increased the timeout value to
>> 250ms so that it covers a various range of bus speed. I think, it
>> should be computed at run time based on the current bus speed, or
>> we could add these as device tree settings. How do you think about it?
>>
> 
> This should definitely be a device tree setting. If one of the busses is being
> used as a regular I2C bus, it could hold the bus for an unlimited amount of
> time before sending a STOP. As for a default, 100ms is probably fine given
> that, a) the limit will only apply to multi-master mode, and b) multi-master
> mode will probably almost always be used with IPMB, or MCTP (MCTP actually
> recommends a 100ms timeout for this purpose, see
> https://www.dmtf.org/sites/default/files/standards/documents/DSP0237_1.1.0.pdf,
> symbol PT2a). That being said, if you actually want to implement IPMB, or MCTP
> arbitration logic, it is much more complicated.
> 

Okay then, I think, we can fix the timeout value to 100ms and enable the
bus busy checking logic only when 'multi-master' is set in device tree.
My thought is, no additional arbitration logic is needed because
arbitration is performed in H/W level and H/W reports
ASPEED_I2CD_INTR_ARBIT_LOSS when it fails acquiring a bus. The
ARBIT_LOSS event is already being handled well by this driver code you
implemented.

>>   >
> <snip>
>>>>    #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>>> -       if (aspeed_i2c_slave_irq(bus)) {
>>>> -               dev_dbg(bus->dev, "irq handled by slave.\n");
>>>> -               return IRQ_HANDLED;
>>>> +       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>>> +               if (!aspeed_i2c_master_irq(bus))
>>>
>>> Why do you check the slave if master fails (or vice versa)? I
>>> understand that there are some status bits that have not been handled,
>>> but it doesn't seem reasonable to assume that there is state that the
>>> other should do something with; the only way this would happen is if
>>> the state that you think you are in does not match the status bits you
>>> have been given, but if this is the case, you are already hosed; I
>>> don't think trying the other handler is likely to make things better,
>>> unless there is something that I am missing.
>>>
>>
>> In most of cases, interrupt bits are set one by one but there are also a
>> lot of other cases that ASPEED I2C H/W sends multiple interrupt bits
>> with combining master and slave events using a single interrupt call. It
>> happens much in multi-master environment than single-master. For
>> example, when master is waiting for a NORMAL_STOP interrupt in its
>> MASTER_STOP state, SLAVE_MATCH and RX_DONE interrupts could come along
>> with the NORMAL_STOP in case of an another master immediately sends data
>> just after acquiring the bus - it happens a lot in BMC-ME connection
>> practically. In this case, the NORMAL_STOP interrupt should be handled
>> by master_irq and the SLAVE_MATCH and RX_DONE interrupts should be
>> handled by slave_irq so it's the reason why this code is added.
> 
> That sucks. Well, it sounds like there are only a handful of cases in which
> this can happen. Maybe enumerate these cases and error out or at least warn if
> it is not one of them?
> 

Yes, that sucks but that is Aspeed's I2C IP behavior and that's the
reason why they implemented some combination bits handling code in
their SDK. Actually, the cases are happening somewhat frequently
but that would not be a problem if we handle the cases properly instead
of making error out or warn.

>>
> <snip>
>>>> +       for (;;) {
>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
>>>
>>> Is using the Transfer Mode State Machine bits necessary? The
>>> documentation marks it as "for debugging purpose only," so relying on
>>> it makes me nervous.
>>>
>>
>> As you said, the documentation marks it as "for debugging purpose only."
>> but ASPEED also uses this way in their SDK code because it's the best
>> way for checking bus busy status which can cover both single and
>> multi-master use cases.
>>
> 
> Well, it would also be really nice to have access to this bit if someone wants
> to implement MCTP. Could we maybe check with Aspeed what them meant by "for
> debugging purposes only" and document it here? It makes me nervous to rely on
> debugging functionality for normal usage.
> 

Okay, I'll check it with Aspeed. Will let you know their response.

>>>> +                       return 0;
>>>> +               if (ktime_compare(ktime_get(), timeout) > 0)
>>>> +                       break;
>>>> +               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,
>>>
>>> Where did you get this minimum value?
>>>
>>
>> No source for the minimum value. ASPEED uses mdelay(10) in their SDK
>> but I changed that code using usleep_range and the range value was set
>> with considering time stretching of usleep_range.
>> regmap_read_poll_timeout was a reference for this code.
> 
> What protocol are you trying to implement on top of this? You mentioned BMC-ME
> above; that's IPMB, right? For most use cases, this should work, but if you
> need arbitration, you will need to do quite a bit more work.
> 

Yes, I'm implementing IPMB for a BMC-ME channel. As I said above,
arbitration will be performed in H/W level and it's already been handled
well by your code. This bus busy checking logic is for checking whether
any slave operation is currently ongoing or not at the timing of
master_xfer is called. It's not for arbitration but for preventing state
conflicts between master and slave operations.

FYI, I broke down this patch into smaller patches you reviewed
Today. Thanks for sharing your time for reviewing the patches.
I'll send remaining patches after completing review on those
patches because the remaining patches have dependency on them.

Thanks!

>>
>> Thanks,
>>
>> Jae
> <snip>
> 
> Cheers
>
Jae Hyun Yoo July 13, 2018, 5:21 p.m. UTC | #7
On 7/12/2018 11:21 AM, Jae Hyun Yoo wrote:
> On 7/12/2018 2:33 AM, Brendan Higgins wrote:
>> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
>> <jae.hyun.yoo@linux.intel.com> wrote:
>>>
>> <snip>
>>>>> +/* Timeout for bus busy checking */
>>>>> +#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 
>>>>> 250ms */
>>>>> +#define BUS_BUSY_CHECK_INTERVAL                                
>>>>> 10000  /* 10ms */
>>>>
>>>> Could you add a comment on where you got these values from?
>>>>
>>>
>>> These are coming from ASPEED SDK code. Actually, they use 100ms for
>>> timeout and 10ms for interval but I increased the timeout value to
>>> 250ms so that it covers a various range of bus speed. I think, it
>>> should be computed at run time based on the current bus speed, or
>>> we could add these as device tree settings. How do you think about it?
>>>
>>
>> This should definitely be a device tree setting. If one of the busses 
>> is being
>> used as a regular I2C bus, it could hold the bus for an unlimited 
>> amount of
>> time before sending a STOP. As for a default, 100ms is probably fine 
>> given
>> that, a) the limit will only apply to multi-master mode, and b) 
>> multi-master
>> mode will probably almost always be used with IPMB, or MCTP (MCTP 
>> actually
>> recommends a 100ms timeout for this purpose, see
>> https://www.dmtf.org/sites/default/files/standards/documents/DSP0237_1.1.0.pdf, 
>>
>> symbol PT2a). That being said, if you actually want to implement IPMB, 
>> or MCTP
>> arbitration logic, it is much more complicated.
>>
> 
> Okay then, I think, we can fix the timeout value to 100ms and enable the
> bus busy checking logic only when 'multi-master' is set in device tree.
> My thought is, no additional arbitration logic is needed because
> arbitration is performed in H/W level and H/W reports
> ASPEED_I2CD_INTR_ARBIT_LOSS when it fails acquiring a bus. The
> ARBIT_LOSS event is already being handled well by this driver code you
> implemented.
> 
>>>   >
>> <snip>
>>>>>    #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>>>> -       if (aspeed_i2c_slave_irq(bus)) {
>>>>> -               dev_dbg(bus->dev, "irq handled by slave.\n");
>>>>> -               return IRQ_HANDLED;
>>>>> +       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>>>> +               if (!aspeed_i2c_master_irq(bus))
>>>>
>>>> Why do you check the slave if master fails (or vice versa)? I
>>>> understand that there are some status bits that have not been handled,
>>>> but it doesn't seem reasonable to assume that there is state that the
>>>> other should do something with; the only way this would happen is if
>>>> the state that you think you are in does not match the status bits you
>>>> have been given, but if this is the case, you are already hosed; I
>>>> don't think trying the other handler is likely to make things better,
>>>> unless there is something that I am missing.
>>>>
>>>
>>> In most of cases, interrupt bits are set one by one but there are also a
>>> lot of other cases that ASPEED I2C H/W sends multiple interrupt bits
>>> with combining master and slave events using a single interrupt call. It
>>> happens much in multi-master environment than single-master. For
>>> example, when master is waiting for a NORMAL_STOP interrupt in its
>>> MASTER_STOP state, SLAVE_MATCH and RX_DONE interrupts could come along
>>> with the NORMAL_STOP in case of an another master immediately sends data
>>> just after acquiring the bus - it happens a lot in BMC-ME connection
>>> practically. In this case, the NORMAL_STOP interrupt should be handled
>>> by master_irq and the SLAVE_MATCH and RX_DONE interrupts should be
>>> handled by slave_irq so it's the reason why this code is added.
>>
>> That sucks. Well, it sounds like there are only a handful of cases in 
>> which
>> this can happen. Maybe enumerate these cases and error out or at least 
>> warn if
>> it is not one of them?
>>
> 
> Yes, that sucks but that is Aspeed's I2C IP behavior and that's the
> reason why they implemented some combination bits handling code in
> their SDK. Actually, the cases are happening somewhat frequently
> but that would not be a problem if we handle the cases properly instead
> of making error out or warn.
> 
>>>
>> <snip>
>>>>> +       for (;;) {
>>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
>>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
>>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
>>>>
>>>> Is using the Transfer Mode State Machine bits necessary? The
>>>> documentation marks it as "for debugging purpose only," so relying on
>>>> it makes me nervous.
>>>>
>>>
>>> As you said, the documentation marks it as "for debugging purpose only."
>>> but ASPEED also uses this way in their SDK code because it's the best
>>> way for checking bus busy status which can cover both single and
>>> multi-master use cases.
>>>
>>
>> Well, it would also be really nice to have access to this bit if 
>> someone wants
>> to implement MCTP. Could we maybe check with Aspeed what them meant by 
>> "for
>> debugging purposes only" and document it here? It makes me nervous to 
>> rely on
>> debugging functionality for normal usage.
>>
> 
> Okay, I'll check it with Aspeed. Will let you know their response.
> 

I've checked it with Gary Hsu <gary_hsu@aspeedtech.com> and he confirmed
that the bits reflect real information and good to be used in practical
code.

I'll add a comment like below:

/*
  * This is marked as 'for debugging purpose only' in datasheet but
  * ASPEED confirmed that this reflects real information and good
  * to be used in practical code.
  */

Is it acceptable then?

>>>>> +                       return 0;
>>>>> +               if (ktime_compare(ktime_get(), timeout) > 0)
>>>>> +                       break;
>>>>> +               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,
>>>>
>>>> Where did you get this minimum value?
>>>>
>>>
>>> No source for the minimum value. ASPEED uses mdelay(10) in their SDK
>>> but I changed that code using usleep_range and the range value was set
>>> with considering time stretching of usleep_range.
>>> regmap_read_poll_timeout was a reference for this code.
>>
>> What protocol are you trying to implement on top of this? You 
>> mentioned BMC-ME
>> above; that's IPMB, right? For most use cases, this should work, but 
>> if you
>> need arbitration, you will need to do quite a bit more work.
>>
> 
> Yes, I'm implementing IPMB for a BMC-ME channel. As I said above,
> arbitration will be performed in H/W level and it's already been handled
> well by your code. This bus busy checking logic is for checking whether
> any slave operation is currently ongoing or not at the timing of
> master_xfer is called. It's not for arbitration but for preventing state
> conflicts between master and slave operations.
> 
> FYI, I broke down this patch into smaller patches you reviewed
> Today. Thanks for sharing your time for reviewing the patches.
> I'll send remaining patches after completing review on those
> patches because the remaining patches have dependency on them.
> 
> Thanks!
> 
>>>
>>> Thanks,
>>>
>>> Jae
>> <snip>
>>
>> Cheers
>>
>
Brendan Higgins July 13, 2018, 6:02 p.m. UTC | #8
On Thu, Jul 12, 2018 at 11:21 AM Jae Hyun Yoo
<jae.hyun.yoo@linux.intel.com> wrote:
>
> On 7/12/2018 2:33 AM, Brendan Higgins wrote:
> > On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
> > <jae.hyun.yoo@linux.intel.com> wrote:
> >>
> > <snip>
> >>>> +/* Timeout for bus busy checking */
> >>>> +#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 250ms */
> >>>> +#define BUS_BUSY_CHECK_INTERVAL                                10000  /* 10ms */
> >>>
> >>> Could you add a comment on where you got these values from?
> >>>
> >>
> >> These are coming from ASPEED SDK code. Actually, they use 100ms for
> >> timeout and 10ms for interval but I increased the timeout value to
> >> 250ms so that it covers a various range of bus speed. I think, it
> >> should be computed at run time based on the current bus speed, or
> >> we could add these as device tree settings. How do you think about it?
> >>
> >
> > This should definitely be a device tree setting. If one of the busses is being
> > used as a regular I2C bus, it could hold the bus for an unlimited amount of
> > time before sending a STOP. As for a default, 100ms is probably fine given
> > that, a) the limit will only apply to multi-master mode, and b) multi-master
> > mode will probably almost always be used with IPMB, or MCTP (MCTP actually
> > recommends a 100ms timeout for this purpose, see
> > https://www.dmtf.org/sites/default/files/standards/documents/DSP0237_1.1.0.pdf,
> > symbol PT2a). That being said, if you actually want to implement IPMB, or MCTP
> > arbitration logic, it is much more complicated.
> >
>
> Okay then, I think, we can fix the timeout value to 100ms and enable the
> bus busy checking logic only when 'multi-master' is set in device tree.
> My thought is, no additional arbitration logic is needed because
> arbitration is performed in H/W level and H/W reports
> ASPEED_I2CD_INTR_ARBIT_LOSS when it fails acquiring a bus. The
> ARBIT_LOSS event is already being handled well by this driver code you
> implemented.

I still think it would be best to provide an option to specify the timeout value
it in the device tree regardless of master mode or not. Also, I am talking about
fairness arbitration not the physical level arbitration provided by the I2C
spec. The physical arbitration that Aspeed provides just allows multiple masters
to operate on the same bus according to the specification; this bus arbitration
does not guarantee forward progress or even guarentee that the actual message
will be sent, which is what you are trying to do here.

Since you are planning on implementing IPMB, you will probably need to implement
fairness arbitration. I am not familiar with your BMC-ME channel. It sounds like
it pre-dates MCTP, so it must implement its own fairness arbitration on top of
the IPMB layer (more like you bake in some assumptions about what the possible
state is at anytime that guarentee fairness).

Are you using the Aspeed BMC on both sides of the connection? If so, you might
be further ahead to implement MCTP fairness arbitration which can be used in
conjunction with IPMB. This will require a bit of work to do, but everyone will
be much happier in the long term (assuming MCTP does eventually become a thing).

>
> >>   >
> > <snip>
> >>>>    #if IS_ENABLED(CONFIG_I2C_SLAVE)
> >>>> -       if (aspeed_i2c_slave_irq(bus)) {
> >>>> -               dev_dbg(bus->dev, "irq handled by slave.\n");
> >>>> -               return IRQ_HANDLED;
> >>>> +       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
> >>>> +               if (!aspeed_i2c_master_irq(bus))
> >>>
> >>> Why do you check the slave if master fails (or vice versa)? I
> >>> understand that there are some status bits that have not been handled,
> >>> but it doesn't seem reasonable to assume that there is state that the
> >>> other should do something with; the only way this would happen is if
> >>> the state that you think you are in does not match the status bits you
> >>> have been given, but if this is the case, you are already hosed; I
> >>> don't think trying the other handler is likely to make things better,
> >>> unless there is something that I am missing.
> >>>
> >>
> >> In most of cases, interrupt bits are set one by one but there are also a
> >> lot of other cases that ASPEED I2C H/W sends multiple interrupt bits
> >> with combining master and slave events using a single interrupt call. It
> >> happens much in multi-master environment than single-master. For
> >> example, when master is waiting for a NORMAL_STOP interrupt in its
> >> MASTER_STOP state, SLAVE_MATCH and RX_DONE interrupts could come along
> >> with the NORMAL_STOP in case of an another master immediately sends data
> >> just after acquiring the bus - it happens a lot in BMC-ME connection
> >> practically. In this case, the NORMAL_STOP interrupt should be handled
> >> by master_irq and the SLAVE_MATCH and RX_DONE interrupts should be
> >> handled by slave_irq so it's the reason why this code is added.
> >
> > That sucks. Well, it sounds like there are only a handful of cases in which
> > this can happen. Maybe enumerate these cases and error out or at least warn if
> > it is not one of them?
> >
>
> Yes, that sucks but that is Aspeed's I2C IP behavior and that's the
> reason why they implemented some combination bits handling code in
> their SDK. Actually, the cases are happening somewhat frequently
> but that would not be a problem if we handle the cases properly instead
> of making error out or warn.
>
> >>
> > <snip>
> >>>> +       for (;;) {
> >>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
> >>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
> >>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
> >>>
> >>> Is using the Transfer Mode State Machine bits necessary? The
> >>> documentation marks it as "for debugging purpose only," so relying on
> >>> it makes me nervous.
> >>>
> >>
> >> As you said, the documentation marks it as "for debugging purpose only."
> >> but ASPEED also uses this way in their SDK code because it's the best
> >> way for checking bus busy status which can cover both single and
> >> multi-master use cases.
> >>
> >
> > Well, it would also be really nice to have access to this bit if someone wants
> > to implement MCTP. Could we maybe check with Aspeed what them meant by "for
> > debugging purposes only" and document it here? It makes me nervous to rely on
> > debugging functionality for normal usage.
> >
>
> Okay, I'll check it with Aspeed. Will let you know their response.

Sounds good.

>
> >>>> +                       return 0;
> >>>> +               if (ktime_compare(ktime_get(), timeout) > 0)
> >>>> +                       break;
> >>>> +               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,
> >>>
> >>> Where did you get this minimum value?
> >>>
> >>
> >> No source for the minimum value. ASPEED uses mdelay(10) in their SDK
> >> but I changed that code using usleep_range and the range value was set
> >> with considering time stretching of usleep_range.
> >> regmap_read_poll_timeout was a reference for this code.
> >
> > What protocol are you trying to implement on top of this? You mentioned BMC-ME
> > above; that's IPMB, right? For most use cases, this should work, but if you
> > need arbitration, you will need to do quite a bit more work.
> >
>
> Yes, I'm implementing IPMB for a BMC-ME channel. As I said above,
> arbitration will be performed in H/W level and it's already been handled
> well by your code. This bus busy checking logic is for checking whether
> any slave operation is currently ongoing or not at the timing of
> master_xfer is called. It's not for arbitration but for preventing state
> conflicts between master and slave operations.

Discussed above.

>
> FYI, I broke down this patch into smaller patches you reviewed
> Today. Thanks for sharing your time for reviewing the patches.
> I'll send remaining patches after completing review on those
> patches because the remaining patches have dependency on them.

Sounds good.

>
> Thanks!
>

Cheers
On Thu, Jul 12, 2018 at 11:21 AM Jae Hyun Yoo
<jae.hyun.yoo@linux.intel.com> wrote:
>
> On 7/12/2018 2:33 AM, Brendan Higgins wrote:
> > On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
> > <jae.hyun.yoo@linux.intel.com> wrote:
> >>
> > <snip>
> >>>> +/* Timeout for bus busy checking */
> >>>> +#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 250ms */
> >>>> +#define BUS_BUSY_CHECK_INTERVAL                                10000  /* 10ms */
> >>>
> >>> Could you add a comment on where you got these values from?
> >>>
> >>
> >> These are coming from ASPEED SDK code. Actually, they use 100ms for
> >> timeout and 10ms for interval but I increased the timeout value to
> >> 250ms so that it covers a various range of bus speed. I think, it
> >> should be computed at run time based on the current bus speed, or
> >> we could add these as device tree settings. How do you think about it?
> >>
> >
> > This should definitely be a device tree setting. If one of the busses is being
> > used as a regular I2C bus, it could hold the bus for an unlimited amount of
> > time before sending a STOP. As for a default, 100ms is probably fine given
> > that, a) the limit will only apply to multi-master mode, and b) multi-master
> > mode will probably almost always be used with IPMB, or MCTP (MCTP actually
> > recommends a 100ms timeout for this purpose, see
> > https://www.dmtf.org/sites/default/files/standards/documents/DSP0237_1.1.0.pdf,
> > symbol PT2a). That being said, if you actually want to implement IPMB, or MCTP
> > arbitration logic, it is much more complicated.
> >
>
> Okay then, I think, we can fix the timeout value to 100ms and enable the
> bus busy checking logic only when 'multi-master' is set in device tree.
> My thought is, no additional arbitration logic is needed because
> arbitration is performed in H/W level and H/W reports
> ASPEED_I2CD_INTR_ARBIT_LOSS when it fails acquiring a bus. The
> ARBIT_LOSS event is already being handled well by this driver code you
> implemented.

I still think it would be best to provide an option to specify the timeout value
it in the device tree regardless of master mode or not. Also, I am talking about
fairness arbitration not the physical level arbitration provided by the I2C
spec. The physical arbitration that Aspeed provides just allows multiple masters
to operate on the same bus according to the specification; this bus arbitration
does not guarantee forward progress or even guarentee that the actual message
will be sent, which is what you are trying to do here.

Since you are planning on implementing IPMB, you will probably need to implement
fairness arbitration. I am not familiar with your BMC-ME channel. It sounds like
it pre-dates MCTP, so it must implement its own fairness arbitration on top of
the IPMB layer (more like you bake in some assumptions about what the possible
state is at anytime that guarentee fairness).

Are you using the Aspeed BMC on both sides of the connection? If so, you might
be further ahead to implement MCTP fairness arbitration which can be used in
conjunction with IPMB. This will require a bit of work to do, but everyone will
be much happier in the long term (assuming MCTP does eventually become a thing).

>
> >>   >
> > <snip>
> >>>>    #if IS_ENABLED(CONFIG_I2C_SLAVE)
> >>>> -       if (aspeed_i2c_slave_irq(bus)) {
> >>>> -               dev_dbg(bus->dev, "irq handled by slave.\n");
> >>>> -               return IRQ_HANDLED;
> >>>> +       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
> >>>> +               if (!aspeed_i2c_master_irq(bus))
> >>>
> >>> Why do you check the slave if master fails (or vice versa)? I
> >>> understand that there are some status bits that have not been handled,
> >>> but it doesn't seem reasonable to assume that there is state that the
> >>> other should do something with; the only way this would happen is if
> >>> the state that you think you are in does not match the status bits you
> >>> have been given, but if this is the case, you are already hosed; I
> >>> don't think trying the other handler is likely to make things better,
> >>> unless there is something that I am missing.
> >>>
> >>
> >> In most of cases, interrupt bits are set one by one but there are also a
> >> lot of other cases that ASPEED I2C H/W sends multiple interrupt bits
> >> with combining master and slave events using a single interrupt call. It
> >> happens much in multi-master environment than single-master. For
> >> example, when master is waiting for a NORMAL_STOP interrupt in its
> >> MASTER_STOP state, SLAVE_MATCH and RX_DONE interrupts could come along
> >> with the NORMAL_STOP in case of an another master immediately sends data
> >> just after acquiring the bus - it happens a lot in BMC-ME connection
> >> practically. In this case, the NORMAL_STOP interrupt should be handled
> >> by master_irq and the SLAVE_MATCH and RX_DONE interrupts should be
> >> handled by slave_irq so it's the reason why this code is added.
> >
> > That sucks. Well, it sounds like there are only a handful of cases in which
> > this can happen. Maybe enumerate these cases and error out or at least warn if
> > it is not one of them?
> >
>
> Yes, that sucks but that is Aspeed's I2C IP behavior and that's the
> reason why they implemented some combination bits handling code in
> their SDK. Actually, the cases are happening somewhat frequently
> but that would not be a problem if we handle the cases properly instead
> of making error out or warn.
>
> >>
> > <snip>
> >>>> +       for (;;) {
> >>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
> >>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
> >>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
> >>>
> >>> Is using the Transfer Mode State Machine bits necessary? The
> >>> documentation marks it as "for debugging purpose only," so relying on
> >>> it makes me nervous.
> >>>
> >>
> >> As you said, the documentation marks it as "for debugging purpose only."
> >> but ASPEED also uses this way in their SDK code because it's the best
> >> way for checking bus busy status which can cover both single and
> >> multi-master use cases.
> >>
> >
> > Well, it would also be really nice to have access to this bit if someone wants
> > to implement MCTP. Could we maybe check with Aspeed what them meant by "for
> > debugging purposes only" and document it here? It makes me nervous to rely on
> > debugging functionality for normal usage.
> >
>
> Okay, I'll check it with Aspeed. Will let you know their response.

Sounds good.

>
> >>>> +                       return 0;
> >>>> +               if (ktime_compare(ktime_get(), timeout) > 0)
> >>>> +                       break;
> >>>> +               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,
> >>>
> >>> Where did you get this minimum value?
> >>>
> >>
> >> No source for the minimum value. ASPEED uses mdelay(10) in their SDK
> >> but I changed that code using usleep_range and the range value was set
> >> with considering time stretching of usleep_range.
> >> regmap_read_poll_timeout was a reference for this code.
> >
> > What protocol are you trying to implement on top of this? You mentioned BMC-ME
> > above; that's IPMB, right? For most use cases, this should work, but if you
> > need arbitration, you will need to do quite a bit more work.
> >
>
> Yes, I'm implementing IPMB for a BMC-ME channel. As I said above,
> arbitration will be performed in H/W level and it's already been handled
> well by your code. This bus busy checking logic is for checking whether
> any slave operation is currently ongoing or not at the timing of
> master_xfer is called. It's not for arbitration but for preventing state
> conflicts between master and slave operations.

Discussed above.

>
> FYI, I broke down this patch into smaller patches you reviewed
> Today. Thanks for sharing your time for reviewing the patches.
> I'll send remaining patches after completing review on those
> patches because the remaining patches have dependency on them.

Sounds good.

>
> Thanks!
>

Cheers
Brendan Higgins July 13, 2018, 6:12 p.m. UTC | #9
On Fri, Jul 13, 2018 at 10:22 AM Jae Hyun Yoo
<jae.hyun.yoo@linux.intel.com> wrote:
>
> On 7/12/2018 11:21 AM, Jae Hyun Yoo wrote:
> > On 7/12/2018 2:33 AM, Brendan Higgins wrote:
> >> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
> >> <jae.hyun.yoo@linux.intel.com> wrote:
<snip>
> >> <snip>
> >>>>> +       for (;;) {
> >>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
> >>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
> >>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
> >>>>
> >>>> Is using the Transfer Mode State Machine bits necessary? The
> >>>> documentation marks it as "for debugging purpose only," so relying on
> >>>> it makes me nervous.
> >>>>
> >>>
> >>> As you said, the documentation marks it as "for debugging purpose only."
> >>> but ASPEED also uses this way in their SDK code because it's the best
> >>> way for checking bus busy status which can cover both single and
> >>> multi-master use cases.
> >>>
> >>
> >> Well, it would also be really nice to have access to this bit if
> >> someone wants
> >> to implement MCTP. Could we maybe check with Aspeed what them meant by
> >> "for
> >> debugging purposes only" and document it here? It makes me nervous to
> >> rely on
> >> debugging functionality for normal usage.
> >>
> >
> > Okay, I'll check it with Aspeed. Will let you know their response.
> >
>
> I've checked it with Gary Hsu <gary_hsu@aspeedtech.com> and he confirmed
> that the bits reflect real information and good to be used in practical
> code.

Huh. For my own edification, could you ask them why they said "for debugging
purpose only" in the documentation? I am just really curious what they meant by
that. I would be satisfied if you just CC'ed me on your email thread with Gary,
and I can ask him myself.

>
> I'll add a comment like below:
>
> /*
>   * This is marked as 'for debugging purpose only' in datasheet but
>   * ASPEED confirmed that this reflects real information and good
>   * to be used in practical code.
>   */
>
> Is it acceptable then?

Yeah, that's fine.

<snip>

Cheers
Jae Hyun Yoo July 13, 2018, 6:50 p.m. UTC | #10
On 7/13/2018 11:02 AM, Brendan Higgins wrote:
> On Thu, Jul 12, 2018 at 11:21 AM Jae Hyun Yoo
> <jae.hyun.yoo@linux.intel.com> wrote:
>>
>> On 7/12/2018 2:33 AM, Brendan Higgins wrote:
>>> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
>>> <jae.hyun.yoo@linux.intel.com> wrote:
>>>>
>>> <snip>
>>>>>> +/* Timeout for bus busy checking */
>>>>>> +#define BUS_BUSY_CHECK_TIMEOUT                         250000 /* 250ms */
>>>>>> +#define BUS_BUSY_CHECK_INTERVAL                                10000  /* 10ms */
>>>>>
>>>>> Could you add a comment on where you got these values from?
>>>>>
>>>>
>>>> These are coming from ASPEED SDK code. Actually, they use 100ms for
>>>> timeout and 10ms for interval but I increased the timeout value to
>>>> 250ms so that it covers a various range of bus speed. I think, it
>>>> should be computed at run time based on the current bus speed, or
>>>> we could add these as device tree settings. How do you think about it?
>>>>
>>>
>>> This should definitely be a device tree setting. If one of the busses is being
>>> used as a regular I2C bus, it could hold the bus for an unlimited amount of
>>> time before sending a STOP. As for a default, 100ms is probably fine given
>>> that, a) the limit will only apply to multi-master mode, and b) multi-master
>>> mode will probably almost always be used with IPMB, or MCTP (MCTP actually
>>> recommends a 100ms timeout for this purpose, see
>>> https://www.dmtf.org/sites/default/files/standards/documents/DSP0237_1.1.0.pdf,
>>> symbol PT2a). That being said, if you actually want to implement IPMB, or MCTP
>>> arbitration logic, it is much more complicated.
>>>
>>
>> Okay then, I think, we can fix the timeout value to 100ms and enable the
>> bus busy checking logic only when 'multi-master' is set in device tree.
>> My thought is, no additional arbitration logic is needed because
>> arbitration is performed in H/W level and H/W reports
>> ASPEED_I2CD_INTR_ARBIT_LOSS when it fails acquiring a bus. The
>> ARBIT_LOSS event is already being handled well by this driver code you
>> implemented.
> 
> I still think it would be best to provide an option to specify the timeout value
> it in the device tree regardless of master mode or not. Also, I am talking about
> fairness arbitration not the physical level arbitration provided by the I2C
> spec. The physical arbitration that Aspeed provides just allows multiple masters
> to operate on the same bus according to the specification; this bus arbitration
> does not guarantee forward progress or even guarentee that the actual message
> will be sent, which is what you are trying to do here.
> 
> Since you are planning on implementing IPMB, you will probably need to implement
> fairness arbitration. I am not familiar with your BMC-ME channel. It sounds like
> it pre-dates MCTP, so it must implement its own fairness arbitration on top of
> the IPMB layer (more like you bake in some assumptions about what the possible
> state is at anytime that guarentee fairness).
> 
> Are you using the Aspeed BMC on both sides of the connection? If so, you might
> be further ahead to implement MCTP fairness arbitration which can be used in
> conjunction with IPMB. This will require a bit of work to do, but everyone will
> be much happier in the long term (assuming MCTP does eventually become a thing).
> 

Okay. I'll add an additional option into device tree to set the timeout
value.

My thought is, the fairness arbitration you are saying should be
considered in IPMB or MCTP layer not in this driver code.

The intention of the code I added is, preventing state corruption in
this driver code. In multi-master environment, this driver side master
cannot know exactly when peer master send data to this side master so a
case can be happened that this master tries to send data through the
master_xfer function but slave data from peer master is still being
processed by this driver. Previous code treats this as an error and
tries recovering a bus which is definitely wrong. So the patch code
checks whether there is any ongoing slave operation or not and wait up
to the timeout duration before start master xfer.

>>
>>>>    >
>>> <snip>
>>>>>>     #if IS_ENABLED(CONFIG_I2C_SLAVE)
>>>>>> -       if (aspeed_i2c_slave_irq(bus)) {
>>>>>> -               dev_dbg(bus->dev, "irq handled by slave.\n");
>>>>>> -               return IRQ_HANDLED;
>>>>>> +       if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
>>>>>> +               if (!aspeed_i2c_master_irq(bus))
>>>>>
>>>>> Why do you check the slave if master fails (or vice versa)? I
>>>>> understand that there are some status bits that have not been handled,
>>>>> but it doesn't seem reasonable to assume that there is state that the
>>>>> other should do something with; the only way this would happen is if
>>>>> the state that you think you are in does not match the status bits you
>>>>> have been given, but if this is the case, you are already hosed; I
>>>>> don't think trying the other handler is likely to make things better,
>>>>> unless there is something that I am missing.
>>>>>
>>>>
>>>> In most of cases, interrupt bits are set one by one but there are also a
>>>> lot of other cases that ASPEED I2C H/W sends multiple interrupt bits
>>>> with combining master and slave events using a single interrupt call. It
>>>> happens much in multi-master environment than single-master. For
>>>> example, when master is waiting for a NORMAL_STOP interrupt in its
>>>> MASTER_STOP state, SLAVE_MATCH and RX_DONE interrupts could come along
>>>> with the NORMAL_STOP in case of an another master immediately sends data
>>>> just after acquiring the bus - it happens a lot in BMC-ME connection
>>>> practically. In this case, the NORMAL_STOP interrupt should be handled
>>>> by master_irq and the SLAVE_MATCH and RX_DONE interrupts should be
>>>> handled by slave_irq so it's the reason why this code is added.
>>>
>>> That sucks. Well, it sounds like there are only a handful of cases in which
>>> this can happen. Maybe enumerate these cases and error out or at least warn if
>>> it is not one of them?
>>>
>>
>> Yes, that sucks but that is Aspeed's I2C IP behavior and that's the
>> reason why they implemented some combination bits handling code in
>> their SDK. Actually, the cases are happening somewhat frequently
>> but that would not be a problem if we handle the cases properly instead
>> of making error out or warn.
>>
>>>>
>>> <snip>
>>>>>> +       for (;;) {
>>>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
>>>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
>>>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
>>>>>
>>>>> Is using the Transfer Mode State Machine bits necessary? The
>>>>> documentation marks it as "for debugging purpose only," so relying on
>>>>> it makes me nervous.
>>>>>
>>>>
>>>> As you said, the documentation marks it as "for debugging purpose only."
>>>> but ASPEED also uses this way in their SDK code because it's the best
>>>> way for checking bus busy status which can cover both single and
>>>> multi-master use cases.
>>>>
>>>
>>> Well, it would also be really nice to have access to this bit if someone wants
>>> to implement MCTP. Could we maybe check with Aspeed what them meant by "for
>>> debugging purposes only" and document it here? It makes me nervous to rely on
>>> debugging functionality for normal usage.
>>>
>>
>> Okay, I'll check it with Aspeed. Will let you know their response.
> 
> Sounds good.
> 
>>
>>>>>> +                       return 0;
>>>>>> +               if (ktime_compare(ktime_get(), timeout) > 0)
>>>>>> +                       break;
>>>>>> +               usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,
>>>>>
>>>>> Where did you get this minimum value?
>>>>>
>>>>
>>>> No source for the minimum value. ASPEED uses mdelay(10) in their SDK
>>>> but I changed that code using usleep_range and the range value was set
>>>> with considering time stretching of usleep_range.
>>>> regmap_read_poll_timeout was a reference for this code.
>>>
>>> What protocol are you trying to implement on top of this? You mentioned BMC-ME
>>> above; that's IPMB, right? For most use cases, this should work, but if you
>>> need arbitration, you will need to do quite a bit more work.
>>>
>>
>> Yes, I'm implementing IPMB for a BMC-ME channel. As I said above,
>> arbitration will be performed in H/W level and it's already been handled
>> well by your code. This bus busy checking logic is for checking whether
>> any slave operation is currently ongoing or not at the timing of
>> master_xfer is called. It's not for arbitration but for preventing state
>> conflicts between master and slave operations.
> 
> Discussed above.
> 
>>
>> FYI, I broke down this patch into smaller patches you reviewed
>> Today. Thanks for sharing your time for reviewing the patches.
>> I'll send remaining patches after completing review on those
>> patches because the remaining patches have dependency on them.
> 
> Sounds good.
> 
>>
>> Thanks!
>>
> 
> Cheers
Jae Hyun Yoo July 13, 2018, 6:54 p.m. UTC | #11
On 7/13/2018 11:12 AM, Brendan Higgins wrote:
> On Fri, Jul 13, 2018 at 10:22 AM Jae Hyun Yoo
> <jae.hyun.yoo@linux.intel.com> wrote:
>>
>> On 7/12/2018 11:21 AM, Jae Hyun Yoo wrote:
>>> On 7/12/2018 2:33 AM, Brendan Higgins wrote:
>>>> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
>>>> <jae.hyun.yoo@linux.intel.com> wrote:
> <snip>
>>>> <snip>
>>>>>>> +       for (;;) {
>>>>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
>>>>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
>>>>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
>>>>>>
>>>>>> Is using the Transfer Mode State Machine bits necessary? The
>>>>>> documentation marks it as "for debugging purpose only," so relying on
>>>>>> it makes me nervous.
>>>>>>
>>>>>
>>>>> As you said, the documentation marks it as "for debugging purpose only."
>>>>> but ASPEED also uses this way in their SDK code because it's the best
>>>>> way for checking bus busy status which can cover both single and
>>>>> multi-master use cases.
>>>>>
>>>>
>>>> Well, it would also be really nice to have access to this bit if
>>>> someone wants
>>>> to implement MCTP. Could we maybe check with Aspeed what them meant by
>>>> "for
>>>> debugging purposes only" and document it here? It makes me nervous to
>>>> rely on
>>>> debugging functionality for normal usage.
>>>>
>>>
>>> Okay, I'll check it with Aspeed. Will let you know their response.
>>>
>>
>> I've checked it with Gary Hsu <gary_hsu@aspeedtech.com> and he confirmed
>> that the bits reflect real information and good to be used in practical
>> code.
> 
> Huh. For my own edification, could you ask them why they said "for debugging
> purpose only" in the documentation? I am just really curious what they meant by
> that. I would be satisfied if you just CC'ed me on your email thread with Gary,
> and I can ask him myself.
> 

I've already CC'ed Gary and Ryan in this thread.

Hi Gary,

Can you explain why the documentation says that the bit field is 'for
debugging purpose only'? Any plan to change the description?

Thanks,

Jae

>>
>> I'll add a comment like below:
>>
>> /*
>>    * This is marked as 'for debugging purpose only' in datasheet but
>>    * ASPEED confirmed that this reflects real information and good
>>    * to be used in practical code.
>>    */
>>
>> Is it acceptable then?
> 
> Yeah, that's fine.
> 
> <snip>
> 
> Cheers
>
Gary Hsu July 16, 2018, 3:05 a.m. UTC | #12
Hi Jae,

In originally, we reserved these register bits for debug purpose. But for some error handling case, we found it is also useful to help to clarify some error conditions. So driver also can use these fields information to check something.
As for how driver use these information in their code, I have no comment. I don’t understand the driver. But these information is the real controller state, it had no problem to use information.

Best Regards,

許馥疇 Gary Hsu

信驊科技股份有限公司
ASPEED Technology Inc.

2F,No.15,Industry East Road 4.,Hsinchu Science Park, Hsinchu City 30077, Taiwan
新竹科學園區工業東四路 15 號 2F

Tel : 886-3-5789568 ext:807
Fax : 886-3-5789586
Web : http://www.aspeedtech.com

************* Email Confidentiality Notice ********************
免責聲明:
因應個人資料保護法施行,本信件(或其附件)可能包含機密資訊,並受法律保護。如 台端非指定之收件者,請以電子郵件通知本電子郵件之發送者, 並請立即刪除本電子郵件及其附件和銷毀所有複印件。謝謝您的合作!

DISCLAIMER:
This message (and any attachments) may contain legally privileged and/or other confidential information. If you have received it in error, please notify the sender by reply e-mail and immediately delete the e-mail and any attachments without copying or disclosing the contents. Thank you.

-----Original Message-----
From: Jae Hyun Yoo [mailto:jae.hyun.yoo@linux.intel.com] 

Sent: Saturday, July 14, 2018 2:54 AM
To: Brendan Higgins <brendanhiggins@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>; Joel Stanley <joel@jms.id.au>; Andrew Jeffery <andrew@aj.id.au>; linux-i2c@vger.kernel.org; OpenBMC Maillist <openbmc@lists.ozlabs.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; linux-aspeed@lists.ozlabs.org; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; james.feist@linux.intel.com; vernon.mauery@linux.intel.com; Benjamin Fair <benjaminfair@google.com>; Patrick Venture <venture@google.com>; Gary Hsu <gary_hsu@aspeedtech.com>; Ryan Chen <ryan_chen@aspeedtech.com>
Subject: Re: [PATCH] i2c: aspeed: Improve driver to support multi-master use cases stably

On 7/13/2018 11:12 AM, Brendan Higgins wrote:
> On Fri, Jul 13, 2018 at 10:22 AM Jae Hyun Yoo 

> <jae.hyun.yoo@linux.intel.com> wrote:

>>

>> On 7/12/2018 11:21 AM, Jae Hyun Yoo wrote:

>>> On 7/12/2018 2:33 AM, Brendan Higgins wrote:

>>>> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo 

>>>> <jae.hyun.yoo@linux.intel.com> wrote:

> <snip>

>>>> <snip>

>>>>>>> +       for (;;) {

>>>>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &

>>>>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |

>>>>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))

>>>>>>

>>>>>> Is using the Transfer Mode State Machine bits necessary? The 

>>>>>> documentation marks it as "for debugging purpose only," so 

>>>>>> relying on it makes me nervous.

>>>>>>

>>>>>

>>>>> As you said, the documentation marks it as "for debugging purpose only."

>>>>> but ASPEED also uses this way in their SDK code because it's the 

>>>>> best way for checking bus busy status which can cover both single 

>>>>> and multi-master use cases.

>>>>>

>>>>

>>>> Well, it would also be really nice to have access to this bit if 

>>>> someone wants to implement MCTP. Could we maybe check with Aspeed 

>>>> what them meant by "for debugging purposes only" and document it 

>>>> here? It makes me nervous to rely on debugging functionality for 

>>>> normal usage.

>>>>

>>>

>>> Okay, I'll check it with Aspeed. Will let you know their response.

>>>

>>

>> I've checked it with Gary Hsu <gary_hsu@aspeedtech.com> and he 

>> confirmed that the bits reflect real information and good to be used 

>> in practical code.

> 

> Huh. For my own edification, could you ask them why they said "for 

> debugging purpose only" in the documentation? I am just really curious 

> what they meant by that. I would be satisfied if you just CC'ed me on 

> your email thread with Gary, and I can ask him myself.

> 


I've already CC'ed Gary and Ryan in this thread.

Hi Gary,

Can you explain why the documentation says that the bit field is 'for debugging purpose only'? Any plan to change the description?

Thanks,

Jae

>>

>> I'll add a comment like below:

>>

>> /*

>>    * This is marked as 'for debugging purpose only' in datasheet but

>>    * ASPEED confirmed that this reflects real information and good

>>    * to be used in practical code.

>>    */

>>

>> Is it acceptable then?

> 

> Yeah, that's fine.

> 

> <snip>

> 

> Cheers

>
Jae Hyun Yoo July 17, 2018, 4:18 p.m. UTC | #13
On 7/15/2018 8:05 PM, Gary Hsu wrote:
> Hi Jae,
> 
> In originally, we reserved these register bits for debug purpose. But for some error handling case, we found it is also useful to help to clarify some error conditions. So driver also can use these fields information to check something.
> As for how driver use these information in their code, I have no comment. I don’t understand the driver. But these information is the real controller state, it had no problem to use information.
> 

Thanks Gary!

Hi Brendan,
Is it acceptable now if I add this as a comment like below?

Thanks,
Jae

> Best Regards,
> 
> 許馥疇 Gary Hsu
> 
> 信驊科技股份有限公司
> ASPEED Technology Inc.
> 
> 2F,No.15,Industry East Road 4.,Hsinchu Science Park, Hsinchu City 30077, Taiwan
> 新竹科學園區工業東四路 15 號 2F
> 
> Tel : 886-3-5789568 ext:807
> Fax : 886-3-5789586
> Web : http://www.aspeedtech.com
> 
> ************* Email Confidentiality Notice ********************
> 免責聲明:
> 因應個人資料保護法施行,本信件(或其附件)可能包含機密資訊,並受法律保護。如 台端非指定之收件者,請以電子郵件通知本電子郵件之發送者, 並請立即刪除本電子郵件及其附件和銷毀所有複印件。謝謝您的合作!
> 
> DISCLAIMER:
> This message (and any attachments) may contain legally privileged and/or other confidential information. If you have received it in error, please notify the sender by reply e-mail and immediately delete the e-mail and any attachments without copying or disclosing the contents. Thank you.
> 
> -----Original Message-----
> From: Jae Hyun Yoo [mailto:jae.hyun.yoo@linux.intel.com]
> Sent: Saturday, July 14, 2018 2:54 AM
> To: Brendan Higgins <brendanhiggins@google.com>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>; Joel Stanley <joel@jms.id.au>; Andrew Jeffery <andrew@aj.id.au>; linux-i2c@vger.kernel.org; OpenBMC Maillist <openbmc@lists.ozlabs.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; linux-aspeed@lists.ozlabs.org; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; james.feist@linux.intel.com; vernon.mauery@linux.intel.com; Benjamin Fair <benjaminfair@google.com>; Patrick Venture <venture@google.com>; Gary Hsu <gary_hsu@aspeedtech.com>; Ryan Chen <ryan_chen@aspeedtech.com>
> Subject: Re: [PATCH] i2c: aspeed: Improve driver to support multi-master use cases stably
> 
> On 7/13/2018 11:12 AM, Brendan Higgins wrote:
>> On Fri, Jul 13, 2018 at 10:22 AM Jae Hyun Yoo
>> <jae.hyun.yoo@linux.intel.com> wrote:
>>>
>>> On 7/12/2018 11:21 AM, Jae Hyun Yoo wrote:
>>>> On 7/12/2018 2:33 AM, Brendan Higgins wrote:
>>>>> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
>>>>> <jae.hyun.yoo@linux.intel.com> wrote:
>> <snip>
>>>>> <snip>
>>>>>>>> +       for (;;) {
>>>>>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
>>>>>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
>>>>>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
>>>>>>>
>>>>>>> Is using the Transfer Mode State Machine bits necessary? The
>>>>>>> documentation marks it as "for debugging purpose only," so
>>>>>>> relying on it makes me nervous.
>>>>>>>
>>>>>>
>>>>>> As you said, the documentation marks it as "for debugging purpose only."
>>>>>> but ASPEED also uses this way in their SDK code because it's the
>>>>>> best way for checking bus busy status which can cover both single
>>>>>> and multi-master use cases.
>>>>>>
>>>>>
>>>>> Well, it would also be really nice to have access to this bit if
>>>>> someone wants to implement MCTP. Could we maybe check with Aspeed
>>>>> what them meant by "for debugging purposes only" and document it
>>>>> here? It makes me nervous to rely on debugging functionality for
>>>>> normal usage.
>>>>>
>>>>
>>>> Okay, I'll check it with Aspeed. Will let you know their response.
>>>>
>>>
>>> I've checked it with Gary Hsu <gary_hsu@aspeedtech.com> and he
>>> confirmed that the bits reflect real information and good to be used
>>> in practical code.
>>
>> Huh. For my own edification, could you ask them why they said "for
>> debugging purpose only" in the documentation? I am just really curious
>> what they meant by that. I would be satisfied if you just CC'ed me on
>> your email thread with Gary, and I can ask him myself.
>>
> 
> I've already CC'ed Gary and Ryan in this thread.
> 
> Hi Gary,
> 
> Can you explain why the documentation says that the bit field is 'for debugging purpose only'? Any plan to change the description?
> 
> Thanks,
> 
> Jae
> 
>>>
>>> I'll add a comment like below:
>>>
>>> /*
>>>     * This is marked as 'for debugging purpose only' in datasheet but
>>>     * ASPEED confirmed that this reflects real information and good
>>>     * to be used in practical code.
>>>     */
>>>
>>> Is it acceptable then?
>>
>> Yeah, that's fine.
>>
>> <snip>
>>
>> Cheers
>>
Brendan Higgins July 19, 2018, 4:57 p.m. UTC | #14
On Tue, Jul 17, 2018 at 9:18 AM Jae Hyun Yoo
<jae.hyun.yoo@linux.intel.com> wrote:
>
>
> On 7/15/2018 8:05 PM, Gary Hsu wrote:
> > Hi Jae,
> >
> > In originally, we reserved these register bits for debug purpose. But for some error handling case, we found it is also useful to help to clarify some error conditions. So driver also can use these fields information to check something.
> > As for how driver use these information in their code, I have no comment. I don’t understand the driver. But these information is the real controller state, it had no problem to use information.
> >
>
> Thanks Gary!
>
> Hi Brendan,
> Is it acceptable now if I add this as a comment like below?

Yep, that's fine. I didn't mean to hold you up. I was just curious.

>
> Thanks,
> Jae
>
> > Best Regards,
> >
> > 許馥疇 Gary Hsu
> >
> > 信驊科技股份有限公司
> > ASPEED Technology Inc.
> >
> > 2F,No.15,Industry East Road 4.,Hsinchu Science Park, Hsinchu City 30077, Taiwan
> > 新竹科學園區工業東四路 15 號 2F
> >
> > Tel : 886-3-5789568 ext:807
> > Fax : 886-3-5789586
> > Web : http://www.aspeedtech.com
> >
> > ************* Email Confidentiality Notice ********************
> > 免責聲明:
> > 因應個人資料保護法施行,本信件(或其附件)可能包含機密資訊,並受法律保護。如 台端非指定之收件者,請以電子郵件通知本電子郵件之發送者, 並請立即刪除本電子郵件及其附件和銷毀所有複印件。謝謝您的合作!
> >
> > DISCLAIMER:
> > This message (and any attachments) may contain legally privileged and/or other confidential information. If you have received it in error, please notify the sender by reply e-mail and immediately delete the e-mail and any attachments without copying or disclosing the contents. Thank you.
> >
> > -----Original Message-----
> > From: Jae Hyun Yoo [mailto:jae.hyun.yoo@linux.intel.com]
> > Sent: Saturday, July 14, 2018 2:54 AM
> > To: Brendan Higgins <brendanhiggins@google.com>
> > Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>; Joel Stanley <joel@jms.id.au>; Andrew Jeffery <andrew@aj.id.au>; linux-i2c@vger.kernel.org; OpenBMC Maillist <openbmc@lists.ozlabs.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; linux-aspeed@lists.ozlabs.org; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; james.feist@linux.intel.com; vernon.mauery@linux.intel.com; Benjamin Fair <benjaminfair@google.com>; Patrick Venture <venture@google.com>; Gary Hsu <gary_hsu@aspeedtech.com>; Ryan Chen <ryan_chen@aspeedtech.com>
> > Subject: Re: [PATCH] i2c: aspeed: Improve driver to support multi-master use cases stably
> >
> > On 7/13/2018 11:12 AM, Brendan Higgins wrote:
> >> On Fri, Jul 13, 2018 at 10:22 AM Jae Hyun Yoo
> >> <jae.hyun.yoo@linux.intel.com> wrote:
> >>>
> >>> On 7/12/2018 11:21 AM, Jae Hyun Yoo wrote:
> >>>> On 7/12/2018 2:33 AM, Brendan Higgins wrote:
> >>>>> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
> >>>>> <jae.hyun.yoo@linux.intel.com> wrote:
> >> <snip>
> >>>>> <snip>
> >>>>>>>> +       for (;;) {
> >>>>>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
> >>>>>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
> >>>>>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
> >>>>>>>
> >>>>>>> Is using the Transfer Mode State Machine bits necessary? The
> >>>>>>> documentation marks it as "for debugging purpose only," so
> >>>>>>> relying on it makes me nervous.
> >>>>>>>
> >>>>>>
> >>>>>> As you said, the documentation marks it as "for debugging purpose only."
> >>>>>> but ASPEED also uses this way in their SDK code because it's the
> >>>>>> best way for checking bus busy status which can cover both single
> >>>>>> and multi-master use cases.
> >>>>>>
> >>>>>
> >>>>> Well, it would also be really nice to have access to this bit if
> >>>>> someone wants to implement MCTP. Could we maybe check with Aspeed
> >>>>> what them meant by "for debugging purposes only" and document it
> >>>>> here? It makes me nervous to rely on debugging functionality for
> >>>>> normal usage.
> >>>>>
> >>>>
> >>>> Okay, I'll check it with Aspeed. Will let you know their response.
> >>>>
> >>>
> >>> I've checked it with Gary Hsu <gary_hsu@aspeedtech.com> and he
> >>> confirmed that the bits reflect real information and good to be used
> >>> in practical code.
> >>
> >> Huh. For my own edification, could you ask them why they said "for
> >> debugging purpose only" in the documentation? I am just really curious
> >> what they meant by that. I would be satisfied if you just CC'ed me on
> >> your email thread with Gary, and I can ask him myself.
> >>
> >
> > I've already CC'ed Gary and Ryan in this thread.
> >
> > Hi Gary,
> >
> > Can you explain why the documentation says that the bit field is 'for debugging purpose only'? Any plan to change the description?
> >
> > Thanks,
> >
> > Jae
> >
> >>>
> >>> I'll add a comment like below:
> >>>
> >>> /*
> >>>     * This is marked as 'for debugging purpose only' in datasheet but
> >>>     * ASPEED confirmed that this reflects real information and good
> >>>     * to be used in practical code.
> >>>     */
> >>>
> >>> Is it acceptable then?
> >>
> >> Yeah, that's fine.
> >>
> >> <snip>
> >>
> >> Cheers
> >>
Brendan Higgins July 19, 2018, 4:58 p.m. UTC | #15
On Sun, Jul 15, 2018 at 8:05 PM Gary Hsu <gary_hsu@aspeedtech.com> wrote:
>
> Hi Jae,
>
> In originally, we reserved these register bits for debug purpose. But for some error handling case, we found it is also useful to help to clarify some error conditions. So driver also can use these fields information to check something.
> As for how driver use these information in their code, I have no comment. I don’t understand the driver. But these information is the real controller state, it had no problem to use information.

Okay, so it was originally only intended for debugging, but then showed itself
to be useful, got it. Just out of curiosity, why did you mark it that way in
the documentation? It seems as though you are trying to discourage people from
using it.

>
> Best Regards,
>
> 許馥疇 Gary Hsu
>
> 信驊科技股份有限公司
> ASPEED Technology Inc.
>
> 2F,No.15,Industry East Road 4.,Hsinchu Science Park, Hsinchu City 30077, Taiwan
> 新竹科學園區工業東四路 15 號 2F
>
> Tel : 886-3-5789568 ext:807
> Fax : 886-3-5789586
> Web : http://www.aspeedtech.com
>
> ************* Email Confidentiality Notice ********************
> 免責聲明:
> 因應個人資料保護法施行,本信件(或其附件)可能包含機密資訊,並受法律保護。如 台端非指定之收件者,請以電子郵件通知本電子郵件之發送者, 並請立即刪除本電子郵件及其附件和銷毀所有複印件。謝謝您的合作!
>
> DISCLAIMER:
> This message (and any attachments) may contain legally privileged and/or other confidential information. If you have received it in error, please notify the sender by reply e-mail and immediately delete the e-mail and any attachments without copying or disclosing the contents. Thank you.
>
> -----Original Message-----
> From: Jae Hyun Yoo [mailto:jae.hyun.yoo@linux.intel.com]
> Sent: Saturday, July 14, 2018 2:54 AM
> To: Brendan Higgins <brendanhiggins@google.com>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>; Joel Stanley <joel@jms.id.au>; Andrew Jeffery <andrew@aj.id.au>; linux-i2c@vger.kernel.org; OpenBMC Maillist <openbmc@lists.ozlabs.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; linux-aspeed@lists.ozlabs.org; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; james.feist@linux.intel.com; vernon.mauery@linux.intel.com; Benjamin Fair <benjaminfair@google.com>; Patrick Venture <venture@google.com>; Gary Hsu <gary_hsu@aspeedtech.com>; Ryan Chen <ryan_chen@aspeedtech.com>
> Subject: Re: [PATCH] i2c: aspeed: Improve driver to support multi-master use cases stably
>
> On 7/13/2018 11:12 AM, Brendan Higgins wrote:
> > On Fri, Jul 13, 2018 at 10:22 AM Jae Hyun Yoo
> > <jae.hyun.yoo@linux.intel.com> wrote:
> >>
> >> On 7/12/2018 11:21 AM, Jae Hyun Yoo wrote:
> >>> On 7/12/2018 2:33 AM, Brendan Higgins wrote:
> >>>> On Wed, Jun 27, 2018 at 10:55 AM Jae Hyun Yoo
> >>>> <jae.hyun.yoo@linux.intel.com> wrote:
> > <snip>
> >>>> <snip>
> >>>>>>> +       for (;;) {
> >>>>>>> +               if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
> >>>>>>> +                     (ASPEED_I2CD_BUS_BUSY_STS |
> >>>>>>> +                      ASPEED_I2CD_XFER_MODE_STS_MASK)))
> >>>>>>
> >>>>>> Is using the Transfer Mode State Machine bits necessary? The
> >>>>>> documentation marks it as "for debugging purpose only," so
> >>>>>> relying on it makes me nervous.
> >>>>>>
> >>>>>
> >>>>> As you said, the documentation marks it as "for debugging purpose only."
> >>>>> but ASPEED also uses this way in their SDK code because it's the
> >>>>> best way for checking bus busy status which can cover both single
> >>>>> and multi-master use cases.
> >>>>>
> >>>>
> >>>> Well, it would also be really nice to have access to this bit if
> >>>> someone wants to implement MCTP. Could we maybe check with Aspeed
> >>>> what them meant by "for debugging purposes only" and document it
> >>>> here? It makes me nervous to rely on debugging functionality for
> >>>> normal usage.
> >>>>
> >>>
> >>> Okay, I'll check it with Aspeed. Will let you know their response.
> >>>
> >>
> >> I've checked it with Gary Hsu <gary_hsu@aspeedtech.com> and he
> >> confirmed that the bits reflect real information and good to be used
> >> in practical code.
> >
> > Huh. For my own edification, could you ask them why they said "for
> > debugging purpose only" in the documentation? I am just really curious
> > what they meant by that. I would be satisfied if you just CC'ed me on
> > your email thread with Gary, and I can ask him myself.
> >
>
> I've already CC'ed Gary and Ryan in this thread.
>
> Hi Gary,
>
> Can you explain why the documentation says that the bit field is 'for debugging purpose only'? Any plan to change the description?
>
> Thanks,
>
> Jae
>
> >>
> >> I'll add a comment like below:
> >>
> >> /*
> >>    * This is marked as 'for debugging purpose only' in datasheet but
> >>    * ASPEED confirmed that this reflects real information and good
> >>    * to be used in practical code.
> >>    */
> >>
> >> Is it acceptable then?
> >
> > Yeah, that's fine.
> >
> > <snip>
> >
> > Cheers
> >
diff mbox

Patch

diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index 60e4d0e939a3..ac3e17d9a365 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -4,6 +4,7 @@ 
  *  Copyright (C) 2012-2017 ASPEED Technology Inc.
  *  Copyright 2017 IBM Corporation
  *  Copyright 2017 Google, Inc.
+ *  Copyright (c) 2018 Intel Corporation
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
@@ -12,6 +13,7 @@ 
 
 #include <linux/clk.h>
 #include <linux/completion.h>
+#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/i2c.h>
@@ -82,6 +84,11 @@ 
 #define ASPEED_I2CD_INTR_RX_DONE			BIT(2)
 #define ASPEED_I2CD_INTR_TX_NAK				BIT(1)
 #define ASPEED_I2CD_INTR_TX_ACK				BIT(0)
+#define ASPEED_I2CD_INTR_ERRORS						       \
+		(ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
+		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
+		 ASPEED_I2CD_INTR_ABNORMAL |				       \
+		 ASPEED_I2CD_INTR_ARBIT_LOSS)
 #define ASPEED_I2CD_INTR_ALL						       \
 		(ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
 		 ASPEED_I2CD_INTR_BUS_RECOVER_DONE |			       \
@@ -94,6 +101,7 @@ 
 		 ASPEED_I2CD_INTR_TX_ACK)
 
 /* 0x14 : I2CD Command/Status Register   */
+#define ASPEED_I2CD_XFER_MODE_STS_MASK			GENMASK(22, 19)
 #define ASPEED_I2CD_SCL_LINE_STS			BIT(18)
 #define ASPEED_I2CD_SDA_LINE_STS			BIT(17)
 #define ASPEED_I2CD_BUS_BUSY_STS			BIT(16)
@@ -110,23 +118,27 @@ 
 /* 0x18 : I2CD Slave Device Address Register   */
 #define ASPEED_I2CD_DEV_ADDR_MASK			GENMASK(6, 0)
 
+/* Timeout for bus busy checking */
+#define BUS_BUSY_CHECK_TIMEOUT				250000 /* 250ms */
+#define BUS_BUSY_CHECK_INTERVAL				10000  /* 10ms */
+
 enum aspeed_i2c_master_state {
+	ASPEED_I2C_MASTER_INACTIVE,
 	ASPEED_I2C_MASTER_START,
 	ASPEED_I2C_MASTER_TX_FIRST,
 	ASPEED_I2C_MASTER_TX,
 	ASPEED_I2C_MASTER_RX_FIRST,
 	ASPEED_I2C_MASTER_RX,
 	ASPEED_I2C_MASTER_STOP,
-	ASPEED_I2C_MASTER_INACTIVE,
 };
 
 enum aspeed_i2c_slave_state {
+	ASPEED_I2C_SLAVE_STOP,
 	ASPEED_I2C_SLAVE_START,
 	ASPEED_I2C_SLAVE_READ_REQUESTED,
 	ASPEED_I2C_SLAVE_READ_PROCESSED,
 	ASPEED_I2C_SLAVE_WRITE_REQUESTED,
 	ASPEED_I2C_SLAVE_WRITE_RECEIVED,
-	ASPEED_I2C_SLAVE_STOP,
 };
 
 struct aspeed_i2c_bus {
@@ -150,6 +162,7 @@  struct aspeed_i2c_bus {
 	int				cmd_err;
 	/* Protected only by i2c_lock_bus */
 	int				master_xfer_result;
+	u32				irq_status;
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
 	struct i2c_client		*slave;
 	enum aspeed_i2c_slave_state	slave_state;
@@ -229,37 +242,30 @@  static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
 static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
 {
-	u32 command, irq_status, status_ack = 0;
+	u32 command, status_ack = 0;
 	struct i2c_client *slave = bus->slave;
-	bool irq_handled = true;
 	u8 value;
 
-	spin_lock(&bus->lock);
-	if (!slave) {
-		irq_handled = false;
-		goto out;
-	}
+	if (!slave)
+		return false;
 
 	command = readl(bus->base + ASPEED_I2C_CMD_REG);
-	irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
 
 	/* Slave was requested, restart state machine. */
-	if (irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
+	if (bus->irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
 		status_ack |= ASPEED_I2CD_INTR_SLAVE_MATCH;
 		bus->slave_state = ASPEED_I2C_SLAVE_START;
 	}
 
 	/* Slave is not currently active, irq was for someone else. */
-	if (bus->slave_state == ASPEED_I2C_SLAVE_STOP) {
-		irq_handled = false;
-		goto out;
-	}
+	if (bus->slave_state == ASPEED_I2C_SLAVE_STOP)
+		return false;
 
 	dev_dbg(bus->dev, "slave irq status 0x%08x, cmd 0x%08x\n",
-		irq_status, command);
+		bus->irq_status, command);
 
 	/* Slave was sent something. */
-	if (irq_status & ASPEED_I2CD_INTR_RX_DONE) {
+	if (bus->irq_status & ASPEED_I2CD_INTR_RX_DONE) {
 		value = readl(bus->base + ASPEED_I2C_BYTE_BUF_REG) >> 8;
 		/* Handle address frame. */
 		if (bus->slave_state == ASPEED_I2C_SLAVE_START) {
@@ -274,28 +280,29 @@  static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
 	}
 
 	/* Slave was asked to stop. */
-	if (irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
+	if (bus->irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
 		status_ack |= ASPEED_I2CD_INTR_NORMAL_STOP;
 		bus->slave_state = ASPEED_I2C_SLAVE_STOP;
 	}
-	if (irq_status & ASPEED_I2CD_INTR_TX_NAK) {
+	if (bus->irq_status & ASPEED_I2CD_INTR_TX_NAK) {
 		status_ack |= ASPEED_I2CD_INTR_TX_NAK;
 		bus->slave_state = ASPEED_I2C_SLAVE_STOP;
 	}
+	if (bus->irq_status & ASPEED_I2CD_INTR_TX_ACK) {
+		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
+	}
 
 	switch (bus->slave_state) {
 	case ASPEED_I2C_SLAVE_READ_REQUESTED:
-		if (irq_status & ASPEED_I2CD_INTR_TX_ACK)
+		if (bus->irq_status & ASPEED_I2CD_INTR_TX_ACK)
 			dev_err(bus->dev, "Unexpected ACK on read request.\n");
 		bus->slave_state = ASPEED_I2C_SLAVE_READ_PROCESSED;
-
 		i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value);
 		writel(value, bus->base + ASPEED_I2C_BYTE_BUF_REG);
 		writel(ASPEED_I2CD_S_TX_CMD, bus->base + ASPEED_I2C_CMD_REG);
 		break;
 	case ASPEED_I2C_SLAVE_READ_PROCESSED:
-		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
-		if (!(irq_status & ASPEED_I2CD_INTR_TX_ACK))
+		if (!(bus->irq_status & ASPEED_I2CD_INTR_TX_ACK))
 			dev_err(bus->dev,
 				"Expected ACK after processed read.\n");
 		i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value);
@@ -318,15 +325,8 @@  static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
 		break;
 	}
 
-	if (status_ack != irq_status)
-		dev_err(bus->dev,
-			"irq handled != irq. expected %x, but was %x\n",
-			irq_status, status_ack);
-	writel(status_ack, bus->base + ASPEED_I2C_INTR_STS_REG);
-
-out:
-	spin_unlock(&bus->lock);
-	return irq_handled;
+	bus->irq_status ^= status_ack;
+	return !bus->irq_status;
 }
 #endif /* CONFIG_I2C_SLAVE */
 
@@ -384,20 +384,19 @@  static int aspeed_i2c_is_irq_error(u32 irq_status)
 
 static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 {
-	u32 irq_status, status_ack = 0, command = 0;
+	u32 status_ack = 0, command = 0;
 	struct i2c_msg *msg;
 	u8 recv_byte;
 	int ret;
 
-	spin_lock(&bus->lock);
-	irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
-	/* Ack all interrupt bits. */
-	writel(irq_status, bus->base + ASPEED_I2C_INTR_STS_REG);
-
-	if (irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
+	if (bus->irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
 		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
 		status_ack |= ASPEED_I2CD_INTR_BUS_RECOVER_DONE;
 		goto out_complete;
+	} else {
+		/* Master is not currently active, irq was for someone else. */
+		if (bus->master_state == ASPEED_I2C_MASTER_INACTIVE)
+			goto out_no_complete;
 	}
 
 	/*
@@ -405,20 +404,23 @@  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 	 * should clear the command queue effectively taking us back to the
 	 * INACTIVE state.
 	 */
-	ret = aspeed_i2c_is_irq_error(irq_status);
-	if (ret < 0) {
-		dev_dbg(bus->dev, "received error interrupt: 0x%08x",
-			irq_status);
+	ret = aspeed_i2c_is_irq_error(bus->irq_status);
+	if (ret) {
+		dev_dbg(bus->dev, "received error interrupt: 0x%08x\n",
+			bus->irq_status);
 		bus->cmd_err = ret;
 		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		status_ack |= (bus->irq_status & ASPEED_I2CD_INTR_ERRORS);
 		goto out_complete;
 	}
 
 	/* We are in an invalid state; reset bus to a known state. */
 	if (!bus->msgs) {
-		dev_err(bus->dev, "bus in unknown state");
+		dev_err(bus->dev, "bus in unknown state irq_status: 0x%x\n",
+			bus->irq_status);
 		bus->cmd_err = -EIO;
-		if (bus->master_state != ASPEED_I2C_MASTER_STOP)
+		if (bus->master_state != ASPEED_I2C_MASTER_STOP &&
+		    bus->master_state != ASPEED_I2C_MASTER_INACTIVE)
 			aspeed_i2c_do_stop(bus);
 		goto out_no_complete;
 	}
@@ -430,7 +432,13 @@  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 	 * then update the state and handle the new state below.
 	 */
 	if (bus->master_state == ASPEED_I2C_MASTER_START) {
-		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
+		if (unlikely(!(bus->irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
+			if (unlikely(!(bus->irq_status &
+				     ASPEED_I2CD_INTR_TX_NAK))) {
+				bus->cmd_err = -ENXIO;
+				bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+				goto out_complete;
+			}
 			pr_devel("no slave present at %02x", msg->addr);
 			status_ack |= ASPEED_I2CD_INTR_TX_NAK;
 			bus->cmd_err = -ENXIO;
@@ -450,12 +458,13 @@  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 
 	switch (bus->master_state) {
 	case ASPEED_I2C_MASTER_TX:
-		if (unlikely(irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
-			dev_dbg(bus->dev, "slave NACKed TX");
+		if (unlikely(bus->irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
+			dev_dbg(bus->dev, "slave NACKed TX\n");
 			status_ack |= ASPEED_I2CD_INTR_TX_NAK;
 			goto error_and_stop;
-		} else if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
-			dev_err(bus->dev, "slave failed to ACK TX");
+		} else if (unlikely(!(bus->irq_status &
+				      ASPEED_I2CD_INTR_TX_ACK))) {
+			dev_err(bus->dev, "slave failed to ACK TX\n");
 			goto error_and_stop;
 		}
 		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
@@ -473,12 +482,12 @@  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 		goto out_no_complete;
 	case ASPEED_I2C_MASTER_RX_FIRST:
 		/* RX may not have completed yet (only address cycle) */
-		if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE))
+		if (!(bus->irq_status & ASPEED_I2CD_INTR_RX_DONE))
 			goto out_no_complete;
 		/* fallthrough intended */
 	case ASPEED_I2C_MASTER_RX:
-		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
-			dev_err(bus->dev, "master failed to RX");
+		if (unlikely(!(bus->irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
+			dev_err(bus->dev, "master failed to RX\n");
 			goto error_and_stop;
 		}
 		status_ack |= ASPEED_I2CD_INTR_RX_DONE;
@@ -508,8 +517,11 @@  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 		}
 		goto out_no_complete;
 	case ASPEED_I2C_MASTER_STOP:
-		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_NORMAL_STOP))) {
-			dev_err(bus->dev, "master failed to STOP");
+		if (unlikely(!(bus->irq_status &
+			       ASPEED_I2CD_INTR_NORMAL_STOP))) {
+			dev_err(bus->dev,
+				"master failed to STOP irq_status:0x%x\n",
+				bus->irq_status);
 			bus->cmd_err = -EIO;
 			/* Do not STOP as we have already tried. */
 		} else {
@@ -520,8 +532,8 @@  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 		goto out_complete;
 	case ASPEED_I2C_MASTER_INACTIVE:
 		dev_err(bus->dev,
-			"master received interrupt 0x%08x, but is inactive",
-			irq_status);
+			"master received interrupt 0x%08x, but is inactive\n",
+			bus->irq_status);
 		bus->cmd_err = -EIO;
 		/* Do not STOP as we should be inactive. */
 		goto out_complete;
@@ -543,26 +555,61 @@  static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 		bus->master_xfer_result = bus->msgs_index + 1;
 	complete(&bus->cmd_complete);
 out_no_complete:
-	if (irq_status != status_ack)
-		dev_err(bus->dev,
-			"irq handled != irq. expected 0x%08x, but was 0x%08x\n",
-			irq_status, status_ack);
-	spin_unlock(&bus->lock);
-	return !!irq_status;
+	bus->irq_status ^= status_ack;
+	return !bus->irq_status;
 }
 
 static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
 {
 	struct aspeed_i2c_bus *bus = dev_id;
+	u32 irq_received;
+
+	spin_lock(&bus->lock);
+	irq_received = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
+	bus->irq_status = irq_received;
 
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
-	if (aspeed_i2c_slave_irq(bus)) {
-		dev_dbg(bus->dev, "irq handled by slave.\n");
-		return IRQ_HANDLED;
+	if (bus->master_state != ASPEED_I2C_MASTER_INACTIVE) {
+		if (!aspeed_i2c_master_irq(bus))
+			aspeed_i2c_slave_irq(bus);
+	} else {
+		if (!aspeed_i2c_slave_irq(bus))
+			aspeed_i2c_master_irq(bus);
 	}
+#else
+	aspeed_i2c_master_irq(bus);
 #endif /* CONFIG_I2C_SLAVE */
 
-	return aspeed_i2c_master_irq(bus) ? IRQ_HANDLED : IRQ_NONE;
+	if (bus->irq_status)
+		dev_err(bus->dev,
+			"irq handled != irq. expected 0x%08x, but was 0x%08x\n",
+			irq_received, irq_received ^ bus->irq_status);
+
+	/* Ack all interrupt bits. */
+	writel(irq_received, bus->base + ASPEED_I2C_INTR_STS_REG);
+	spin_unlock(&bus->lock);
+	return bus->irq_status ? IRQ_NONE : IRQ_HANDLED;
+}
+
+static int aspeed_i2c_check_bus_busy_timeout(struct aspeed_i2c_bus *bus)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), BUS_BUSY_CHECK_TIMEOUT);
+
+	might_sleep();
+
+	for (;;) {
+		if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
+		      (ASPEED_I2CD_BUS_BUSY_STS |
+		       ASPEED_I2CD_XFER_MODE_STS_MASK)))
+			return 0;
+		if (ktime_compare(ktime_get(), timeout) > 0)
+			break;
+		usleep_range((BUS_BUSY_CHECK_INTERVAL >> 2) + 1,
+			     BUS_BUSY_CHECK_INTERVAL);
+	}
+
+	dev_err(bus->dev, "timeout waiting for idle. attempting recovery\n");
+	return aspeed_i2c_recover_bus(bus);
 }
 
 static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
@@ -570,22 +617,11 @@  static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
 {
 	struct aspeed_i2c_bus *bus = i2c_get_adapdata(adap);
 	unsigned long time_left, flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&bus->lock, flags);
-	bus->cmd_err = 0;
 
-	/* If bus is busy, attempt recovery. We assume a single master
-	 * environment.
-	 */
-	if (readl(bus->base + ASPEED_I2C_CMD_REG) & ASPEED_I2CD_BUS_BUSY_STS) {
-		spin_unlock_irqrestore(&bus->lock, flags);
-		ret = aspeed_i2c_recover_bus(bus);
-		if (ret)
-			return ret;
-		spin_lock_irqsave(&bus->lock, flags);
-	}
+	if (aspeed_i2c_check_bus_busy_timeout(bus))
+		return -EAGAIN;
 
+	spin_lock_irqsave(&bus->lock, flags);
 	bus->cmd_err = 0;
 	bus->msgs = msgs;
 	bus->msgs_index = 0;
@@ -851,7 +887,7 @@  static int aspeed_i2c_probe_bus(struct platform_device *pdev)
 	bus->rst = devm_reset_control_get_shared(&pdev->dev, NULL);
 	if (IS_ERR(bus->rst)) {
 		dev_err(&pdev->dev,
-			"missing or invalid reset controller device tree entry");
+			"missing or invalid reset controller device tree entry\n");
 		return PTR_ERR(bus->rst);
 	}
 	reset_control_deassert(bus->rst);