@@ -44,3 +44,78 @@ Description: Read-only. It returns socket_id to indicate which socket
 		this FPGA belongs to; it is only valid for the integrated
 		solution. Userspace only needs this information in case the
 		standard NUMA node can't provide correct information.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/revision
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the revision of this global
+ error reporting private feature.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/pcie0_errors
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description:	Read-Write. Read this file for errors detected on the pcie0
+		link. Write this file to clear errors logged in pcie0_errors.
+		Write fails with -EINVAL if input parsing fails or the error
+		code written doesn't match the errors currently logged.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/pcie1_errors
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description:	Read-Write. Read this file for errors detected on the pcie1
+		link. Write this file to clear errors logged in pcie1_errors.
+		Write fails with -EINVAL if input parsing fails or the error
+		code written doesn't match the errors currently logged.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/nonfatal_errors
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description:	Read-only. It returns the non-fatal errors detected.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/catfatal_errors
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description:	Read-only. It returns the catastrophic and fatal errors
+		detected.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/inject_error
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description:	Read-Write. Read this file to check which errors have been
+		injected. Write this file to inject errors for testing
+		purposes. Write fails with -EINVAL if input parsing fails or
+		the injected error code isn't supported.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/errors
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get errors detected by hardware.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/first_error
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the first error detected by
+ hardware.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/next_error
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the second error detected by
+ hardware.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/clear
+Date: June 2019
+KernelVersion: 5.3
+Contact: Wu Hao <hao.wu@intel.com>
+Description:	Write-only. Write the error code to this file to clear all
+		errors logged in errors, first_error and next_error. Write
+		fails with -EINVAL if input parsing fails or the error code
+		written doesn't match the errors currently logged.
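
For illustration, a minimal userspace sketch of the clear protocol described above: read the currently logged value, then write that same value back. This is only an illustrative example (not part of the ABI file); the dfl-fme.0 instance name is assumed and varies per system.

/* Sketch: clear pcie0_errors by writing back the currently logged value. */
#include <stdio.h>

int main(void)
{
	/* example instance; the dfl-fme.N number differs per system */
	const char *path =
		"/sys/bus/platform/devices/dfl-fme.0/errors/pcie0_errors";
	char buf[32];
	FILE *f;

	f = fopen(path, "r");
	if (!f)
		return 1;
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return 1;
	}
	fclose(f);

	/* a value that doesn't match the logged errors is rejected (-EINVAL) */
	f = fopen(path, "w");
	if (!f)
		return 1;
	fputs(buf, f);	/* e.g. "0x0\n" when nothing is logged */
	return fclose(f) ? 1 : 0;
}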
@@ -39,7 +39,7 @@ obj-$(CONFIG_FPGA_DFL_FME_BRIDGE) += dfl-fme-br.o
obj-$(CONFIG_FPGA_DFL_FME_REGION) += dfl-fme-region.o
obj-$(CONFIG_FPGA_DFL_AFU) += dfl-afu.o
-dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o
+dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o dfl-fme-error.o
dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
dfl-afu-objs += dfl-afu-error.o
new file mode 100644
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for FPGA Management Engine Error Management
+ *
+ * Copyright 2019 Intel Corporation, Inc.
+ *
+ * Authors:
+ * Kang Luwei <luwei.kang@intel.com>
+ * Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ * Wu Hao <hao.wu@intel.com>
+ * Joseph Grecco <joe.grecco@intel.com>
+ * Enno Luebbers <enno.luebbers@intel.com>
+ * Tim Whisonant <tim.whisonant@intel.com>
+ * Ananda Ravuri <ananda.ravuri@intel.com>
+ * Mitchel, Henry <henry.mitchel@intel.com>
+ */
+
+#include <linux/uaccess.h>
+
+#include "dfl.h"
+#include "dfl-fme.h"
+
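+/* register offsets within the FME global error reporting private feature */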
+#define FME_ERROR_MASK 0x8
+#define FME_ERROR 0x10
+#define MBP_ERROR BIT_ULL(6)
+#define PCIE0_ERROR_MASK 0x18
+#define PCIE0_ERROR 0x20
+#define PCIE1_ERROR_MASK 0x28
+#define PCIE1_ERROR 0x30
+#define FME_FIRST_ERROR 0x38
+#define FME_NEXT_ERROR 0x40
+#define RAS_NONFAT_ERROR_MASK 0x48
+#define RAS_NONFAT_ERROR 0x50
+#define RAS_CATFAT_ERROR_MASK 0x58
+#define RAS_CATFAT_ERROR 0x60
+#define RAS_ERROR_INJECT 0x68
+#define INJECT_ERROR_MASK GENMASK_ULL(2, 0)
+
+static ssize_t revision_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return sprintf(buf, "%u\n", dfl_feature_revision(base));
+}
+static DEVICE_ATTR_RO(revision);
+
+static ssize_t pcie0_errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return sprintf(buf, "0x%llx\n",
+ (unsigned long long)readq(base + PCIE0_ERROR));
+}
+
+static ssize_t pcie0_errors_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ int ret = 0;
+ u64 v, val;
+
+ if (kstrtou64(buf, 0, &val))
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ mutex_lock(&pdata->lock);
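+	/* mask PCIe0 error reporting while the logged errors are cleared */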
+ writeq(GENMASK_ULL(63, 0), base + PCIE0_ERROR_MASK);
+
+ v = readq(base + PCIE0_ERROR);
+ if (val == v)
+ writeq(v, base + PCIE0_ERROR);
+ else
+ ret = -EINVAL;
+
+ writeq(0ULL, base + PCIE0_ERROR_MASK);
+ mutex_unlock(&pdata->lock);
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_RW(pcie0_errors);
+
+static ssize_t pcie1_errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return sprintf(buf, "0x%llx\n",
+ (unsigned long long)readq(base + PCIE1_ERROR));
+}
+
+static ssize_t pcie1_errors_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ int ret = 0;
+ u64 v, val;
+
+ if (kstrtou64(buf, 0, &val))
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ mutex_lock(&pdata->lock);
+ writeq(GENMASK_ULL(63, 0), base + PCIE1_ERROR_MASK);
+
+ v = readq(base + PCIE1_ERROR);
+ if (val == v)
+ writeq(v, base + PCIE1_ERROR);
+ else
+ ret = -EINVAL;
+
+ writeq(0ULL, base + PCIE1_ERROR_MASK);
+ mutex_unlock(&pdata->lock);
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_RW(pcie1_errors);
+
+static ssize_t nonfatal_errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return sprintf(buf, "0x%llx\n",
+ (unsigned long long)readq(base + RAS_NONFAT_ERROR));
+}
+static DEVICE_ATTR_RO(nonfatal_errors);
+
+static ssize_t catfatal_errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return sprintf(buf, "0x%llx\n",
+ (unsigned long long)readq(base + RAS_CATFAT_ERROR));
+}
+static DEVICE_ATTR_RO(catfatal_errors);
+
+static ssize_t inject_error_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ u64 v;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ v = readq(base + RAS_ERROR_INJECT);
+
+ return sprintf(buf, "0x%llx\n",
+ (unsigned long long)FIELD_GET(INJECT_ERROR_MASK, v));
+}
+
+static ssize_t inject_error_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ u8 inject_error;
+ u64 v;
+
+ if (kstrtou8(buf, 0, &inject_error))
+ return -EINVAL;
+
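+	/* only the low 3 bits select injectable error types */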
+ if (inject_error & ~INJECT_ERROR_MASK)
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ mutex_lock(&pdata->lock);
+ v = readq(base + RAS_ERROR_INJECT);
+ v &= ~INJECT_ERROR_MASK;
+ v |= FIELD_PREP(INJECT_ERROR_MASK, inject_error);
+ writeq(v, base + RAS_ERROR_INJECT);
+ mutex_unlock(&pdata->lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(inject_error);
+
+static struct attribute *errors_attrs[] = {
+ &dev_attr_revision.attr,
+ &dev_attr_pcie0_errors.attr,
+ &dev_attr_pcie1_errors.attr,
+ &dev_attr_nonfatal_errors.attr,
+ &dev_attr_catfatal_errors.attr,
+ &dev_attr_inject_error.attr,
+ NULL,
+};
+
+static struct attribute_group errors_attr_group = {
+ .attrs = errors_attrs,
+};
+
+static ssize_t errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return sprintf(buf, "0x%llx\n",
+ (unsigned long long)readq(base + FME_ERROR));
+}
+static DEVICE_ATTR_RO(errors);
+
+static ssize_t first_error_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return sprintf(buf, "0x%llx\n",
+ (unsigned long long)readq(base + FME_FIRST_ERROR));
+}
+static DEVICE_ATTR_RO(first_error);
+
+static ssize_t next_error_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return sprintf(buf, "0x%llx\n",
+ (unsigned long long)readq(base + FME_NEXT_ERROR));
+}
+static DEVICE_ATTR_RO(next_error);
+
+static ssize_t clear_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ u64 v, val;
+ int ret = 0;
+
+ if (kstrtou64(buf, 0, &val))
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ mutex_lock(&pdata->lock);
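+	/* mask FME error reporting while the error logs are cleared */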
+ writeq(GENMASK_ULL(63, 0), base + FME_ERROR_MASK);
+
+ v = readq(base + FME_ERROR);
+ if (val == v) {
+ writeq(v, base + FME_ERROR);
+ v = readq(base + FME_FIRST_ERROR);
+ writeq(v, base + FME_FIRST_ERROR);
+ v = readq(base + FME_NEXT_ERROR);
+ writeq(v, base + FME_NEXT_ERROR);
+ } else {
+ ret = -EINVAL;
+ }
+
+ /* Workaround: disable MBP_ERROR if feature revision is 0 */
+ writeq(dfl_feature_revision(base) ? 0ULL : MBP_ERROR,
+ base + FME_ERROR_MASK);
+ mutex_unlock(&pdata->lock);
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_WO(clear);
+
+static struct attribute *fme_errors_attrs[] = {
+ &dev_attr_errors.attr,
+ &dev_attr_first_error.attr,
+ &dev_attr_next_error.attr,
+ &dev_attr_clear.attr,
+ NULL,
+};
+
+static struct attribute_group fme_errors_attr_group = {
+ .attrs = fme_errors_attrs,
+ .name = "fme-errors",
+};
+
+static const struct attribute_group *error_groups[] = {
+ &fme_errors_attr_group,
+ &errors_attr_group,
+ NULL
+};
+
+static void fme_error_enable(struct dfl_feature *feature)
+{
+ void __iomem *base = feature->ioaddr;
+
+ /* Workaround: disable MBP_ERROR if revision is 0 */
+ writeq(dfl_feature_revision(feature->ioaddr) ? 0ULL : MBP_ERROR,
+ base + FME_ERROR_MASK);
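+	/* unmask (enable) PCIe and RAS error reporting */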
+ writeq(0ULL, base + PCIE0_ERROR_MASK);
+ writeq(0ULL, base + PCIE1_ERROR_MASK);
+ writeq(0ULL, base + RAS_NONFAT_ERROR_MASK);
+ writeq(0ULL, base + RAS_CATFAT_ERROR_MASK);
+}
+
+static void err_dev_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+static int fme_global_err_init(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct device *dev;
+ int ret = 0;
+
+ dev_dbg(&pdev->dev, "FME Global Error Reporting Init.\n");
+
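+	/*
+	 * Create a dedicated "errors" sub-device under the FME platform
+	 * device so all error attributes are grouped under .../errors/.
+	 */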
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->parent = &pdev->dev;
+ dev->release = err_dev_release;
+ dev_set_name(dev, "errors");
+
+ fme_error_enable(feature);
+
+ ret = device_register(dev);
+ if (ret) {
+ put_device(dev);
+ return ret;
+ }
+
+ ret = sysfs_create_groups(&dev->kobj, error_groups);
+ if (ret) {
+ device_unregister(dev);
+ return ret;
+ }
+
+ feature->priv = dev;
+
+ return ret;
+}
+
+static void fme_global_err_uinit(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct device *dev = feature->priv;
+
+ dev_dbg(&pdev->dev, "FME Global Error Reporting UInit.\n");
+
+ sysfs_remove_groups(&dev->kobj, error_groups);
+ device_unregister(dev);
+}
+
+const struct dfl_feature_id fme_global_err_id_table[] = {
+ {.id = FME_FEATURE_ID_GLOBAL_ERR,},
+ {0,}
+};
+
+const struct dfl_feature_ops fme_global_err_ops = {
+ .init = fme_global_err_init,
+ .uinit = fme_global_err_uinit,
+};
@@ -226,6 +226,10 @@ static struct dfl_feature_driver fme_feature_drvs[] = {
.id_table = fme_pr_mgmt_id_table,
.ops = &fme_pr_mgmt_ops,
},
+ {
+ .id_table = fme_global_err_id_table,
+ .ops = &fme_global_err_ops,
+ },
{
.ops = NULL,
},
@@ -37,5 +37,7 @@ struct dfl_fme {
extern const struct dfl_feature_ops fme_pr_mgmt_ops;
extern const struct dfl_feature_id fme_pr_mgmt_id_table[];
+extern const struct dfl_feature_ops fme_global_err_ops;
+extern const struct dfl_feature_id fme_global_err_id_table[];
#endif /* __DFL_FME_H */
@@ -197,12 +197,14 @@ struct dfl_feature_driver {
 * feature dev (platform device)'s resources.
* @ioaddr: mapped mmio resource address.
* @ops: ops of this sub feature.
+ * @priv: private data of this feature.
*/
struct dfl_feature {
u64 id;
int resource_index;
void __iomem *ioaddr;
const struct dfl_feature_ops *ops;
+ void *priv;
};
#define DEV_STATUS_IN_USE 0