new file mode 100644
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+
+cxl-set-alert-config(1)
+=======================
+
+NAME
+----
+cxl-set-alert-config - set the warning alert threshold on a CXL memdev
+
+SYNOPSIS
+--------
+[verse]
+'cxl set-alert-config <mem0> [<mem1>..<memN>] [<options>]'
+
+DESCRIPTION
+-----------
+A CXL device raises an alert when its health status changes. Critical alerts
+are automatically configured by the device after a device reset.
+If supported, programmable warning thresholds are also initialized to
+vendor-recommended defaults and can then be configured by the user.
+
+Use this command to configure warning alert thresholds of a device.
+Once this command has been issued, the newly requested warning thresholds
+override the previously programmed warning thresholds.
+
+To enable a warning alert, set both 'threshold=value' and 'alert=on'. To disable
+a warning alert, set only 'alert=off'. Any other combination is rejected with
+an error.
+
+Use "cxl list -m <memdev> -A" to examine the programmable warning threshold
+capabilities of a device.
+
+EXAMPLES
+--------
+Set warning threshold to 30 and enable alert for life used.
+----
+# cxl set-alert-config mem0 --life-used-threshold=30 --life-used-alert=on
+{
+ "memdev":"mem0",
+ "ram_size":"1024.00 MiB (1073.74 MB)",
+ "alert_config":{
+ "life_used_prog_warn_threshold_valid":true,
+ "dev_over_temperature_prog_warn_threshold_valid":false,
+ "dev_under_temperature_prog_warn_threshold_valid":false,
+ "corrected_volatile_mem_err_prog_warn_threshold_valid":false,
+ "corrected_pmem_err_prog_warn_threshold_valid":false,
+ "life_used_prog_warn_threshold_writable":true,
+ "dev_over_temperature_prog_warn_threshold_writable":true,
+ "dev_under_temperature_prog_warn_threshold_writable":true,
+ "corrected_volatile_mem_err_prog_warn_threshold_writable":true,
+ "corrected_pmem_err_prog_warn_threshold_writable":true,
+ "life_used_crit_alert_threshold":75,
+ "life_used_prog_warn_threshold":30,
+ "dev_over_temperature_crit_alert_threshold":0,
+ "dev_under_temperature_crit_alert_threshold":0,
+ "dev_over_temperature_prog_warn_threshold":0,
+ "dev_under_temperature_prog_warn_threshold":0,
+ "corrected_volatile_mem_err_prog_warn_threshold":0,
+ "corrected_pmem_err_prog_warn_threshold":0
+ },
+ "serial":"0",
+ "host":"0000:0d:00.0"
+}
+cxl memdev: cmd_set_alert_config: set alert configuration for 1 mem
+----
+
+Disable warning alert for life_used.
+----
+# cxl set-alert-config mem0 --life-used-alert=off
+{
+ "memdev":"mem0",
+ "ram_size":"1024.00 MiB (1073.74 MB)",
+ "alert_config":{
+ "life_used_prog_warn_threshold_valid":false,
+ "dev_over_temperature_prog_warn_threshold_valid":false,
+ "dev_under_temperature_prog_warn_threshold_valid":false,
+ "corrected_volatile_mem_err_prog_warn_threshold_valid":false,
+ "corrected_pmem_err_prog_warn_threshold_valid":false,
+ "life_used_prog_warn_threshold_writable":true,
+ "dev_over_temperature_prog_warn_threshold_writable":true,
+ "dev_under_temperature_prog_warn_threshold_writable":true,
+ "corrected_volatile_mem_err_prog_warn_threshold_writable":true,
+ "corrected_pmem_err_prog_warn_threshold_writable":true,
+ "life_used_crit_alert_threshold":75,
+ "life_used_prog_warn_threshold":30,
+ "dev_over_temperature_crit_alert_threshold":0,
+ "dev_under_temperature_crit_alert_threshold":0,
+ "dev_over_temperature_prog_warn_threshold":0,
+ "dev_under_temperature_prog_warn_threshold":0,
+ "corrected_volatile_mem_err_prog_warn_threshold":0,
+ "corrected_pmem_err_prog_warn_threshold":0
+ },
+ "serial":"0",
+ "host":"0000:0d:00.0"
+}
+cxl memdev: cmd_set_alert_config: set alert configuration for 1 mem
+----
+
+OPTIONS
+-------
+<memory device(s)>::
+include::memdev-option.txt[]
+
+-L::
+--life-used-threshold=::
+ Set <value> for the life used warning alert threshold.
+
+--life-used-alert=::
+ Enable or disable the life used warning alert.
+ Options are 'on' or 'off'.
+
+-O::
+--over-temperature-threshold=::
+ Set <value> for the device over temperature warning alert threshold.
+
+--over-temperature-alert=::
+ Enable or disable the device over temperature warning alert.
+ Options are 'on' or 'off'.
+
+-U::
+--under-temperature-threshold=::
+ Set <value> for the device under temperature warning alert threshold.
+
+--under-temperature-alert=::
+ Enable or disable the device under temperature warning alert.
+ Options are 'on' or 'off'.
+
+-V::
+--volatile-mem-err-threshold=::
+ Set <value> for the corrected volatile memory error warning alert
+ threshold.
+
+--volatile-mem-err-alert=::
+ Enable or disable the corrected volatile memory error warning alert.
+ Options are 'on' or 'off'.
+
+-P::
+--pmem-err-threshold=::
+ Set <value> for the corrected persistent memory error warning alert
+ threshold.
+
+--pmem-err-alert=::
+ Enable or disable the corrected persistent memory error warning alert.
+ Options are 'on' or 'off'.
+
+-v::
+--verbose::
+ Turn on verbose debug messages in the library (if libcxl was built with
+ logging and debug enabled).
+
+SEE ALSO
+--------
+CXL-3.0 8.2.9.8.3.3
@@ -47,6 +47,7 @@ cxl_manpages = [
'cxl-destroy-region.txt',
'cxl-monitor.txt',
'cxl-update-firmware.txt',
+ 'cxl-set-alert-config.txt',
]
foreach man : cxl_manpages
@@ -15,6 +15,7 @@ int cmd_enable_memdev(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_reserve_dpa(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_free_dpa(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_update_fw(int argc, const char **argv, struct cxl_ctx *ctx);
+int cmd_set_alert_config(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_disable_port(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_enable_port(int argc, const char **argv, struct cxl_ctx *ctx);
int cmd_set_partition(int argc, const char **argv, struct cxl_ctx *ctx);
@@ -69,6 +69,7 @@ static struct cmd_struct commands[] = {
{ "reserve-dpa", .c_fn = cmd_reserve_dpa },
{ "free-dpa", .c_fn = cmd_free_dpa },
{ "update-firmware", .c_fn = cmd_update_fw },
+ { "set-alert-config", .c_fn = cmd_set_alert_config },
{ "disable-port", .c_fn = cmd_disable_port },
{ "enable-port", .c_fn = cmd_enable_port },
{ "set-partition", .c_fn = cmd_set_partition },
@@ -38,10 +38,38 @@ static struct parameters {
const char *type;
const char *size;
const char *decoder_filter;
+ const char *life_used_threshold;
+ const char *dev_over_temperature_threshold;
+ const char *dev_under_temperature_threshold;
+ const char *corrected_volatile_mem_err_threshold;
+ const char *corrected_pmem_err_threshold;
+ const char *life_used_alert;
+ const char *dev_over_temperature_alert;
+ const char *dev_under_temperature_alert;
+ const char *corrected_volatile_mem_err_alert;
+ const char *corrected_pmem_err_alert;
} param;
static struct log_ctx ml;
+struct alert_context { /* parsed set-alert-config request, filled in by alert_param_set_threshold() */
+ int valid_alert_actions; /* bit (1 << event) set for each event given a valid on/off action */
+ int enable_alert_actions; /* bit (1 << event) set when that event's alert was requested 'on' */
+ int life_used_threshold; /* thresholds below are only written when the matching alert is 'on' */
+ int dev_over_temperature_threshold;
+ int dev_under_temperature_threshold;
+ int corrected_volatile_mem_err_threshold;
+ int corrected_pmem_err_threshold;
+};
+
+enum cxl_setalert_event { /* alert event selectors; each value is used as a bit position (1 << event) */
+ CXL_SETALERT_LIFE_USED, /* paired with the life_used options */
+ CXL_SETALERT_OVER_TEMP, /* paired with the dev_over_temperature options */
+ CXL_SETALERT_UNDER_TEMP, /* paired with the dev_under_temperature options */
+ CXL_SETALERT_VOLATILE_MEM_ERROR, /* paired with the corrected_volatile_mem_err options */
+ CXL_SETALERT_PMEM_ERROR, /* paired with the corrected_pmem_err options */
+};
+
enum cxl_setpart_type {
CXL_SETPART_PMEM,
CXL_SETPART_VOLATILE,
@@ -99,6 +127,36 @@ OPT_BOOLEAN('c', "cancel", &param.cancel, \
OPT_BOOLEAN('w', "wait", &param.wait, \
"wait for firmware update to complete before returning")
+/*
+ * Command line options specific to 'cxl set-alert-config'.
+ *
+ * Each alert event gets a "...-threshold" option (with a short flag) and a
+ * long-only "...-alert" option that takes 'on' or 'off'. Every value is
+ * stored as a raw string in the matching 'param' member.
+ */
+#define SET_ALERT_OPTIONS() \
+OPT_STRING('L', "life-used-threshold", &param.life_used_threshold, \
+	"threshold", "threshold value for life used warning alert"), \
+OPT_STRING('\0', "life-used-alert", &param.life_used_alert, \
+	"'on' or 'off'", "enable or disable life used warning alert"), \
+OPT_STRING('O', "over-temperature-threshold", \
+	&param.dev_over_temperature_threshold, "threshold", \
+	"threshold value for device over temperature warning alert"), \
+OPT_STRING('\0', "over-temperature-alert", \
+	&param.dev_over_temperature_alert, "'on' or 'off'", \
+	"enable or disable device over temperature warning alert"), \
+OPT_STRING('U', "under-temperature-threshold", \
+	&param.dev_under_temperature_threshold, "threshold", \
+	"threshold value for device under temperature warning alert"), \
+OPT_STRING('\0', "under-temperature-alert", \
+	&param.dev_under_temperature_alert, "'on' or 'off'", \
+	"enable or disable device under temperature warning alert"), \
+OPT_STRING('V', "volatile-mem-err-threshold", \
+	&param.corrected_volatile_mem_err_threshold, "threshold", \
+	"threshold value for corrected volatile mem error warning alert"), \
+OPT_STRING('\0', "volatile-mem-err-alert", \
+	&param.corrected_volatile_mem_err_alert, "'on' or 'off'", \
+	"enable or disable corrected volatile mem error warning alert"), \
+OPT_STRING('P', "pmem-err-threshold", \
+	&param.corrected_pmem_err_threshold, "threshold", \
+	"threshold value for corrected pmem error warning alert"), \
+OPT_STRING('\0', "pmem-err-alert", \
+	&param.corrected_pmem_err_alert, "'on' or 'off'", \
+	"enable or disable corrected pmem error warning alert")
+
static const struct option read_options[] = {
BASE_OPTIONS(),
LABEL_OPTIONS(),
@@ -155,6 +213,12 @@ static const struct option update_fw_options[] = {
OPT_END(),
};
+static const struct option set_alert_options[] = { /* option table passed to memdev_action() by cmd_set_alert_config() */
+ BASE_OPTIONS(), /* NOTE(review): defined outside this view - presumably the options shared by all memdev commands */
+ SET_ALERT_OPTIONS(), /* per-event threshold values and 'on'/'off' alert switches */
+ OPT_END(),
+};
+
enum reserve_dpa_mode {
DPA_ALLOC,
DPA_FREE,
@@ -706,6 +770,148 @@ static int action_update_fw(struct cxl_memdev *memdev,
return rc;
}
+/*
+ * Range-check an already parsed warning threshold for one alert event.
+ *
+ * Life used accepts 0..100, the two device temperature events accept
+ * SHRT_MIN..SHRT_MAX, and every remaining event (the corrected memory error
+ * events) accepts 0..USHRT_MAX.
+ *
+ * Returns 0 when the value is in range; otherwise logs the rejected value and
+ * returns -EINVAL.
+ */
+static int validate_alert_threshold(enum cxl_setalert_event event,
+				    int threshold)
+{
+	switch (event) {
+	case CXL_SETALERT_LIFE_USED:
+		if (threshold >= 0 && threshold <= 100)
+			return 0;
+		log_err(&ml, "Invalid life used threshold: %d\n", threshold);
+		return -EINVAL;
+	case CXL_SETALERT_OVER_TEMP:
+	case CXL_SETALERT_UNDER_TEMP:
+		if (threshold >= SHRT_MIN && threshold <= SHRT_MAX)
+			return 0;
+		log_err(&ml, "Invalid device temperature threshold: %d\n",
+			threshold);
+		return -EINVAL;
+	default:
+		if (threshold >= 0 && threshold <= USHRT_MAX)
+			return 0;
+		log_err(&ml, "Invalid corrected mem error threshold: %d\n",
+			threshold);
+		return -EINVAL;
+	}
+}
+
+/*
+ * Translate the threshold/alert option pair of one alert event (the
+ * param.<arg>_threshold and param.<arg>_alert strings) into 'alertctx'.
+ *
+ * Accepted combinations:
+ *   - neither option given:          the event is left untouched
+ *   - alert "on" with a threshold:   the threshold is parsed and
+ *                                    range-checked, then the event is marked
+ *                                    valid and enabled
+ *   - alert "off" without threshold: the event is marked valid and disabled
+ * Anything else logs an error and makes the ENCLOSING FUNCTION return
+ * -EINVAL (or the error reported by validate_alert_threshold()).
+ *
+ * The threshold has to be a complete base-10 integer that fits in an int.
+ * It is parsed into a long and rejected when it is empty, has trailing
+ * characters, overflowed strtol() (ERANGE) or lies outside INT_MIN..INT_MAX,
+ * so oversized input cannot wrap around into a value that would pass
+ * validate_alert_threshold().
+ *
+ * Expands to a bare { } block (call sites use no trailing semicolon) and
+ * relies on 'param', 'ml', 'alertctx' and 'rc' being in scope.
+ */
+#define alert_param_set_threshold(arg, alert_event) \
+{ \
+	if (!param.arg##_alert) { \
+		if (param.arg##_threshold) { \
+			log_err(&ml, "Action not specified\n"); \
+			return -EINVAL; \
+		} \
+	} else if (strcmp(param.arg##_alert, "on") == 0) { \
+		if (param.arg##_threshold) { \
+			char *endptr; \
+			long parsed; \
+			errno = 0; \
+			parsed = strtol(param.arg##_threshold, &endptr, 10); \
+			if (endptr == param.arg##_threshold || \
+			    endptr[0] != '\0' || errno == ERANGE || \
+			    parsed < INT_MIN || parsed > INT_MAX) { \
+				log_err(&ml, "Invalid threshold: %s\n", \
+					param.arg##_threshold); \
+				return -EINVAL; \
+			} \
+			alertctx.arg##_threshold = (int)parsed; \
+			rc = validate_alert_threshold( \
+				alert_event, alertctx.arg##_threshold); \
+			if (rc != 0) \
+				return rc; \
+			alertctx.valid_alert_actions |= 1 << (alert_event); \
+			alertctx.enable_alert_actions |= 1 << (alert_event); \
+		} else { \
+			log_err(&ml, "Threshold not specified\n"); \
+			return -EINVAL; \
+		} \
+	} else if (strcmp(param.arg##_alert, "off") == 0) { \
+		if (!param.arg##_threshold) { \
+			alertctx.valid_alert_actions |= 1 << (alert_event); \
+			alertctx.enable_alert_actions &= ~(1 << (alert_event)); \
+		} else { \
+			log_err(&ml, "Disable not require threshold\n"); \
+			return -EINVAL; \
+		} \
+	} else { \
+		log_err(&ml, "Invalid action: %s\n", param.arg##_alert); \
+		return -EINVAL; \
+	} \
+}
+
+/*
+ * Copy the parsed <arg> threshold from 'alertctx' into 'cmd', but only when
+ * param.<arg>_threshold was given on the command line. Expands to a bare
+ * { } block (call sites use no trailing semicolon) and relies on 'param',
+ * 'cmd' and 'alertctx' being in scope.
+ */
+#define setup_threshold_field(arg) \
+{ \
+	if (param.arg##_threshold) { \
+		cxl_cmd_alert_config_set_##arg##_prog_warn_threshold( \
+			cmd, alertctx.arg##_threshold); \
+	} \
+}
+
+/*
+ * Apply the alert configuration requested on the command line to one memdev.
+ *
+ * The option strings in 'param' are first converted into 'alertctx'; an
+ * invalid combination, or no action at all, returns -EINVAL before any
+ * command is allocated. A set-alert-config command is then created, filled
+ * in, submitted, and its mailbox status checked. Once command creation has
+ * been attempted, the memdev is serialized together with its alert
+ * configuration and appended to actx->jdevs (when that array exists),
+ * whether or not the command succeeded.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int action_set_alert_config(struct cxl_memdev *memdev,
+				   struct action_context *actx)
+{
+	/* 'alertctx', 'rc' and 'cmd' are referenced by name from the helper macros */
+	const char *name = cxl_memdev_get_devname(memdev);
+	struct alert_context alertctx = { 0 };
+	unsigned long json_flags = UTIL_JSON_ALERT_CONFIG;
+	struct json_object *jdev;
+	struct cxl_cmd *cmd;
+	int mbox_status;
+	int rc = 0;
+
+	/* Each macro may return -EINVAL from this function on bad input */
+	alert_param_set_threshold(life_used, CXL_SETALERT_LIFE_USED)
+	alert_param_set_threshold(dev_over_temperature, CXL_SETALERT_OVER_TEMP)
+	alert_param_set_threshold(dev_under_temperature,
+				  CXL_SETALERT_UNDER_TEMP)
+	alert_param_set_threshold(corrected_volatile_mem_err,
+				  CXL_SETALERT_VOLATILE_MEM_ERROR)
+	alert_param_set_threshold(corrected_pmem_err, CXL_SETALERT_PMEM_ERROR)
+
+	if (!alertctx.valid_alert_actions) {
+		log_err(&ml, "No action specified\n");
+		return -EINVAL;
+	}
+
+	cmd = cxl_cmd_new_set_alert_config(memdev);
+	if (!cmd) {
+		rc = -ENXIO;
+		goto report;
+	}
+
+	/* Only thresholds that were given on the command line are written */
+	setup_threshold_field(life_used)
+	setup_threshold_field(dev_over_temperature)
+	setup_threshold_field(dev_under_temperature)
+	setup_threshold_field(corrected_volatile_mem_err)
+	setup_threshold_field(corrected_pmem_err)
+	cxl_cmd_alert_config_set_valid_alert_actions(
+		cmd, alertctx.valid_alert_actions);
+	cxl_cmd_alert_config_set_enable_alert_actions(
+		cmd, alertctx.enable_alert_actions);
+
+	rc = cxl_cmd_submit(cmd);
+	if (rc < 0) {
+		log_err(&ml, "cmd submission failed: %s\n", strerror(-rc));
+		goto release;
+	}
+
+	/* Any non-zero mailbox status is reported to the caller as -ENXIO */
+	mbox_status = cxl_cmd_get_mbox_status(cmd);
+	rc = 0;
+	if (mbox_status) {
+		log_err(&ml, "%s: mbox status: %d\n", __func__, mbox_status);
+		rc = -ENXIO;
+	}
+
+release:
+	cxl_cmd_unref(cmd);
+report:
+	if (rc)
+		log_err(&ml, "%s error: %s\n", name, strerror(-rc));
+
+	/* Human-readable JSON only when writing to an interactive stdout */
+	if (actx->f_out == stdout && isatty(1))
+		json_flags |= UTIL_JSON_HUMAN;
+	jdev = util_cxl_memdev_to_json(memdev, json_flags);
+	if (jdev && actx->jdevs)
+		json_object_array_add(actx->jdevs, jdev);
+
+	return rc;
+}
+
static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
int (*action)(struct cxl_memdev *memdev,
struct action_context *actx),
@@ -749,7 +955,8 @@ static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
}
if (action == action_setpartition || action == action_reserve_dpa ||
- action == action_free_dpa || action == action_update_fw)
+ action == action_free_dpa || action == action_update_fw ||
+ action == action_set_alert_config)
actx.jdevs = json_object_new_array();
if (err == argc) {
@@ -968,3 +1175,14 @@ int cmd_update_fw(int argc, const char **argv, struct cxl_ctx *ctx)
return count >= 0 ? 0 : EXIT_FAILURE;
}
+
+/*
+ * Entry point of 'cxl set-alert-config'.
+ *
+ * Runs action_set_alert_config() through memdev_action() with the
+ * set-alert-config option table, logs how many memdevs were handled (a
+ * negative result is logged as 0) and returns 0 when memdev_action() reported
+ * a non-negative count, EXIT_FAILURE otherwise.
+ */
+int cmd_set_alert_config(int argc, const char **argv, struct cxl_ctx *ctx)
+{
+	const char *usage =
+		"cxl set-alert-config <mem0> [<mem1>..<memN>] [<options>]";
+	int count, handled;
+
+	count = memdev_action(argc, argv, ctx, action_set_alert_config,
+			      set_alert_options, usage);
+	handled = count < 0 ? 0 : count;
+	log_info(&ml, "set alert configuration for %d mem%s\n", handled,
+		 count > 1 ? "s" : "");
+
+	if (count < 0)
+		return EXIT_FAILURE;
+	return 0;
+}
Add a new command: 'cxl-set-alert-config', which configures a device's warning alerts. A device's warning alert programmability and current state can be obtained via the 'cxl-list' command with the '-A' option. Example: { "memdev":"mem0", "ram_size":"1024.00 MiB (1073.74 MB)", "alert_config":{ "life_used_prog_warn_threshold_valid":true, ... "life_used_crit_alert_threshold":75, "life_used_prog_warn_threshold":30, ... }, "serial":"0", "host":"0000:0d:00.0" } cxl memdev: cmd_set_alert_config: set alert configuration for 1 mem Signed-off-by: Jehoon Park <jehoon.park@samsung.com> --- Documentation/cxl/cxl-set-alert-config.txt | 152 ++++++++++++++ Documentation/cxl/meson.build | 1 + cxl/builtin.h | 1 + cxl/cxl.c | 1 + cxl/memdev.c | 220 ++++++++++++++++++++- 5 files changed, 374 insertions(+), 1 deletion(-) create mode 100644 Documentation/cxl/cxl-set-alert-config.txt