@@ -235,6 +235,13 @@ inline static uint16_t osm_perfmgr_get_sweep_time_s(osm_perfmgr_t * p_perfmgr)
return p_perfmgr->sweep_time_s;
}
+/*
+ * Delete every node currently flagged inactive from the PerfMgr database.
+ *
+ * pm - performance manager whose database (pm->db) is pruned.
+ *
+ * Returns the count written by perfmgr_db_delete_inactive().  The status
+ * code of that call is not propagated; the count is pre-set to 0 so a
+ * defined value is returned even if the callee never stores one.
+ */
+inline static unsigned osm_perfmgr_delete_inactive(osm_perfmgr_t * pm)
+{
+	unsigned removed = 0;
+
+	perfmgr_db_delete_inactive(pm->db, &removed);
+	return removed;
+}
+
void osm_perfmgr_clear_counters(osm_perfmgr_t * p_perfmgr);
void osm_perfmgr_dump_counters(osm_perfmgr_t * p_perfmgr,
perfmgr_db_dump_t dump_type);
@@ -136,6 +136,7 @@ typedef struct db_port {
typedef struct db_node {
cl_map_item_t map_item; /* must be first */
uint64_t node_guid;
+ boolean_t active; /* actively being monitored */
boolean_t esp0;
db_port_t *ports;
uint8_t num_ports;
@@ -161,6 +162,7 @@ perfmgr_db_err_t perfmgr_db_create_entry(perfmgr_db_t * db, uint64_t guid,
boolean_t esp0, uint8_t num_ports,
char *node_name);
perfmgr_db_err_t perfmgr_db_delete_entry(perfmgr_db_t * db, uint64_t guid);
+/* Delete all nodes whose "active" flag is FALSE; if cnt is non-NULL it receives the number of nodes selected for deletion. */
+perfmgr_db_err_t perfmgr_db_delete_inactive(perfmgr_db_t * db, unsigned *cnt);
perfmgr_db_err_t perfmgr_db_add_err_reading(perfmgr_db_t * db, uint64_t guid,
uint8_t port,
@@ -182,6 +184,9 @@ perfmgr_db_err_t perfmgr_db_get_prev_dc(perfmgr_db_t * db, uint64_t guid,
perfmgr_db_err_t perfmgr_db_clear_prev_dc(perfmgr_db_t * db, uint64_t guid,
uint8_t port);
+/* Set the "active" flag of the node identified by guid; a GUID with no DB entry is left untouched. */
+perfmgr_db_err_t perfmgr_db_mark_active(perfmgr_db_t *db, uint64_t guid,
+ boolean_t active);
+
void perfmgr_db_clear_counters(perfmgr_db_t * db);
perfmgr_db_err_t perfmgr_db_dump(perfmgr_db_t * db, char *file,
perfmgr_db_dump_t dump_type);
@@ -239,7 +239,7 @@ static void help_update_desc(FILE *out, int detail)
static void help_perfmgr(FILE * out, int detail)
{
fprintf(out,
- "perfmgr [enable|disable|clear_counters|dump_counters|print_counters|dump_redir|clear_redir|set_rm_nodes|clear_rm_nodes|sweep_time[seconds]]\n");
+ "perfmgr [enable|disable|clear_counters|dump_counters|print_counters|dump_redir|clear_redir|set_rm_nodes|clear_rm_nodes|clear_inactive|sweep_time[seconds]]\n");
if (detail) {
fprintf(out,
"perfmgr -- print the performance manager state\n");
@@ -260,6 +260,8 @@ static void help_perfmgr(FILE * out, int detail)
fprintf(out,
" [[set|clear]_rm_nodes] -- enable/disable the removal of \"inactive\" nodes from the DB\n"
" Inactive nodes are those which no longer appear on the fabric\n");
+ fprintf(out,
+ " [clear_inactive] -- Delete inactive nodes from the DB\n");
}
}
#endif /* ENABLE_OSM_PERF_MGR */
@@ -1459,7 +1461,11 @@ static void perfmgr_parse(char **p_last, osm_opensm_t * p_osm, FILE * out)
osm_perfmgr_dump_counters(&p_osm->perfmgr,
PERFMGR_EVENT_DB_DUMP_HR);
}
+ } else if (strcmp(p_cmd, "clear_inactive") == 0) {
+ unsigned cnt = osm_perfmgr_delete_inactive(&p_osm->perfmgr);
+ fprintf(out, "Removed %u nodes from Database\n", cnt);
} else if (strcmp(p_cmd, "print_counters") == 0) {
+ char *port = NULL;
p_cmd = name_token(p_last);
if (p_cmd) {
osm_perfmgr_print_counters(&p_osm->perfmgr,
@@ -148,6 +148,8 @@ static void remove_marked_nodes(osm_perfmgr_t * pm)
if (pm->rm_nodes)
perfmgr_db_delete_entry(pm->db, pm->remove_list->guid);
+ else
+ perfmgr_db_mark_active(pm->db, pm->remove_list->guid, FALSE);
if (pm->remove_list->name)
free(pm->remove_list->name);
@@ -524,6 +526,8 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
goto Exit;
}
+ perfmgr_db_mark_active(pm->db, node_guid, TRUE);
+
/* issue the query for each port */
for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
ib_net16_t lid;
@@ -105,6 +105,7 @@ static inline perfmgr_db_err_t bad_node_port(db_node_t * node, uint8_t port)
return PERFMGR_EVENT_DB_GUIDNOTFOUND;
if (port >= node->num_ports || (!node->esp0 && port == 0))
return PERFMGR_EVENT_DB_PORTNOTFOUND;
+
return PERFMGR_EVENT_DB_SUCCESS;
}
@@ -139,6 +140,7 @@ static db_node_t *malloc_node(uint64_t guid, boolean_t esp0,
rc->ports[i].valid = FALSE;
}
snprintf(rc->node_name, sizeof(rc->node_name), "%s", name);
+ rc->active = FALSE;
return rc;
@@ -207,6 +209,62 @@ perfmgr_db_delete_entry(perfmgr_db_t * db, uint64_t guid)
return(PERFMGR_EVENT_DB_SUCCESS);
}
+/**********************************************************************
+ * Delete every node in the DB whose "active" flag is FALSE.
+ *
+ * db  - database to prune
+ * cnt - optional (may be NULL); receives the number of GUIDs passed to
+ *       perfmgr_db_delete_entry(), or 0 when the GUID list could not
+ *       be allocated
+ *
+ * Returns PERFMGR_EVENT_DB_NOMEM if the temporary GUID list cannot be
+ * grown (nothing is deleted in that case), PERFMGR_EVENT_DB_SUCCESS
+ * otherwise.
+ **********************************************************************/
+perfmgr_db_err_t
+perfmgr_db_delete_inactive(perfmgr_db_t * db, unsigned *cnt)
+{
+	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;
+	int i = 0;
+	int num = 0;
+	/* must start out NULL: the first realloc() then acts like malloc() */
+	uint64_t *guid_list = NULL;
+	uint64_t *grown;
+	cl_map_item_t *p_map_item;
+
+	/*
+	 * Collect the GUIDs first instead of deleting while iterating, so
+	 * the map is not modified underneath the traversal.  The walk is
+	 * done under db->lock, the same lock perfmgr_db_mark_active() holds
+	 * while it touches node entries.
+	 */
+	cl_plock_excl_acquire(&db->lock);
+	for (p_map_item = cl_qmap_head(&db->pc_data);
+	     p_map_item != cl_qmap_end(&db->pc_data);
+	     p_map_item = cl_qmap_next(p_map_item)) {
+		db_node_t *n = (db_node_t *)p_map_item;
+
+		if (n->active != FALSE)
+			continue;
+
+		/* keep the old pointer so it can still be freed on failure */
+		grown = realloc(guid_list, sizeof(*guid_list) * (num + 1));
+		if (!grown) {
+			num = 0;
+			rc = PERFMGR_EVENT_DB_NOMEM;
+			break;
+		}
+		guid_list = grown;
+		guid_list[num] = n->node_guid;
+		num++;
+	}
+	cl_plock_release(&db->lock);
+
+	/*
+	 * The lock is dropped before deleting.
+	 * NOTE(review): presumably perfmgr_db_delete_entry() acquires
+	 * db->lock itself -- confirm against its definition.
+	 */
+	for (i = 0; i < num; i++)
+		perfmgr_db_delete_entry(db, guid_list[i]);
+
+	free(guid_list);
+
+	if (cnt)
+		*cnt = num;
+
+	return rc;
+}
+
+/**********************************************************************
+ * Set the "active" flag on the DB entry for the given node GUID.
+ *
+ * The lookup and the update both happen with db->lock held
+ * exclusively.  When get() finds no entry for guid nothing is changed.
+ * PERFMGR_EVENT_DB_SUCCESS is returned in either case.
+ **********************************************************************/
+perfmgr_db_err_t
+perfmgr_db_mark_active(perfmgr_db_t *db, uint64_t guid, boolean_t active)
+{
+	db_node_t *entry;
+
+	cl_plock_excl_acquire(&db->lock);
+	entry = get(db, guid);
+	if (entry != NULL) {
+		entry->active = active;
+	}
+	cl_plock_release(&db->lock);
+
+	return PERFMGR_EVENT_DB_SUCCESS;
+}
+
+
/**********************************************************************
* Dump a reading vs the previous reading to stdout
**********************************************************************/
@@ -575,7 +633,7 @@ static void dump_node_mr(db_node_t * node, FILE * fp)
{
int i = 0;
- fprintf(fp, "\nName\tGUID\tPort\tLast Reset\t"
+ fprintf(fp, "\nName\tGUID\tActive\tPort\tLast Reset\t"
"%s\t%s\t"
"%s\t%s\t%s\t%s\t%s\t%s\t%s\t"
"%s\t%s\t%s\t%s\t%s\t%s\t%s\t"
@@ -609,13 +667,15 @@ static void dump_node_mr(db_node_t * node, FILE * fp)
since[strlen(since) - 1] = '\0'; /* remove \n */
fprintf(fp,
- "%s\t0x%" PRIx64 "\t%d\t%s\t%" PRIu64 "\t%" PRIu64 "\t"
+ "%s\t0x%" PRIx64 "\t%s\t%d\t%s\t%" PRIu64 "\t%" PRIu64 "\t"
"%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t"
"%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64
"\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64 "\t%" PRIu64
"\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64 "\t%" PRIu64
"\t%" PRIu64 "\t%" PRIu64 "\n", node->node_name,
- node->node_guid, i, since,
+ node->node_guid,
+ node->active ? "TRUE" : "FALSE",
+ i, since,
node->ports[i].err_total.symbol_err_cnt,
node->ports[i].err_total.link_err_recover,
node->ports[i].err_total.link_downed,
@@ -655,7 +715,7 @@ static void dump_node_hr(db_node_t * node, FILE * fp)
since[strlen(since) - 1] = '\0'; /* remove \n */
- fprintf(fp, "\"%s\" 0x%" PRIx64 " port %d (Since %s)\n"
+ fprintf(fp, "\"%s\" 0x%" PRIx64 " active %s port %d (Since %s)\n"
" symbol_err_cnt : %" PRIu64 "\n"
" link_err_recover : %" PRIu64 "\n"
" link_downed : %" PRIu64 "\n"
@@ -678,6 +738,7 @@ static void dump_node_hr(db_node_t * node, FILE * fp)
" multicast_rcv_pkts : %" PRIu64 "\n",
node->node_name,
node->node_guid,
+ node->active ? "TRUE":"FALSE",
i,
since,
node->ports[i].err_total.symbol_err_cnt,
When "missing" nodes are not removed by default, mark them as inactive. In addition, add a console option to remove them. Signed-off-by: Ira Weiny <weiny2@llnl.gov> --- include/opensm/osm_perfmgr.h | 7 ++++ include/opensm/osm_perfmgr_db.h | 5 +++ opensm/osm_console.c | 8 ++++- opensm/osm_perfmgr.c | 4 ++ opensm/osm_perfmgr_db.c | 69 ++++++++++++++++++++++++++++++++++++-- 5 files changed, 88 insertions(+), 5 deletions(-)