@@ -1,7 +1,7 @@
/*
* Copyright (c) 2007 The Regents of the University of California.
* Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ * Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -90,11 +90,17 @@ typedef enum {
PERFMGR_SWEEP_SUSPENDED
} osm_perfmgr_sweep_state_t;
-/* Redirection information */
-typedef struct redir {
- ib_net16_t redir_lid;
- ib_net32_t redir_qp;
-} redir_t;
+typedef struct monitored_port {
+ uint16_t pkey_ix;
+ ib_net16_t orig_lid;
+ boolean_t redirection;
+ boolean_t valid;
+ /* Redirection fields from ClassPortInfo */
+ ib_gid_t gid;
+ ib_net16_t lid;
+ ib_net16_t pkey;
+ ib_net32_t qp;
+} monitored_port_t;
/* Node to store information about nodes being monitored */
typedef struct monitored_node {
@@ -104,7 +110,7 @@ typedef struct monitored_node {
boolean_t esp0;
char *name;
uint32_t num_ports;
- redir_t redir_port[1]; /* redirection on a per port basis */
+ monitored_port_t port[1];
} monitored_node_t;
struct osm_opensm;
@@ -134,6 +140,8 @@ typedef struct osm_perfmgr {
uint32_t max_outstanding_queries;
cl_qmap_t monitored_map; /* map the nodes being tracked */
monitored_node_t *remove_list;
+ ib_net64_t port_guid;
+ int16_t local_port;
} osm_perfmgr_t;
/*
* FIELDS
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2007 The Regents of the University of California.
* Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ * Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -64,6 +64,7 @@
#include <opensm/osm_log.h>
#include <opensm/osm_node.h>
#include <opensm/osm_opensm.h>
+#include <opensm/osm_helper.h>
#define PERFMGR_INITIAL_TID_VALUE 0xcafe
@@ -194,6 +195,7 @@ static void perfmgr_mad_send_err_callback(void *bind_context,
uint8_t port = context->perfmgr_context.port;
cl_map_item_t *p_node;
monitored_node_t *p_mon_node;
+ ib_net16_t orig_lid;
OSM_LOG_ENTER(pm->log);
@@ -225,9 +227,11 @@ static void perfmgr_mad_send_err_callback(void *bind_context,
p_mon_node->num_ports);
goto Exit;
}
- /* Clear redirection info */
- p_mon_node->redir_port[port].redir_lid = 0;
- p_mon_node->redir_port[port].redir_qp = 0;
+ /* Clear redirection info for this port except orig_lid */
+ orig_lid = p_mon_node->port[port].orig_lid;
+ memset(&p_mon_node->port[port], 0, sizeof(monitored_port_t));
+ p_mon_node->port[port].orig_lid = orig_lid;
+ p_mon_node->port[port].valid = TRUE;
cl_plock_release(&pm->osm->lock);
}
@@ -256,7 +260,7 @@ ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid)
goto Exit;
}
- bind_info.port_guid = port_guid;
+ bind_info.port_guid = pm->port_guid = port_guid;
bind_info.mad_class = IB_MCLASS_PERF;
bind_info.class_version = 1;
bind_info.is_responder = FALSE;
@@ -309,24 +313,14 @@ static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port)
ib_net32_t qp = IB_QP1;
if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
- mon_node->redir_port[port].redir_lid &&
- mon_node->redir_port[port].redir_qp)
- qp = mon_node->redir_port[port].redir_qp;
+ mon_node->port[port].redirection && mon_node->port[port].qp)
+ qp = mon_node->port[port].qp;
return qp;
}
-/**********************************************************************
- * Given a node, a port, and an optional monitored node,
- * return the appropriate lid to query that port
- **********************************************************************/
-static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
- monitored_node_t * mon_node)
+static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port)
{
- if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
- mon_node->redir_port[port].redir_lid)
- return mon_node->redir_port[port].redir_lid;
-
switch (p_node->node_info.node_type) {
case IB_NODE_TYPE_CA:
case IB_NODE_TYPE_ROUTER:
@@ -339,12 +333,26 @@ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
}
/**********************************************************************
+ * Given a node, a port, and an optional monitored node,
+ * return the lid appropriate to query that port
+ **********************************************************************/
+static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
+ monitored_node_t * mon_node)
+{
+ if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
+ mon_node->port[port].lid)
+ return mon_node->port[port].lid;
+
+ return get_base_lid(p_node, port);
+}
+
+/**********************************************************************
* Form and send the Port Counters MAD for a single port.
**********************************************************************/
static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
ib_net16_t dest_lid,
- ib_net32_t dest_qp, uint8_t port,
- uint8_t mad_method,
+ ib_net32_t dest_qp, uint16_t pkey_ix,
+ uint8_t port, uint8_t mad_method,
osm_madw_context_t * p_context)
{
ib_api_status_t status = IB_SUCCESS;
@@ -383,8 +391,7 @@ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
p_madw->mad_addr.addr_type.gsi.remote_qkey =
cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
- /* FIXME what about other partitions */
- p_madw->mad_addr.addr_type.gsi.pkey_ix = 0;
+ p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
p_madw->mad_addr.addr_type.gsi.service_level = 0;
p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
p_madw->resp_expected = TRUE;
@@ -420,6 +427,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
monitored_node_t *mon_node = NULL;
uint32_t num_ports;
+ int port;
OSM_LOG_ENTER(pm->log);
@@ -428,7 +436,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
/* if not already in map add it */
num_ports = osm_node_get_num_physp(node);
mon_node = malloc(sizeof(*mon_node) +
- sizeof(redir_t) * num_ports);
+ sizeof(monitored_port_t) * num_ports);
if (!mon_node) {
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C06: "
"malloc failed: not handling node %s"
@@ -437,7 +445,7 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
goto Exit;
}
memset(mon_node, 0,
- sizeof(*mon_node) + sizeof(redir_t) * num_ports);
+ sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports);
mon_node->guid = node_guid;
mon_node->name = strdup(node->print_desc);
mon_node->num_ports = num_ports;
@@ -445,6 +453,11 @@ static void collect_guids(cl_map_item_t * p_map_item, void *context)
mon_node->esp0 = (node->sw &&
ib_switch_info_is_enhanced_port0(&node->sw->
switch_info));
+ for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
+ mon_node->port[port].orig_lid = get_base_lid(node, port);
+ mon_node->port[port].valid = TRUE;
+ }
+
cl_qmap_insert(&pm->monitored_map, node_guid,
(cl_map_item_t *) mon_node);
}
@@ -501,6 +514,9 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
if (!osm_node_get_physp_ptr(node, port))
continue;
+ if (!mon_node->port[port].valid)
+ continue;
+
lid = get_lid(node, port, mon_node);
if (lid == 0) {
OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
@@ -521,8 +537,10 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
PRIx64 " port %d (lid %u) (%s)\n", node_guid, port,
cl_ntoh16(lid), node->print_desc);
- status = perfmgr_send_pc_mad(pm, lid, remote_qp, port,
- IB_MAD_METHOD_GET, &mad_context);
+ status = perfmgr_send_pc_mad(pm, lid, remote_qp,
+ mon_node->port[port].pkey_ix,
+ port, IB_MAD_METHOD_GET,
+ &mad_context);
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C09: "
"Failed to issue port counter query for node 0x%"
@@ -769,6 +787,24 @@ void osm_perfmgr_process(osm_perfmgr_t * pm)
pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)
perfmgr_discovery(pm->subn->p_osm);
+ /* if redirection enabled, determine local port */
+ if (pm->subn->opt.perfmgr_redir && pm->local_port == -1) {
+ osm_node_t *p_node;
+ osm_port_t *p_port;
+
+ CL_PLOCK_ACQUIRE(pm->sm->p_lock);
+ p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
+ if (p_port) {
+ p_node = p_port->p_node;
+ CL_ASSERT(p_node);
+ pm->local_port =
+ ib_node_info_get_local_port_num(&p_node->node_info);
+ } else
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C87: No PerfMgr port object\n");
+ CL_PLOCK_RELEASE(pm->sm->p_lock);
+ }
+
#if ENABLE_OSM_PERF_MGR_PROFILE
gettimeofday(&before, NULL);
#endif
@@ -932,8 +968,8 @@ static int counter_overflow_32(ib_net32_t val)
* MAD to the port.
**********************************************************************/
static void perfmgr_check_overflow(osm_perfmgr_t * pm,
- monitored_node_t * mon_node, uint8_t port,
- ib_port_counters_t * pc)
+ monitored_node_t * mon_node, int16_t pkey_ix,
+ uint8_t port, ib_port_counters_t * pc)
{
osm_madw_context_t mad_context;
ib_api_status_t status;
@@ -960,6 +996,9 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
osm_node_t *p_node = NULL;
ib_net16_t lid = 0;
+ if (!mon_node->port[port].valid)
+ goto Exit;
+
osm_log(pm->log, OSM_LOG_VERBOSE,
"PerfMgr: Counter overflow: %s (0x%" PRIx64
") port %d; clearing counters\n",
@@ -984,8 +1023,9 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
mad_context.perfmgr_context.port = port;
mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
/* clear port counters */
- status = perfmgr_send_pc_mad(pm, lid, remote_qp, port,
- IB_MAD_METHOD_SET, &mad_context);
+ status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
+ port, IB_MAD_METHOD_SET,
+ &mad_context);
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C11: "
"Failed to send clear counters MAD for %s (0x%"
@@ -1043,6 +1083,64 @@ static void perfmgr_log_events(osm_perfmgr_t * pm,
time_diff, mon_node->name, mon_node->guid, port);
}
+static int16_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey)
+{
+ int16_t pkey_ix = -1;
+ osm_port_t *p_port;
+ osm_pkey_tbl_t *p_pkey_tbl;
+ ib_net16_t *p_orig_pkey;
+ uint16_t block;
+ uint8_t index;
+
+ OSM_LOG_ENTER(pm->log);
+
+ CL_PLOCK_ACQUIRE(pm->sm->p_lock);
+ p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
+ if (!p_port) {
+ CL_PLOCK_RELEASE(pm->sm->p_lock);
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C1E: No PerfMgr port object\n");
+ goto Exit;
+ }
+ if (p_port->p_physp && osm_physp_is_valid(p_port->p_physp)) {
+ p_pkey_tbl = &p_port->p_physp->pkeys;
+ if (!p_pkey_tbl) {
+ CL_PLOCK_RELEASE(pm->sm->p_lock);
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+ "No PKey table found for PerfMgr port\n");
+ goto Exit;
+ }
+ p_orig_pkey = cl_map_get(&p_pkey_tbl->keys,
+ ib_pkey_get_base(pkey));
+ if (!p_orig_pkey) {
+ CL_PLOCK_RELEASE(pm->sm->p_lock);
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+ "PKey 0x%x not found for PerfMgr port\n",
+ cl_ntoh16(pkey));
+ goto Exit;
+ }
+ if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey,
+ &block, &index) == IB_SUCCESS) {
+ CL_PLOCK_RELEASE(pm->sm->p_lock);
+ pkey_ix = block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index;
+ } else {
+ CL_PLOCK_RELEASE(pm->sm->p_lock);
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 0x4C1F: Failed to obtain P_Key 0x%04x "
+ "block and index for PerfMgr port\n",
+ cl_ntoh16(pkey));
+ }
+ } else {
+ CL_PLOCK_RELEASE(pm->sm->p_lock);
+ OSM_LOG(pm->log, OSM_LOG_ERROR,
+ "ERR 4C20: Local PerfMgt port physp invalid\n");
+ }
+
+Exit:
+ OSM_LOG_EXIT(pm->log);
+ return pkey_ix;
+}
+
/**********************************************************************
* The dispatcher uses a thread pool which will call this function when
* there is a thread available to process the mad received on the wire.
@@ -1061,6 +1159,8 @@ static void pc_recv_process(void *context, void *data)
perfmgr_db_data_cnt_reading_t data_reading;
cl_map_item_t *p_node;
monitored_node_t *p_mon_node;
+ int16_t pkey_ix = 0;
+ boolean_t valid = TRUE;
OSM_LOG_ENTER(pm->log);
@@ -1084,7 +1184,8 @@ static void pc_recv_process(void *context, void *data)
p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
/* Response could also be redirection (IBM eHCA PMA does this) */
- if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
+ if (p_mad->status & IB_MAD_STATUS_REDIRECT &&
+ p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
char gid_str[INET6_ADDRSTRLEN];
ib_class_port_info_t *cpi =
(ib_class_port_info_t *) &
@@ -1097,17 +1198,46 @@ static void pc_recv_process(void *context, void *data)
inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str,
sizeof gid_str), cl_ntoh32(cpi->redir_qp));
- /* LID or GID redirection ? */
- /* For GID redirection, need to get PathRecord from SA */
+ if (!pm->subn->opt.perfmgr_redir) {
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+ "Redirection requested but disabled\n");
+ valid = FALSE;
+ }
+
+ /* valid redirection ? */
if (cpi->redir_lid == 0) {
+ if (!ib_gid_is_notzero(&cpi->redir_gid)) {
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+ "Invalid redirection "
+ "(both redirect LID and GID are zero)\n");
+ valid = FALSE;
+ }
+ }
+ if (cpi->redir_qp == 0) {
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n");
+ valid = FALSE;
+ }
+ if (cpi->redir_pkey == 0) {
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n");
+ valid = FALSE;
+ }
+ if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) {
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n");
+ valid = FALSE;
+ }
+
+ pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey);
+ if (pkey_ix == -1) {
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
- "GID redirection not currently implemented!\n");
- goto Exit;
+ "Index for Pkey 0x%x not found\n",
+ cl_ntoh16(cpi->redir_pkey));
+ valid = FALSE;
}
- if (!pm->subn->opt.perfmgr_redir) {
- OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: "
- "redirection requested but disabled\n");
+ if (cpi->redir_lid == 0) {
+ /* GID redirection: get PathRecord information */
+ OSM_LOG(pm->log, OSM_LOG_VERBOSE,
+ "GID redirection not currently supported\n");
goto Exit;
}
@@ -1122,13 +1252,24 @@ static void pc_recv_process(void *context, void *data)
p_mon_node->num_ports);
goto Exit;
}
- p_mon_node->redir_port[port].redir_lid = cpi->redir_lid;
- p_mon_node->redir_port[port].redir_qp = cpi->redir_qp;
+ p_mon_node->port[port].redirection = TRUE;
+ p_mon_node->port[port].valid = valid;
+ memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid,
+ sizeof(ib_gid_t));
+ p_mon_node->port[port].lid = cpi->redir_lid;
+ p_mon_node->port[port].qp = cpi->redir_qp;
+ p_mon_node->port[port].pkey = cpi->redir_pkey;
+ if (pkey_ix != -1)
+ p_mon_node->port[port].pkey_ix = pkey_ix;
cl_plock_release(&pm->osm->lock);
+ if (!valid)
+ goto Exit;
+
/* Finally, reissue the query to the redirected location */
status = perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp,
- port, mad_context->perfmgr_context.
+ pkey_ix, port,
+ mad_context->perfmgr_context.
mad_method, mad_context);
if (status != IB_SUCCESS)
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C14: "
@@ -1163,7 +1304,7 @@ static void pc_recv_process(void *context, void *data)
perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
}
- perfmgr_check_overflow(pm, p_mon_node, port, wire_read);
+ perfmgr_check_overflow(pm, p_mon_node, pkey_ix, port, wire_read);
#if ENABLE_OSM_PERF_MGR_PROFILE
do {
@@ -1208,6 +1349,7 @@ ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm,
pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
pm->osm = osm;
+ pm->local_port = -1;
status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
if (status != IB_SUCCESS)