From patchwork Thu Feb 21 21:33:46 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 2173221 X-Patchwork-Delegate: hal@mellanox.com Return-Path: X-Original-To: patchwork-linux-rdma@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork2.kernel.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by patchwork2.kernel.org (Postfix) with ESMTP id 3211EDF215 for ; Thu, 21 Feb 2013 21:33:48 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753759Ab3BUVdr (ORCPT ); Thu, 21 Feb 2013 16:33:47 -0500 Received: from prdiron-1.llnl.gov ([128.15.143.171]:42660 "EHLO prdiron-1.llnl.gov" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753709Ab3BUVdr (ORCPT ); Thu, 21 Feb 2013 16:33:47 -0500 X-Attachments: Received: from eris.llnl.gov (HELO trebuchet.chaos) ([128.115.7.7]) by prdiron-1.llnl.gov with SMTP; 21 Feb 2013 13:33:46 -0800 Date: Thu, 21 Feb 2013 13:33:46 -0800 From: Ira Weiny To: Hal Rosenstock Cc: "linux-rdma@vger.kernel.org" Subject: [PATCH 03/06] opensm/perfmgr: Issue PortCountersExtended query when supported Message-Id: <20130221133346.eb44c1bd053d295da91c451e@llnl.gov> X-Mailer: Sylpheed 3.3.0 (GTK+ 2.18.9; x86_64-unknown-linux-gnu) Mime-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Signed-off-by: Ira Weiny --- include/opensm/osm_perfmgr_db.h | 2 +- opensm/osm_perfmgr.c | 311 ++++++++++++++++++++++++++++++++------- opensm/osm_perfmgr_db.c | 2 +- 3 files changed, 258 insertions(+), 57 deletions(-) diff --git a/include/opensm/osm_perfmgr_db.h b/include/opensm/osm_perfmgr_db.h index 5491d2f..ada6765 100644 --- a/include/opensm/osm_perfmgr_db.h +++ b/include/opensm/osm_perfmgr_db.h @@ -207,7 +207,7 @@ void perfmgr_db_fill_err_read(ib_port_counters_t * wire_read, perfmgr_db_err_reading_t * reading); void 
perfmgr_db_fill_data_cnt_read_pc(ib_port_counters_t * wire_read, perfmgr_db_data_cnt_reading_t * reading); -void perfmgr_db_fill_data_cnt_read_epc(ib_port_counters_ext_t * wire_read, +void perfmgr_db_fill_data_cnt_read_pce(ib_port_counters_ext_t * wire_read, perfmgr_db_data_cnt_reading_t * reading); END_C_DECLS diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c index 69b3d77..d73b2a2 100644 --- a/opensm/osm_perfmgr.c +++ b/opensm/osm_perfmgr.c @@ -566,6 +566,51 @@ static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm, } /********************************************************************** + * return if PortCountersExtended are supported. + **********************************************************************/ +static boolean_t pce_supported(monitored_node_t *mon_node, uint8_t port) +{ + monitored_port_t *mon_port = &(mon_node->port[port]); + return (mon_port->cpi_valid + && (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)); +} + +/********************************************************************** + * Form and send the PortCountersExtended MAD for a single port. 
+ **********************************************************************/ +static ib_api_status_t perfmgr_send_pce_mad(osm_perfmgr_t * perfmgr, + ib_net16_t dest_lid, + ib_net32_t dest_qp, + uint16_t pkey_ix, + uint8_t port, uint8_t mad_method, + osm_madw_context_t * p_context, + uint8_t sl) +{ + ib_api_status_t status = IB_SUCCESS; + ib_port_counters_ext_t *port_counter_ext = NULL; + ib_perfmgt_mad_t *pm_mad = NULL; + osm_madw_t *p_madw = NULL; + + OSM_LOG_ENTER(perfmgr->log); + + p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, + mad_method, IB_MAD_ATTR_PORT_CNTRS_EXT, p_context, + &pm_mad); + if (p_madw == NULL) + return IB_INSUFFICIENT_MEMORY; + + port_counter_ext = (ib_port_counters_ext_t *) & pm_mad->data; + memset(port_counter_ext, 0, sizeof(*port_counter_ext)); + port_counter_ext->port_select = port; + port_counter_ext->counter_select = cl_hton16(0xFF); + + status = perfmgr_send_mad(perfmgr, p_madw); + + OSM_LOG_EXIT(perfmgr->log); + return status; +} + +/********************************************************************** * query the Port Counters of all the nodes in the subnet. 
**********************************************************************/ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) @@ -667,6 +712,28 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) PRIx64 " port %d (%s)\n", node->node_info.node_guid, port, node->print_desc); + + if (pce_supported(mon_node, port)) { + +#if ENABLE_OSM_PERF_MGR_PROFILE + gettimeofday(&mad_context.perfmgr_context.query_start, NULL); +#endif + status = perfmgr_send_pce_mad(pm, lid, remote_qp, + mon_node->port[port].pkey_ix, + port, + IB_MAD_METHOD_GET, + &mad_context, + 0); /* FIXME SL != 0 */ + if (status != IB_SUCCESS) + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 5417: Failed to issue " + "port counter query for " + "node 0x%" PRIx64 " port " + "%d (%s)\n", + node->node_info.node_guid, + port, + node->print_desc); + } } } Exit: @@ -1008,11 +1075,9 @@ void osm_perfmgr_destroy(osm_perfmgr_t * pm) **********************************************************************/ static void perfmgr_check_oob_clear(osm_perfmgr_t * pm, monitored_node_t * mon_node, uint8_t port, - perfmgr_db_err_reading_t * cr, - perfmgr_db_data_cnt_reading_t * dc) + perfmgr_db_err_reading_t * cr) { perfmgr_db_err_reading_t prev_err; - perfmgr_db_data_cnt_reading_t prev_dc; if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err) != PERFMGR_EVENT_DB_SUCCESS) { @@ -1040,31 +1105,11 @@ static void perfmgr_check_oob_clear(osm_perfmgr_t * pm, mon_node->name, mon_node->guid, port); perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); } - - /* FIXME handle extended counters */ - if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc) - != PERFMGR_EVENT_DB_SUCCESS) { - OSM_LOG(pm->log, OSM_LOG_VERBOSE, - "Failed to find previous data count " - "reading for %s (0x%" PRIx64 ") port %u\n", - mon_node->name, mon_node->guid, port); - return; - } - - if (dc->xmit_data < prev_dc.xmit_data || - dc->rcv_data < prev_dc.rcv_data || - dc->xmit_pkts < 
prev_dc.xmit_pkts || - dc->rcv_pkts < prev_dc.rcv_pkts) { - OSM_LOG(pm->log, OSM_LOG_ERROR, - "PerfMgr: ERR 540B: Detected an out of band data counter " - "clear on node %s (0x%" PRIx64 ") port %u\n", - mon_node->name, mon_node->guid, port); - perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); - } } /********************************************************************** * Return 1 if the value is "close" to overflowing + * "close" is defined at 25% for now **********************************************************************/ static int counter_overflow_4(uint8_t val) { @@ -1086,6 +1131,11 @@ static int counter_overflow_32(ib_net32_t val) return (cl_ntoh32(val) >= (UINT32_MAX - (UINT32_MAX / 4))); } +static int counter_overflow_64(ib_net64_t val) +{ + return (cl_ntoh64(val) >= (UINT64_MAX - (UINT64_MAX / 4))); +} + /********************************************************************** * Check if the port counters have overflowed and if so issue a clear * MAD to the port. @@ -1112,10 +1162,11 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm, counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) || counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) || counter_overflow_16(pc->vl15_dropped) || - counter_overflow_32(pc->xmit_data) || - counter_overflow_32(pc->rcv_data) || - counter_overflow_32(pc->xmit_pkts) || - counter_overflow_32(pc->rcv_pkts)) { + (!pce_supported(mon_node, port) && + (counter_overflow_32(pc->xmit_data) || + counter_overflow_32(pc->rcv_data) || + counter_overflow_32(pc->xmit_pkts) || + counter_overflow_32(pc->rcv_pkts)))) { osm_node_t *p_node = NULL; ib_net16_t lid = 0; @@ -1157,6 +1208,78 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm, mon_node->name, mon_node->guid, port); perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); + if (!pce_supported(mon_node, port)) + perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); + } + +Exit: + OSM_LOG_EXIT(pm->log); +} + 
+/********************************************************************** + * Check if the PortCountersExtended counters are close to overflowing and + * if so issue a PortCountersExtended clear (Set) MAD to the port. + **********************************************************************/ +static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm, + monitored_node_t * mon_node, + int16_t pkey_ix, + uint8_t port, + ib_port_counters_ext_t * pc) +{ + osm_madw_context_t mad_context; + ib_api_status_t status; + ib_net32_t remote_qp; + + OSM_LOG_ENTER(pm->log); + + if (counter_overflow_64(pc->xmit_data) || + counter_overflow_64(pc->rcv_data) || + counter_overflow_64(pc->xmit_pkts) || + counter_overflow_64(pc->rcv_pkts) || + counter_overflow_64(pc->unicast_xmit_pkts) || + counter_overflow_64(pc->unicast_rcv_pkts) || + counter_overflow_64(pc->multicast_xmit_pkts) || + counter_overflow_64(pc->multicast_rcv_pkts)) { + osm_node_t *p_node = NULL; + ib_net16_t lid = 0; + + if (!mon_node->port[port].valid) + goto Exit; + + osm_log(pm->log, OSM_LOG_VERBOSE, + "PerfMgr: PortCountersExtended overflow: %s (0x%" + PRIx64 ") port %d; clearing counters\n", + mon_node->name, mon_node->guid, port); + + cl_plock_acquire(&pm->osm->lock); + p_node = + osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); + lid = get_lid(p_node, port, mon_node); + cl_plock_release(&pm->osm->lock); + if (lid == 0) { + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5418: " + "Failed to clear counters for %s (0x%" + PRIx64 ") port %d; failed to get lid\n", + mon_node->name, mon_node->guid, port); + goto Exit; + } + + remote_qp = get_qp(NULL, port); + + mad_context.perfmgr_context.node_guid = mon_node->guid; + mad_context.perfmgr_context.port = port; + mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; + /* clear port counters */ + status = perfmgr_send_pce_mad(pm, lid, remote_qp, pkey_ix, + port, IB_MAD_METHOD_SET, + &mad_context, + 0); /* FIXME SL != 0 */ + if (status != IB_SUCCESS) + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5419: " + "Failed 
to send clear counters MAD for %s (0x%" + PRIx64 ") port %d\n", + mon_node->name, mon_node->guid, port); + perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); } @@ -1354,6 +1477,47 @@ static void handle_redirect(osm_perfmgr_t *pm, } /********************************************************************** + * Detect if someone else on the network could have cleared the data counters + * without us knowing. This is easy to detect because the counters never + * wrap but are "sticky". Used for both the PortCounters and PortCountersExtended versions. + * + * The one time this will not work is if the port is counting data fast + * enough to have the reading overtake the previous reading. In this case, + * counters will be missed. + **********************************************************************/ +static void perfmgr_check_data_cnt_oob_clear(osm_perfmgr_t * pm, + monitored_node_t * mon_node, + uint8_t port, + perfmgr_db_data_cnt_reading_t * dc) +{ + perfmgr_db_data_cnt_reading_t prev_dc; + + if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc) + != PERFMGR_EVENT_DB_SUCCESS) { + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Failed to find previous data count " + "reading for %s (0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); + return; + } + + if (dc->xmit_data < prev_dc.xmit_data || + dc->rcv_data < prev_dc.rcv_data || + dc->xmit_pkts < prev_dc.xmit_pkts || + dc->rcv_pkts < prev_dc.rcv_pkts || + dc->unicast_xmit_pkts < prev_dc.unicast_xmit_pkts || + dc->unicast_rcv_pkts < prev_dc.unicast_rcv_pkts || + dc->multicast_xmit_pkts < prev_dc.multicast_xmit_pkts || + dc->multicast_rcv_pkts < prev_dc.multicast_rcv_pkts) { + OSM_LOG(pm->log, OSM_LOG_ERROR, + "PerfMgr: ERR 540B: Detected an out of band data counter " + "clear on node %s (0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); + perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); + } +} + +/********************************************************************** * The dispatcher uses a thread pool which will 
call this function when * there is a thread available to process the mad received on the wire. **********************************************************************/ @@ -1362,8 +1526,6 @@ static void pc_recv_process(void *context, void *data) osm_perfmgr_t *pm = context; osm_madw_t *p_madw = data; osm_madw_context_t *mad_context = &p_madw->context; - ib_port_counters_t *wire_read = - (ib_port_counters_t *) & osm_madw_get_perfmgt_mad_ptr(p_madw)->data; ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); uint64_t node_guid = mad_context->perfmgr_context.node_guid; uint8_t port = mad_context->perfmgr_context.port; @@ -1392,6 +1554,7 @@ static void pc_recv_process(void *context, void *data) PRIx64 " port %u\n", p_mad->status, node_guid, port); CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || + p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT || p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); /* capture CLASS_PORT_INFO data */ @@ -1431,33 +1594,71 @@ static void pc_recv_process(void *context, void *data) goto Exit; } - perfmgr_db_fill_err_read(wire_read, &err_reading); - /* FIXME separate query for extended counters if they are supported - * on the port. 
- */ - perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading); - - /* detect an out of band clear on the port */ - if (mad_context->perfmgr_context.mad_method != IB_MAD_METHOD_SET) - perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading, - &data_reading); - - if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) { - /* log errors from this reading */ - if (pm->subn->opt.perfmgr_log_errors) - perfmgr_log_errors(pm, p_mon_node, port, &err_reading); - - perfmgr_db_add_err_reading(pm->db, node_guid, port, - &err_reading); - perfmgr_db_add_dc_reading(pm->db, node_guid, port, - &data_reading); + if (p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT) { + ib_port_counters_ext_t *ext_wire_read = + (ib_port_counters_ext_t *) + &osm_madw_get_perfmgt_mad_ptr(p_madw)->data; + + /* convert wire data to perfmgr data counter reading */ + perfmgr_db_fill_data_cnt_read_pce(ext_wire_read, &data_reading); + + /* detect an out of band clear on the port */ + if (mad_context->perfmgr_context.mad_method != + IB_MAD_METHOD_SET) + perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port, + &data_reading); + + /* add counter */ + if (mad_context->perfmgr_context.mad_method + == IB_MAD_METHOD_GET) { + perfmgr_db_add_dc_reading(pm->db, node_guid, port, + &data_reading); + } else { + perfmgr_db_clear_prev_dc(pm->db, node_guid, port); + } + + /* check overflow */ + perfmgr_check_pce_overflow(pm, p_mon_node, + p_mon_node->port[port].pkey_ix, + port, ext_wire_read); } else { - perfmgr_db_clear_prev_err(pm->db, node_guid, port); - perfmgr_db_clear_prev_dc(pm->db, node_guid, port); - } + boolean_t pce_sup = pce_supported(p_mon_node, port); + ib_port_counters_t *wire_read = + (ib_port_counters_t *) + &osm_madw_get_perfmgt_mad_ptr(p_madw)->data; + + perfmgr_db_fill_err_read(wire_read, &err_reading); + if (!pce_sup) + perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading); + + /* detect an out of band clear on the port */ + if (mad_context->perfmgr_context.mad_method != 
IB_MAD_METHOD_SET) { + perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading); + if (!pce_sup) + perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port, + &data_reading); + } - perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix, - port, wire_read); + if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) { + /* log errors from this reading */ + if (pm->subn->opt.perfmgr_log_errors) + perfmgr_log_errors(pm, p_mon_node, port, &err_reading); + + perfmgr_db_add_err_reading(pm->db, node_guid, port, + &err_reading); + if (!pce_sup) + perfmgr_db_add_dc_reading(pm->db, node_guid, port, + &data_reading); + } else { + perfmgr_db_clear_prev_err(pm->db, node_guid, port); + if (!pce_sup) + perfmgr_db_clear_prev_dc(pm->db, node_guid, port); + } + + perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix, + port, wire_read); + + } #ifdef ENABLE_OSM_PERF_MGR_PROFILE do { diff --git a/opensm/osm_perfmgr_db.c b/opensm/osm_perfmgr_db.c index 116bb07..f8d0403 100644 --- a/opensm/osm_perfmgr_db.c +++ b/opensm/osm_perfmgr_db.c @@ -1053,7 +1053,7 @@ perfmgr_db_fill_data_cnt_read_pc(ib_port_counters_t * wire_read, } void -perfmgr_db_fill_data_cnt_read_epc(ib_port_counters_ext_t * wire_read, +perfmgr_db_fill_data_cnt_read_pce(ib_port_counters_ext_t * wire_read, perfmgr_db_data_cnt_reading_t * reading) { reading->xmit_data = cl_ntoh64(wire_read->xmit_data);