From patchwork Fri Mar 1 01:09:43 2013
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Ira Weiny
X-Patchwork-Id: 2200171
X-Patchwork-Delegate: hal@mellanox.com
Return-Path: 
X-Original-To: patchwork-linux-rdma@patchwork.kernel.org
Delivered-To: patchwork-process-083081@patchwork2.kernel.org
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by patchwork2.kernel.org (Postfix) with ESMTP id A4631DF2A2
	for ; Fri, 1 Mar 2013 01:09:45 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752014Ab3CABJo (ORCPT );
	Thu, 28 Feb 2013 20:09:44 -0500
Received: from prdiron-2.llnl.gov ([128.15.143.172]:37360 "EHLO
	prdiron-2.llnl.gov" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1751078Ab3CABJo (ORCPT );
	Thu, 28 Feb 2013 20:09:44 -0500
X-Attachments: 
Received: from eris.llnl.gov (HELO trebuchet.chaos) ([128.115.7.7])
	by prdiron-2.llnl.gov with SMTP; 28 Feb 2013 17:09:43 -0800
Date: Thu, 28 Feb 2013 17:09:43 -0800
From: Ira Weiny
To: "linux-rdma@vger.kernel.org"
Cc: Hal Rosenstock
Subject: [PATCH 07/07] opensm/perfmgr: add sl support
Message-Id: <20130228170943.b8b2fe6a13ecaf7aaf2e97b3@llnl.gov>
X-Mailer: Sylpheed 3.3.0 (GTK+ 2.18.9; x86_64-unknown-linux-gnu)
Mime-Version: 1.0
Sender: linux-rdma-owner@vger.kernel.org
Precedence: bulk
List-ID: 
X-Mailing-List: linux-rdma@vger.kernel.org

SLs are queried internally when running as the master SM and queried from
the SA when running as a standby or inactive SM.

Signed-off-by: Ira Weiny
---
 include/opensm/osm_perfmgr.h |  11 ++
 opensm/osm_perfmgr.c         | 345 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 336 insertions(+), 20 deletions(-)

diff --git a/include/opensm/osm_perfmgr.h b/include/opensm/osm_perfmgr.h
index 4141d41..5ca1cf4 100644
--- a/include/opensm/osm_perfmgr.h
+++ b/include/opensm/osm_perfmgr.h
@@ -2,6 +2,7 @@
  * Copyright (c) 2007 The Regents of the University of California.
  * Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
+ * Copyright (c) 2013 Lawrence Livermore National Security, All rights reserved
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -90,6 +91,11 @@ typedef enum {
 	PERFMGR_SWEEP_SUSPENDED
 } osm_perfmgr_sweep_state_t;
 
+typedef struct pr_map_item {
+	cl_map_item_t map_item;
+	ib_path_rec_t pr;
+} pr_map_item_t;
+
 typedef struct monitored_port {
 	uint16_t pkey_ix;
 	ib_net16_t orig_lid;
@@ -150,6 +156,11 @@ typedef struct osm_perfmgr {
 	int16_t local_port;
 	int rm_nodes;
 	boolean_t query_cpi;
+	cl_qmap_t path_rec_map;
+	/* when operating in stand alone mode we are required to query the
+	 * remote master SA */
+	osm_bind_handle_t sa_bind_handle;
+	int pr_query_outstanding;
 } osm_perfmgr_t;
 /*
 * FIELDS
diff --git a/opensm/osm_perfmgr.c b/opensm/osm_perfmgr.c
index 9df28f9..b1cd419 100644
--- a/opensm/osm_perfmgr.c
+++ b/opensm/osm_perfmgr.c
@@ -2,7 +2,7 @@
  * Copyright (c) 2007 The Regents of the University of California.
  * Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
- * Copyright (c) 2013 Lawrence Livermore National Security. All rights * reserved.
+ * Copyright (c) 2013 Lawrence Livermore National Security. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -123,6 +123,7 @@ static inline void diff_time(struct timeval *before, struct timeval *after,
 static void init_monitored_nodes(osm_perfmgr_t * pm)
 {
 	cl_qmap_init(&pm->monitored_map);
+	cl_qmap_init(&pm->path_rec_map);
 	pm->remove_list = NULL;
 	cl_event_construct(&pm->sig_query);
 	cl_event_init(&pm->sig_query, FALSE);
@@ -254,6 +255,10 @@ Exit:
 	OSM_LOG_EXIT(pm->log);
 }
 
+static void perfmgr_sa_mad_recv_cb(osm_madw_t * p_madw, void *bind_context,
+				   osm_madw_t * p_req_madw);
+static void perfmgr_sa_mad_send_err_cb(void *bind_context,
+				       osm_madw_t * p_madw);
 /**********************************************************************
  * Bind the PerfMgr to the vendor layer for MAD sends/receives
  **********************************************************************/
@@ -294,6 +299,22 @@ ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid)
 		OSM_LOG(pm->log, OSM_LOG_ERROR,
 			"ERR 5404: Vendor specific bind failed (%s)\n",
 			ib_get_err_str(status));
+		goto Exit;
+	}
+
+	bind_info.mad_class = IB_MCLASS_SUBN_ADM;
+	bind_info.class_version = 2;
+
+	pm->sa_bind_handle = osm_vendor_bind(pm->vendor, &bind_info,
+					     pm->mad_pool,
+					     perfmgr_sa_mad_recv_cb,
+					     perfmgr_sa_mad_send_err_cb, pm);
+
+	if (pm->sa_bind_handle == OSM_BIND_INVALID_HANDLE) {
+		status = IB_ERROR;
+		OSM_LOG(pm->log, OSM_LOG_ERROR,
+			"ERR 540E: PM SA bind failed (%s)\n",
+			ib_get_err_str(status));
 	}
 
 Exit:
@@ -307,12 +328,17 @@ Exit:
 static void perfmgr_mad_unbind(osm_perfmgr_t * pm)
 {
 	OSM_LOG_ENTER(pm->log);
-	if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
+
+	if (pm->bind_handle == OSM_BIND_INVALID_HANDLE)
 		OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5405: No previous bind\n");
-		goto Exit;
-	}
-	osm_vendor_unbind(pm->bind_handle);
-Exit:
+	else
+		osm_vendor_unbind(pm->bind_handle);
+
+	if (pm->sa_bind_handle == OSM_BIND_INVALID_HANDLE)
+		OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 540F: No previous SA bind\n");
+	else
+		osm_vendor_unbind(pm->sa_bind_handle);
+
 	OSM_LOG_EXIT(pm->log);
 }
 
@@ -330,6 +356,250 @@ static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port)
 	return qp;
 }
 
+static inline boolean_t sm_not_active(osm_perfmgr_t *pm)
+{
+	return (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY ||
+		pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE);
+}
+
+static int get_sm_info(osm_perfmgr_t *pm, ib_net16_t *smlid, uint8_t *smsl)
+{
+	int i, rc = -1;
+	uint32_t num_ports = 32;
+	ib_port_attr_t attr_array[32];
+
+	osm_vendor_get_all_port_attr(pm->vendor, attr_array, &num_ports);
+
+	for (i = 0; i < num_ports; i++) {
+		if (attr_array[i].port_guid == pm->port_guid) {
+			*smlid = attr_array[i].sm_lid;
+			*smsl = attr_array[i].sm_sl;
+			rc = 0;
+		}
+	}
+
+	return (rc);
+}
+
+static void insert_pr_map(osm_perfmgr_t *pm, ib_path_rec_t *pr)
+{
+	pr_map_item_t *mi = calloc(1, sizeof(*mi));
+	if (mi) {
+		cl_map_item_t *tmp;
+		if ((tmp = cl_qmap_get(&pm->path_rec_map, (uint64_t)pr->dlid))
+		    != cl_qmap_end(&pm->path_rec_map)) {
+			cl_qmap_remove_item(&pm->path_rec_map, tmp);
+			free(tmp);
+		}
+		memcpy(&mi->pr, pr, sizeof(mi->pr));
+		cl_qmap_insert(&pm->path_rec_map, (uint64_t)pr->dlid,
+			       (cl_map_item_t *) mi);
+	} else {
+		OSM_LOG(pm->log, OSM_LOG_ERROR,
+			"ERR 54FC: Failed to allocate path "
+			"record map item for DLID %d",
+			cl_ntoh16(pr->dlid));
+	}
+}
+
+/** =========================================================================
+ * SA query call backs for the sa_bind_handle
+ */
+static void perfmgr_sa_mad_recv_cb(osm_madw_t * p_madw, void *bind_context,
+				   osm_madw_t * p_req_madw)
+{
+	osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
+	ib_sa_mad_t *sa_mad;
+	int num_results;
+	int i;
+
+	OSM_LOG_ENTER(pm->log);
+
+	osm_madw_copy_context(p_madw, p_req_madw);
+	osm_mad_pool_put(pm->mad_pool, p_req_madw);
+
+	sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+
+	num_results = (p_madw->mad_size - IB_SA_MAD_HDR_SIZE) /
+		      (sa_mad->attr_offset << 3);
+
+	for (i = 0; i < num_results; i++) {
+		ib_path_rec_t *pr = (ib_path_rec_t *)
+			(sa_mad->data + (i*sizeof(ib_path_rec_t)));
+
+		/* only want reversible paths */
+		if ((pr->num_path & 0x80) == 0)
+			continue;
+
+		insert_pr_map(pm, pr);
+	}
+
+	osm_mad_pool_put(pm->mad_pool, p_madw);
+	pm->pr_query_outstanding = 0;
+
+	OSM_LOG_EXIT(pm->log);
+}
+
+static void perfmgr_sa_mad_send_err_cb(void *bind_context, osm_madw_t * p_madw)
+{
+	osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
+	OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 540D: PM PathRecord query "
+		"failed; sm LID %u MAD TID 0x%" PRIx64 "\n",
+		cl_ntoh16(p_madw->mad_addr.dest_lid),
+		cl_ntoh64(p_madw->p_mad->trans_id));
+	pm->pr_query_outstanding = -1;
+}
+
+static void create_half_world_query(osm_perfmgr_t *pm, ib_sa_mad_t *sa_mad)
+{
+	ib_path_rec_t *pr = (ib_path_rec_t *)sa_mad->data;
+
+	sa_mad->base_ver = 1;
+	sa_mad->mgmt_class = IB_MCLASS_SUBN_ADM;
+	sa_mad->class_ver = 2;
+	sa_mad->method = IB_MAD_METHOD_GETTABLE;
+	sa_mad->status = 0;
+	sa_mad->trans_id =
+	    cl_hton64((uint64_t) cl_atomic_inc(&pm->trans_id) &
+		      (uint64_t) (0xFFFFFFFF));
+	if (sa_mad->trans_id == 0)
+		sa_mad->trans_id =
+		    cl_hton64((uint64_t) cl_atomic_inc(&pm->trans_id) &
+			      (uint64_t) (0xFFFFFFFF));
+	sa_mad->attr_id = IB_MAD_ATTR_PATH_RECORD;
+	sa_mad->attr_mod = 0;
+
+	pr->slid = osm_port_get_base_lid(osm_get_port_by_guid(pm->subn,
+							      pm->port_guid));
+	sa_mad->comp_mask = IB_PR_COMPMASK_SLID;
+}
+
+static int send_sa_pr_query(osm_perfmgr_t *pm)
+{
+	ib_sa_mad_t *sa_mad = NULL;
+	osm_madw_t *p_madw = NULL;
+	ib_net16_t smlid;
+	uint8_t smsl;
+
+	if (get_sm_info(pm, &smlid, &smsl) < 0) {
+		OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 54FE: "
+			"PM failed to find SM LID & SL for PR query\n");
+		return (-1);
+	}
+
+	p_madw = osm_mad_pool_get(pm->mad_pool, pm->sa_bind_handle,
+				  MAD_BLOCK_SIZE, NULL);
+	if (p_madw == NULL)
+		return -1;
+
+	sa_mad = osm_madw_get_sa_mad_ptr(p_madw);
+
+	create_half_world_query(pm, sa_mad);
+
+	p_madw->mad_addr.dest_lid = smlid;
+	p_madw->mad_addr.addr_type.gsi.remote_qp = 1;
+	p_madw->mad_addr.addr_type.gsi.remote_qkey =
+	    cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
+	p_madw->mad_addr.addr_type.gsi.pkey_ix = 0;
+	p_madw->mad_addr.addr_type.gsi.service_level = smsl;
+	p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
+	p_madw->resp_expected = TRUE;
+
+	if (osm_vendor_send(pm->sa_bind_handle, p_madw, TRUE)
+	    != IB_SUCCESS)
+		return (-1);
+
+	return (0);
+}
+
+static int get_internal_pr(osm_perfmgr_t *pm)
+{
+	ib_sa_mad_t sa_mad;
+	const osm_alias_guid_t *p_src_alias_guid, *p_dest_alias_guid;
+	const osm_port_t *p_src_port, *p_dest_port;
+	const ib_gid_t *p_sgid = NULL, *p_dgid = NULL;
+	osm_port_t *p_local_port;
+	cl_qlist_t pr_list;
+	cl_list_item_t *item;
+	unsigned num_rec, i;
+
+	create_half_world_query(pm, &sa_mad);
+
+	osm_pr_get_end_points(&pm->osm->sa, &sa_mad,
+			      &p_src_alias_guid, &p_dest_alias_guid,
+			      &p_src_port, &p_dest_port,
+			      &p_sgid, &p_dgid);
+
+	cl_qlist_init(&pr_list);
+	p_local_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
+
+	/* Get all alias GUIDs for the src port */
+	p_src_alias_guid = (osm_alias_guid_t *) cl_qmap_head(&pm->subn->alias_port_guid_tbl);
+	while (p_src_alias_guid !=
+	       (osm_alias_guid_t *) cl_qmap_end(&pm->subn->alias_port_guid_tbl)) {
+
+		if (osm_get_port_by_alias_guid(pm->subn, p_src_alias_guid->alias_guid)
+		    == p_src_port) {
+			osm_pr_process_half(&pm->osm->sa, &sa_mad, p_local_port,
+					    p_src_alias_guid, p_dest_alias_guid,
+					    p_sgid, p_dgid, &pr_list);
+		}
+
+		p_src_alias_guid = (osm_alias_guid_t *) cl_qmap_next(&p_src_alias_guid->map_item);
+	}
+
+	num_rec = cl_qlist_count(&pr_list);
+	for (i = 0; i < num_rec; i++) {
+		ib_path_rec_t *pr;
+		item = cl_qlist_remove_head(&pr_list);
+		pr = (ib_path_rec_t *)((osm_sa_item_t *)item)->resp.data;
+
+		/* only want reversible paths */
+		if ((pr->num_path & 0x80) == 0)
+			continue;
+
+		insert_pr_map(pm, pr);
+		free(item);
+	}
+	pm->pr_query_outstanding = 0;
+	return (0);
+}
+
+static ib_path_rec_t * get_pr_from_pr_map(osm_perfmgr_t *pm, ib_net16_t dlid)
+{
+	cl_map_item_t *mi;
+	if ((mi = cl_qmap_get(&pm->path_rec_map, (uint64_t)dlid)) !=
+	    cl_qmap_end(&pm->path_rec_map)) {
+		pr_map_item_t *pr = (pr_map_item_t *)mi;
+		return (&pr->pr);
+	}
+	return (NULL);
+}
+
+static int8_t get_sl(osm_perfmgr_t *pm, monitored_node_t * mon_node, uint8_t port)
+{
+	uint16_t dlid;
+	ib_path_rec_t *pr;
+
+	if (!mon_node || port >= mon_node->num_ports)
+		return (-1);
+
+	dlid = mon_node->port[port].redirection ?
+		mon_node->port[port].lid :
+		mon_node->port[port].orig_lid;
+	pr = get_pr_from_pr_map(pm, dlid);
+	if (pr) {
+		OSM_LOG(pm->log, OSM_LOG_DEBUG, "PM %s port %d -> SL 0x%x\n",
+			mon_node->name, port, ib_path_rec_sl(pr));
+		return ((int8_t)ib_path_rec_sl(pr));
+	}
+
+	OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 54FD: "
+		"PM failed to find SL for %s port %d\n",
+		mon_node->name, port);
+	return (-1);
+}
+
 static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port)
 {
 	switch (p_node->node_info.node_type) {
@@ -683,6 +953,7 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
 	/* issue the query for each port */
 	for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
 		ib_net16_t lid;
+		int8_t sl;
 
 		if (!osm_node_get_physp_ptr(node, port))
 			continue;
@@ -700,6 +971,9 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
 		}
 
 		remote_qp = get_qp(mon_node, port);
+		sl = get_sl(pm, mon_node, port);
+		if (sl < 0)
+			continue;
 
 		mad_context.perfmgr_context.node_guid = node_guid;
 		mad_context.perfmgr_context.port = port;
@@ -709,7 +983,7 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
 			status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
 						mon_node->port[port].pkey_ix,
 						port, &mad_context,
-						0); /* FIXME SL != 0 */
+						sl);
 			if (status != IB_SUCCESS)
 				OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
 					"Failed to issue ClassPortInfo query "
@@ -733,7 +1007,7 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
 						       port, IB_MAD_METHOD_GET,
 						       0xffff, &mad_context,
-						       0); /* FIXME SL != 0 */
+						       sl);
 			if (status != IB_SUCCESS)
 				OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
 					"Failed to issue port counter query for node 0x%"
@@ -751,7 +1025,7 @@ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
 							port, IB_MAD_METHOD_GET,
 							&mad_context,
-							0); /* FIXME SL != 0 */
+							sl);
 			if (status != IB_SUCCESS)
 				OSM_LOG(pm->log, OSM_LOG_ERROR,
 					"ERR 5417: Failed to issue "
@@ -1007,8 +1281,7 @@ void osm_perfmgr_process(osm_perfmgr_t * pm)
 	pm->sweep_state = PERFMGR_SWEEP_ACTIVE;
 	cl_spinlock_release(&pm->lock);
 
-	if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY ||
-	    pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)
+	if (sm_not_active(pm))
 		perfmgr_discovery(pm->subn->p_osm);
 
 	/* if redirection enabled, determine local port */
@@ -1034,6 +1307,18 @@ void osm_perfmgr_process(osm_perfmgr_t * pm)
 #ifdef ENABLE_OSM_PERF_MGR_PROFILE
 	gettimeofday(&before, NULL);
 #endif
+	pm->pr_query_outstanding = 1;
+	if (sm_not_active(pm)) {
+		/* FIXME register for UnPath/RePath rather than reissue query */
+		if (send_sa_pr_query(pm)) {
+			OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 542F: "
+				"PM PathRecord query send failed\n");
+			pm->pr_query_outstanding = -1;
+		}
+	} else {
+		get_internal_pr(pm);
+	}
+
 	/* With the global lock held, collect the node guids */
 	/* FIXME we should be able to track SA notices
 	 * and not have to sweep the node_guid_tbl each pass
@@ -1043,8 +1328,15 @@ void osm_perfmgr_process(osm_perfmgr_t * pm)
 	cl_qmap_apply_func(&pm->subn->node_guid_tbl, collect_guids, pm);
 	cl_plock_release(&pm->osm->lock);
 
-	/* then for each node query their counters */
-	cl_qmap_apply_func(&pm->monitored_map, perfmgr_query_counters, pm);
+	/* Wait for PR query to complete */
+	while(pm->pr_query_outstanding == 1)
+		;
+
+	if (pm->pr_query_outstanding == 0) {
+		cl_qmap_apply_func(&pm->monitored_map, perfmgr_query_counters, pm);
+	} else {
+		pm->pr_query_outstanding = 0;
+	}
 
 	/* clean out any nodes found to be removed during the sweep */
 	remove_marked_nodes(pm);
@@ -1235,6 +1527,7 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
 	     counter_overflow_32(pc->rcv_pkts)))) {
 		osm_node_t *p_node = NULL;
 		ib_net16_t lid = 0;
+		int8_t sl;
 
 		if (!mon_node->port[port].valid)
 			goto Exit;
@@ -1244,6 +1537,9 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
 			") port %d; clearing counters\n",
 			mon_node->name, mon_node->guid, port);
 
+		if ((sl = get_sl(pm, mon_node, port)) < 0)
+			goto Exit;
+
 		cl_plock_acquire(&pm->osm->lock);
 		p_node = osm_get_node_by_guid(pm->subn,
 					      cl_hton64(mon_node->guid));
@@ -1276,8 +1572,7 @@ static void perfmgr_check_overflow(osm_perfmgr_t * pm,
 		status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
 					     port, IB_MAD_METHOD_SET,
 					     counter_select,
-					     &mad_context,
-					     0); /* FIXME SL != 0 */
+					     &mad_context, sl);
 		if (status != IB_SUCCESS)
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
 				"Failed to send clear counters MAD for %s (0x%"
@@ -1320,6 +1615,7 @@ static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm,
 	     counter_overflow_64(pc->multicast_rcv_pkts)))) {
 		osm_node_t *p_node = NULL;
 		ib_net16_t lid = 0;
+		int8_t sl;
 
 		if (!mon_node->port[port].valid)
 			goto Exit;
@@ -1329,6 +1625,9 @@ static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm,
 			PRIx64 ") port %d; clearing counters\n",
 			mon_node->name, mon_node->guid, port);
 
+		if ((sl = get_sl(pm, mon_node, port)) < 0)
+			goto Exit;
+
 		cl_plock_acquire(&pm->osm->lock);
 		p_node = osm_get_node_by_guid(pm->subn,
 					      cl_hton64(mon_node->guid));
@@ -1350,8 +1649,7 @@ static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm,
 		/* clear port counters */
 		status = perfmgr_send_pce_mad(pm, lid, remote_qp, pkey_ix,
 					      port, IB_MAD_METHOD_SET,
-					      &mad_context,
-					      0); /* FIXME SL != 0 */
+					      &mad_context, sl);
 		if (status != IB_SUCCESS)
 			OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5419: "
 				"Failed to send clear counters MAD for %s (0x%"
@@ -1475,6 +1773,7 @@ static boolean_t handle_redirect(osm_perfmgr_t *pm,
 	boolean_t valid = TRUE;
 	int16_t pkey_ix = 0;
 	uint8_t mad_method;
+	int8_t sl;
 
 	OSM_LOG(pm->log, OSM_LOG_VERBOSE,
 		"Redirection to LID %u GID %s QP 0x%x received\n",
@@ -1528,6 +1827,12 @@ static boolean_t handle_redirect(osm_perfmgr_t *pm,
 	if (!valid)
 		goto Exit;
 
+	sl = get_sl(pm, p_mon_node, port);
+	if (sl < 0) {
+		valid = FALSE;
+		goto Exit;
+	}
+
 	/* LID redirection support (easier than GID redirection) */
 	cl_plock_acquire(&pm->osm->lock);
 	p_mon_node->port[port].redirection = TRUE;
@@ -1550,7 +1855,7 @@ static boolean_t handle_redirect(osm_perfmgr_t *pm,
 		status = perfmgr_send_cpi_mad(pm, cpi->redir_lid,
 					      cpi->redir_qp, pkey_ix,
 					      port, mad_context,
-					      0); /* FIXME SL != 0 */
+					      sl);
 	} else {
 		/* reissue the original query to the redirected location */
 		mad_method = mad_context->perfmgr_context.mad_method;
@@ -1561,14 +1866,14 @@ static boolean_t handle_redirect(osm_perfmgr_t *pm,
 							     mad_method,
 							     0xffff,
 							     mad_context,
-							     0); /* FIXME SL != 0 */
+							     sl);
 		} else {
 			status = perfmgr_send_pce_mad(pm, cpi->redir_lid,
 						      cpi->redir_qp, pkey_ix,
 						      port, mad_method,
 						      mad_context,
-						      0); /* FIXME SL != 0 */
+						      sl);
 		}
 	}
 
 	if (status != IB_SUCCESS)
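
Not part of the patch, for illustration only: a minimal standalone sketch of the
DLID-keyed PathRecord cache that insert_pr_map()/get_pr_from_pr_map()/get_sl()
implement above, using the same complib cl_qmap calls the patch relies on.  The
pr_cache_* names are invented for this sketch, and the include paths assume the
complib/iba headers shipped in the opensm tree.

/*
 * Illustrative sketch only (not in the patch): the DLID-keyed PathRecord
 * cache pattern used by insert_pr_map()/get_pr_from_pr_map() above.
 */
#include <stdlib.h>
#include <complib/cl_qmap.h>
#include <iba/ib_types.h>

typedef struct pr_cache_item {
	cl_map_item_t map_item;	/* must be first: cl_qmap hands back a pointer to this member */
	ib_path_rec_t pr;
} pr_cache_item_t;

/* Cache (or replace) the PathRecord for pr->dlid, keyed on the network-order DLID. */
static void pr_cache_insert(cl_qmap_t *map, const ib_path_rec_t *pr)
{
	pr_cache_item_t *item;
	cl_map_item_t *old = cl_qmap_get(map, (uint64_t) pr->dlid);

	if (old != cl_qmap_end(map)) {
		cl_qmap_remove_item(map, old);
		free(old);	/* map_item is the first member, so this frees the whole item */
	}

	item = calloc(1, sizeof(*item));
	if (!item)
		return;		/* allocation failure: later lookups simply miss */
	item->pr = *pr;
	cl_qmap_insert(map, (uint64_t) pr->dlid, &item->map_item);
}

/* Return the cached SL for a (network-order) DLID, or -1 when no path record is cached. */
static int pr_cache_get_sl(cl_qmap_t *map, ib_net16_t dlid)
{
	cl_map_item_t *mi = cl_qmap_get(map, (uint64_t) dlid);

	if (mi == cl_qmap_end(map))
		return -1;
	return ib_path_rec_sl(&((pr_cache_item_t *) mi)->pr);
}

As in the patch, the cl_qmap_t must be initialized with cl_qmap_init() before
first use (the patch does this in init_monitored_nodes()), and the key is the
network-order DLID, matching the (uint64_t)pr->dlid key used by insert_pr_map()
and the orig_lid/lid values passed in by get_sl().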