@@ -559,6 +559,8 @@ unsigned int lnet_nid_cpt_hash(struct lnet_nid *nid,
extern unsigned int lnet_recovery_limit;
extern unsigned int lnet_peer_discovery_disabled;
extern unsigned int lnet_drop_asym_route;
+extern unsigned int lnet_max_recovery_ping_interval; /* seconds */
+extern unsigned int lnet_max_recovery_ping_count; /* floor(log2(interval)) */
extern unsigned int router_sensitivity_percentage;
extern int alive_router_check_interval;
extern int live_router_check_interval;
@@ -1009,15 +1011,14 @@ int lnet_get_peer_ni_info(u32 peer_index, u64 *nid,
return false;
}
-#define LNET_RECOVERY_INTERVAL_MAX 900
static inline unsigned int
lnet_get_next_recovery_ping(unsigned int ping_count, time64_t now)
{
unsigned int interval;
- /* 2^9 = 512, 2^10 = 1024 */
- if (ping_count > 9)
- interval = LNET_RECOVERY_INTERVAL_MAX;
+ /* 2^lnet_max_recovery_ping_count <= lnet_max_recovery_ping_interval */
+ if (ping_count > lnet_max_recovery_ping_count)
+ interval = lnet_max_recovery_ping_interval;
else
interval = 1 << ping_count;
@@ -117,6 +117,22 @@ static int recovery_interval_set(const char *val,
MODULE_PARM_DESC(lnet_recovery_limit,
"How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery");
+/* Largest interval, in seconds, between LNet recovery pings.  Once the
+ * ping count exceeds lnet_max_recovery_ping_count,
+ * lnet_get_next_recovery_ping() uses this value instead of 1 << ping_count.
+ */
+unsigned int lnet_max_recovery_ping_interval = 900;
+/* floor(log2(lnet_max_recovery_ping_interval)): 2^9 = 512 <= 900 < 2^10.
+ * max_recovery_ping_interval_set() recomputes it whenever the interval is
+ * written.
+ */
+unsigned int lnet_max_recovery_ping_count = 9;
+static int max_recovery_ping_interval_set(const char *val,
+					  const struct kernel_param *kp);
+
+/* The backing variable is an unsigned int, so type-check it and print it
+ * as one; an int getter would report large values as negative.
+ */
+#define param_check_max_recovery_ping_interval(name, p) \
+		__param_check(name, p, unsigned int)
+
+static struct kernel_param_ops param_ops_max_recovery_ping_interval = {
+	.set = max_recovery_ping_interval_set,
+	.get = param_get_uint,
+};
+module_param(lnet_max_recovery_ping_interval, max_recovery_ping_interval, 0644);
+MODULE_PARM_DESC(lnet_max_recovery_ping_interval,
+		 "The max interval between LNet recovery pings, in seconds");
+
static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
static int intf_max_set(const char *val, const struct kernel_param *kp);
module_param_call(lnet_interfaces_max, intf_max_set, param_get_int,
@@ -258,6 +274,39 @@ static int lnet_discover(struct lnet_process_id id, u32 force,
}
+/* Set handler for the lnet_max_recovery_ping_interval module parameter.
+ *
+ * Parses @val with kstrtoul() (base 0, so the base is taken from the
+ * string's prefix), stores the result in lnet_max_recovery_ping_interval
+ * and derives lnet_max_recovery_ping_count = floor(log2(interval)), the
+ * largest exponent for which 2^count does not exceed the interval.
+ *
+ * @val: textual value written to the parameter
+ * @kp:  kernel parameter descriptor; not used here
+ *
+ * Returns 0 on success, the kstrtoul() error code if @val is not a valid
+ * number, or -EINVAL if the value is zero or larger than INT_MAX.
+ */
static int
+max_recovery_ping_interval_set(const char *val, const struct kernel_param *kp)
+{
+	unsigned int ping_count = 0;
+	unsigned long remaining;
+	unsigned long value;
+	int rc;
+
+	rc = kstrtoul(val, 0, &value);
+	if (rc) {
+		CERROR("Invalid module parameter value for 'lnet_max_recovery_ping_interval'\n");
+		return rc;
+	}
+
+	if (!value) {
+		CERROR("Invalid max ping timeout. Must be strictly positive\n");
+		return -EINVAL;
+	}
+
+	/* The interval is stored in an unsigned int, and
+	 * lnet_get_next_recovery_ping() evaluates 1 << ping_count as an int
+	 * for every ping_count up to the derived count.  Capping the value
+	 * at INT_MAX prevents silent truncation of the unsigned long and
+	 * keeps the count at 30 or below, so that shift stays defined.
+	 */
+	if (value > INT_MAX) {
+		CERROR("Invalid max ping timeout. Must not exceed %d seconds\n",
+		       INT_MAX);
+		return -EINVAL;
+	}
+
+	/* floor(log2(value)): number of halvings before value reaches 1 */
+	for (remaining = value >> 1; remaining; remaining >>= 1)
+		ping_count++;
+
+	/* Publish the interval and its derived count together under the
+	 * api_mutex so that concurrent writers of this parameter cannot
+	 * interleave their interval/count pairs.  The count is computed
+	 * beforehand so the globals never hold a partially built value.
+	 */
+	mutex_lock(&the_lnet.ln_api_mutex);
+	lnet_max_recovery_ping_interval = value;
+	lnet_max_recovery_ping_count = ping_count;
+	mutex_unlock(&the_lnet.ln_api_mutex);
+
+	return 0;
+}
+
+static int
discovery_set(const char *val, const struct kernel_param *kp)
{
int rc;