@@ -165,7 +165,10 @@ struct crush_map {
__u32 choose_local_fallback_tries;
/* choose attempts before giving up */
__u32 choose_total_tries;
- /* attempt chooseleaf inner descent once; on failure retry outer descent */
+ /* attempt chooseleaf inner descent once for firstn mode; on
+ * reject retry outer descent. Note that this does *not*
+ * apply to a collision: in that case we will retry as we used
+ * to. */
__u32 chooseleaf_descend_once;
};
@@ -299,6 +299,8 @@ static int crush_choose_firstn(const struct crush_map *map,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
+ unsigned int attempts,
+ unsigned int recurse_attempts,
int recurse_to_leaf,
int descend_once, int *out2)
{
@@ -385,6 +387,7 @@ static int crush_choose_firstn(const struct crush_map *map,
weight, weight_max,
x, outpos+1, 0,
out2, outpos,
+ recurse_attempts, 0,
0,
map->chooseleaf_descend_once,
NULL) <= outpos)
@@ -421,7 +424,7 @@ reject:
flocal <= in->size + map->choose_local_fallback_tries)
/* exhaustive bucket search */
retry_bucket = 1;
- else if (ftotal <= map->choose_total_tries)
+ else if (ftotal <= attempts)
/* then retry descent */
retry_descent = 1;
else
@@ -634,7 +637,8 @@ int crush_do_rule(const struct crush_map *map,
__u32 step;
int i, j;
int numrep;
- int choose_leaf_tries = 1;
+ int choose_tries = map->choose_total_tries;
+ int choose_leaf_tries = 0;
const int descend_once = 0;
if ((__u32)ruleno >= map->max_rules) {
@@ -701,6 +705,8 @@ int crush_do_rule(const struct crush_map *map,
x, numrep,
curstep->arg2,
o+osize, j,
+ choose_tries,
+ choose_leaf_tries ? choose_leaf_tries : choose_tries,
recurse_to_leaf,
descend_once, c+osize);
} else {
@@ -711,8 +717,8 @@ int crush_do_rule(const struct crush_map *map,
x, numrep, numrep,
curstep->arg2,
o+osize, j,
- map->choose_total_tries,
- choose_leaf_tries,
+ choose_tries,
+ choose_leaf_tries ? choose_leaf_tries : 1,
recurse_to_leaf,
c+osize,
0);
Parameterize the attempts for the _firstn choose method, and apply the rule-specified tries count to firstn mode as well. Note that we have slightly different behavior here than with indep: If the firstn value is not specified for firstn, we pass through the normal attempt count. This maintains compatibility with legacy behavior. Note that this is usually *not* actually N^2 work, though, because of the descend_once tunable. However, descend_once is unfortunately *not* the same thing as 1 chooseleaf try because it is only checked on a reject but not on a collision. Sigh. In contrast, for indep, if tries is not specified we default to 1 recursive attempt, because that is simply more sane, and we have the option to do so. The descend_once tunable has no effect for indep. Reflects ceph.git commit 64aeded50d80942d66a5ec7b604ff2fcbf5d7b63. Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com> --- include/linux/crush/crush.h | 5 ++++- net/ceph/crush/mapper.c | 14 ++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-)