@@ -109,6 +109,7 @@ struct bpf_local_storage {
struct bpf_local_storage_cache {
spinlock_t idx_lock;
u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
+ DECLARE_BITMAP(idx_exclusive, BPF_LOCAL_STORAGE_CACHE_SIZE);
};
#define DEFINE_BPF_STORAGE_CACHE(name) \
@@ -116,9 +117,10 @@ static struct bpf_local_storage_cache name = { \
.idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock), \
}
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
+int bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache,
+ u64 flags);
void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
- u16 idx);
+ u16 idx, u64 flags);
/* Helper functions for bpf_local_storage */
int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
@@ -1257,6 +1257,18 @@ enum bpf_stack_build_id_status {
BPF_STACK_BUILD_ID_IP = 2,
};
+/* Flags passed in map_extra when creating local_storage maps
+ * of types: BPF_MAP_TYPE_INODE_STORAGE
+ * BPF_MAP_TYPE_TASK_STORAGE
+ * BPF_MAP_TYPE_SK_STORAGE
+ */
+enum bpf_local_storage_extra_flags {
+ /* Give the map exclusive use of a local_storage cache slot
+ * or fail map alloc
+ */
+ BPF_LOCAL_STORAGE_FORCE_CACHE = (1U << 0),
+};
+
#define BPF_BUILD_ID_SIZE 20
struct bpf_stack_build_id {
__s32 status;
@@ -1296,6 +1308,8 @@ union bpf_attr {
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
* number of hash functions (if 0, the bloom filter will default
* to using 5 hash functions).
+ * BPF_MAP_TYPE_{INODE,TASK,SK}_STORAGE - local_storage-specific
+ * flags (see bpf_local_storage_extra_flags)
*/
__u64 map_extra;
};
@@ -227,12 +227,21 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
{
struct bpf_local_storage_map *smap;
+ int cache_idx_or_err;
+
+ cache_idx_or_err = bpf_local_storage_cache_idx_get(&inode_cache,
+ attr->map_extra);
+ if (cache_idx_or_err < 0)
+ return ERR_PTR(cache_idx_or_err);
smap = bpf_local_storage_map_alloc(attr);
- if (IS_ERR(smap))
+ if (IS_ERR(smap)) {
+ bpf_local_storage_cache_idx_free(&inode_cache, (u16)cache_idx_or_err,
+ attr->map_extra);
return ERR_CAST(smap);
+ }
- smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache);
+ smap->cache_idx = (u16)cache_idx_or_err;
return &smap->map;
}
@@ -241,7 +250,8 @@ static void inode_storage_map_free(struct bpf_map *map)
struct bpf_local_storage_map *smap;
smap = (struct bpf_local_storage_map *)map;
- bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
+ bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx,
+ map->map_extra);
bpf_local_storage_map_free(smap, NULL);
}
@@ -231,12 +231,19 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
{
struct bpf_local_storage_data *sdata;
struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_map *cached;
+ bool cached_exclusive = false;
/* Fast path (cache hit) */
sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
bpf_rcu_lock_held());
- if (sdata && rcu_access_pointer(sdata->smap) == smap)
- return sdata;
+ if (sdata) {
+ if (rcu_access_pointer(sdata->smap) == smap)
+ return sdata;
+
+ cached = rcu_dereference_check(sdata->smap, bpf_rcu_lock_held());
+ cached_exclusive = cached->map.map_extra & BPF_LOCAL_STORAGE_FORCE_CACHE;
+ }
/* Slow path (cache miss) */
hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
@@ -248,7 +255,7 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
return NULL;
sdata = SDATA(selem);
- if (cacheit_lockit) {
+ if (cacheit_lockit && !cached_exclusive) {
unsigned long flags;
/* spinlock is needed to avoid racing with the
@@ -482,15 +489,27 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
return ERR_PTR(err);
}
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+int bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache,
+ u64 flags)
{
+ bool exclusive = flags & BPF_LOCAL_STORAGE_FORCE_CACHE;
+ bool adding_to_full = false;
u64 min_usage = U64_MAX;
- u16 i, res = 0;
+ int res = 0;
+ u16 i;
spin_lock(&cache->idx_lock);
+ if (bitmap_full(cache->idx_exclusive, BPF_LOCAL_STORAGE_CACHE_SIZE)) {
+ res = -ENOMEM;
+ adding_to_full = true;
+ if (exclusive)
+ goto out;
+ }
+
for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
- if (cache->idx_usage_counts[i] < min_usage) {
+ if ((adding_to_full || !test_bit(i, cache->idx_exclusive)) &&
+ cache->idx_usage_counts[i] < min_usage) {
min_usage = cache->idx_usage_counts[i];
res = i;
@@ -499,17 +518,23 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
break;
}
}
+
+ if (exclusive)
+ set_bit(res, cache->idx_exclusive);
cache->idx_usage_counts[res]++;
+out:
spin_unlock(&cache->idx_lock);
return res;
}
void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
- u16 idx)
+ u16 idx, u64 flags)
{
spin_lock(&cache->idx_lock);
+ if (flags & BPF_LOCAL_STORAGE_FORCE_CACHE)
+ clear_bit(idx, cache->idx_exclusive);
cache->idx_usage_counts[idx]--;
spin_unlock(&cache->idx_lock);
}
@@ -583,7 +608,8 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
attr->max_entries ||
attr->key_size != sizeof(int) || !attr->value_size ||
/* Enforce BTF for userspace sk dumping */
- !attr->btf_key_type_id || !attr->btf_value_type_id)
+ !attr->btf_key_type_id || !attr->btf_value_type_id ||
+ attr->map_extra & ~BPF_LOCAL_STORAGE_FORCE_CACHE)
return -EINVAL;
if (!bpf_capable())
@@ -289,12 +289,21 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
{
struct bpf_local_storage_map *smap;
+ int cache_idx_or_err;
+
+ cache_idx_or_err = bpf_local_storage_cache_idx_get(&task_cache,
+ attr->map_extra);
+ if (cache_idx_or_err < 0)
+ return ERR_PTR(cache_idx_or_err);
smap = bpf_local_storage_map_alloc(attr);
- if (IS_ERR(smap))
+ if (IS_ERR(smap)) {
+ bpf_local_storage_cache_idx_free(&task_cache, (u16)cache_idx_or_err,
+ attr->map_extra);
return ERR_CAST(smap);
+ }
- smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+ smap->cache_idx = (u16)cache_idx_or_err;
return &smap->map;
}
@@ -303,7 +312,8 @@ static void task_storage_map_free(struct bpf_map *map)
struct bpf_local_storage_map *smap;
smap = (struct bpf_local_storage_map *)map;
- bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+ bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx,
+ map->map_extra);
bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
}
@@ -847,8 +847,11 @@ static int map_create(union bpf_attr *attr)
return -EINVAL;
}
- if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
- attr->map_extra != 0)
+ if (!(attr->map_type == BPF_MAP_TYPE_BLOOM_FILTER ||
+ attr->map_type == BPF_MAP_TYPE_INODE_STORAGE ||
+ attr->map_type == BPF_MAP_TYPE_TASK_STORAGE ||
+ attr->map_type == BPF_MAP_TYPE_SK_STORAGE) &&
+ attr->map_extra != 0)
return -EINVAL;
f_flags = bpf_get_file_flag(attr->map_flags);
@@ -90,19 +90,28 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
struct bpf_local_storage_map *smap;
smap = (struct bpf_local_storage_map *)map;
- bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+ bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx, map->map_extra);
bpf_local_storage_map_free(smap, NULL);
}
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
struct bpf_local_storage_map *smap;
+ int cache_idx_or_err;
+
+ cache_idx_or_err = bpf_local_storage_cache_idx_get(&sk_cache,
+ attr->map_extra);
+ if (cache_idx_or_err < 0)
+ return ERR_PTR(cache_idx_or_err);
smap = bpf_local_storage_map_alloc(attr);
- if (IS_ERR(smap))
+ if (IS_ERR(smap)) {
+ bpf_local_storage_cache_idx_free(&sk_cache, (u16)cache_idx_or_err,
+ attr->map_extra);
return ERR_CAST(smap);
+ }
- smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
+ smap->cache_idx = (u16)cache_idx_or_err;
return &smap->map;
}

Allow local_storage maps to claim exclusive use of a cache slot in
struct bpf_local_storage's cache. When a local_storage map claims a
slot and is cached by bpf_local_storage_lookup, it will not be replaced
until the map is freed. As a result, after a local_storage map is
allocated for a specific bpf_local_storage, lookup calls after the
first will quickly find the correct map.

When requesting an exclusive cache slot, bpf_local_storage_cache_idx_get
can now fail if all slots are already claimed. Because a map's cache_idx
is assigned when the bpf_map is allocated, which happens before the
program runs, the map load and the subsequent prog load will fail.

A bit in struct bpf_map's map_extra designates whether a map would like
to claim an exclusive slot. Similarly, a bitmap, idx_exclusive, is added
to bpf_local_storage_cache to track whether a slot is exclusively
claimed. Functions that manipulate the cache are modified to test for
the BPF_LOCAL_STORAGE_FORCE_CACHE bit and to test/set idx_exclusive
where necessary.

When a map exclusively claims a cache slot, non-exclusive local_storage
maps that were previously assigned the same cache_idx are not migrated
to an unclaimed cache_idx. Such a migration would require a full
iteration of the cache list and necessitate a reverse migration on map
free to even things out. Since a used cache slot is only exclusively
claimed when no empty slot exists, the additional complexity was deemed
unnecessary.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 include/linux/bpf_local_storage.h |  6 +++--
 include/uapi/linux/bpf.h          | 14 +++++++++++
 kernel/bpf/bpf_inode_storage.c    | 16 +++++++++---
 kernel/bpf/bpf_local_storage.c    | 42 +++++++++++++++++++++++++------
 kernel/bpf/bpf_task_storage.c     | 16 +++++++++---
 kernel/bpf/syscall.c              |  7 ++++--
 net/core/bpf_sk_storage.c         | 15 ++++++++---
 7 files changed, 95 insertions(+), 21 deletions(-)
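
For reviewers, a minimal usage sketch (not part of the patch): assuming
a libbpf recent enough to parse map_extra in BTF-defined map definitions
and that the BPF_LOCAL_STORAGE_FORCE_CACHE enum added above is visible
to the program, a task_storage map could request an exclusive slot as
below. The map and value names, the tracepoint hook, and the program
logic are purely illustrative.

/* exclusive_task_storage.bpf.c - illustrative sketch, not part of this patch */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct task_val {
	__u64 nr_syscalls;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	/* Request an exclusive bpf_local_storage cache slot.
	 * BPF_LOCAL_STORAGE_FORCE_CACHE is the flag added to uapi
	 * bpf.h by this patch; if every slot is already exclusively
	 * claimed, map creation fails with -ENOMEM.
	 */
	__uint(map_extra, BPF_LOCAL_STORAGE_FORCE_CACHE);
	__type(key, int);
	__type(value, struct task_val);
} exclusive_task_storage SEC(".maps");

SEC("tp_btf/sys_enter")
int BPF_PROG(count_syscalls, struct pt_regs *regs, long id)
{
	struct task_struct *task = bpf_get_current_task_btf();
	struct task_val *val;

	/* Once this map is cached for a task, the exclusive bit keeps
	 * it from being evicted from its slot, so this lookup stays on
	 * the fast path for the lifetime of the map.
	 */
	val = bpf_task_storage_get(&exclusive_task_storage, task, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (val)
		val->nr_syscalls++;
	return 0;
}

If all cache slots are already exclusively claimed, the map create (and
therefore the object load) fails with -ENOMEM; otherwise the map keeps
its slot until it is freed.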