diff mbox series

[bpf-next,1/5] bpf: Implement task local storage

Message ID 20201027170317.2011119-2-kpsingh@chromium.org (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Implement task_local_storage | expand

Commit Message

KP Singh Oct. 27, 2020, 5:03 p.m. UTC
From: KP Singh <kpsingh@google.com>

Similar to bpf_local_storage for sockets and inodes add local storage
for task_struct.

The life-cycle of storage is managed with the life-cycle of the
task_struct.  i.e. the storage is destroyed along with the owning task
with a callback to the bpf_task_storage_free from the task_free LSM
hook.

The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
the security blob which are now stackable and can co-exist with other
LSMs.

The userspace map operations can be done by using a pid fd as a key
passed to the lookup, update and delete operations.

Signed-off-by: KP Singh <kpsingh@google.com>
---
 include/linux/bpf_lsm.h                       |  23 ++
 include/linux/bpf_types.h                     |   1 +
 include/uapi/linux/bpf.h                      |  39 +++
 kernel/bpf/Makefile                           |   1 +
 kernel/bpf/bpf_lsm.c                          |   4 +
 kernel/bpf/bpf_task_storage.c                 | 327 ++++++++++++++++++
 kernel/bpf/syscall.c                          |   3 +-
 kernel/bpf/verifier.c                         |  10 +
 security/bpf/hooks.c                          |   2 +
 .../bpf/bpftool/Documentation/bpftool-map.rst |   3 +-
 tools/bpf/bpftool/bash-completion/bpftool     |   2 +-
 tools/bpf/bpftool/map.c                       |   4 +-
 tools/include/uapi/linux/bpf.h                |  39 +++
 tools/lib/bpf/libbpf_probes.c                 |   2 +
 14 files changed, 456 insertions(+), 4 deletions(-)
 create mode 100644 kernel/bpf/bpf_task_storage.c

Comments

Martin KaFai Lau Oct. 28, 2020, 1:13 a.m. UTC | #1
On Tue, Oct 27, 2020 at 06:03:13PM +0100, KP Singh wrote:
[ ... ]

> diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
> new file mode 100644
> index 000000000000..774140c458cc
> --- /dev/null
> +++ b/kernel/bpf/bpf_task_storage.c
> @@ -0,0 +1,327 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019 Facebook
> + * Copyright 2020 Google LLC.
> + */
> +
> +#include "linux/pid.h"
> +#include "linux/sched.h"
> +#include <linux/rculist.h>
> +#include <linux/list.h>
> +#include <linux/hash.h>
> +#include <linux/types.h>
> +#include <linux/spinlock.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_local_storage.h>
> +#include <net/sock.h>
Is this required?

> +#include <uapi/linux/sock_diag.h>
> +#include <uapi/linux/btf.h>
> +#include <linux/bpf_lsm.h>
> +#include <linux/btf_ids.h>
> +#include <linux/fdtable.h>
> +
> +DEFINE_BPF_STORAGE_CACHE(task_cache);
> +
> +static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
> +{
> +	struct task_struct *task = owner;
> +	struct bpf_storage_blob *bsb;
> +
> +	bsb = bpf_task(task);
> +	if (!bsb)
> +		return NULL;
> +	return &bsb->storage;
> +}
> +
> +static struct bpf_local_storage_data *
> +task_storage_lookup(struct task_struct *task, struct bpf_map *map,
> +		    bool cacheit_lockit)
> +{
> +	struct bpf_local_storage *task_storage;
> +	struct bpf_local_storage_map *smap;
> +	struct bpf_storage_blob *bsb;
> +
> +	bsb = bpf_task(task);
> +	if (!bsb)
> +		return NULL;
> +
> +	task_storage = rcu_dereference(bsb->storage);
> +	if (!task_storage)
> +		return NULL;
> +
> +	smap = (struct bpf_local_storage_map *)map;
> +	return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
> +}
> +

[ ... ]

> +static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
> +{
> +	struct bpf_local_storage_data *sdata;
> +	struct task_struct *task;
> +	struct pid *pid;
> +	struct file *f;
> +	int fd, err;
> +
> +	fd = *(int *)key;
> +	f = fget_raw(fd);
> +	if (!f)
> +		return ERR_PTR(-EBADF);
> +
> +	if (f->f_op != &pidfd_fops) {
> +		err = -EBADF;
> +		goto out_fput;
> +	}
> +
> +	pid = get_pid(f->private_data);
n00b question. Is get_pid(f->private_data) required?
f->private_data could be freed while holding f->f_count?

> +	task = get_pid_task(pid, PIDTYPE_PID);
Should put_task_struct() be called before returning?

> +	if (!task || !task_storage_ptr(task)) {
"!task_storage_ptr(task)" is unnecessary, task_storage_lookup() should
have taken care of it.


> +		err = -ENOENT;
> +		goto out;
> +	}
> +
> +	sdata = task_storage_lookup(task, map, true);
> +	put_pid(pid);
> +	return sdata ? sdata->data : NULL;
> +out:
> +	put_pid(pid);
> +out_fput:
> +	fput(f);
> +	return ERR_PTR(err);
> +}
> +
[ ... ]

> +static int task_storage_map_btf_id;
> +const struct bpf_map_ops task_storage_map_ops = {
> +	.map_meta_equal = bpf_map_meta_equal,
> +	.map_alloc_check = bpf_local_storage_map_alloc_check,
> +	.map_alloc = task_storage_map_alloc,
> +	.map_free = task_storage_map_free,
> +	.map_get_next_key = notsupp_get_next_key,
> +	.map_lookup_elem = bpf_pid_task_storage_lookup_elem,
> +	.map_update_elem = bpf_pid_task_storage_update_elem,
> +	.map_delete_elem = bpf_pid_task_storage_delete_elem,
Please exercise the syscall use cases also in the selftest.

> +	.map_check_btf = bpf_local_storage_map_check_btf,
> +	.map_btf_name = "bpf_local_storage_map",
> +	.map_btf_id = &task_storage_map_btf_id,
> +	.map_owner_storage_ptr = task_storage_ptr,
> +};
> +
Martin KaFai Lau Oct. 28, 2020, 1:22 a.m. UTC | #2
On Tue, Oct 27, 2020 at 06:03:13PM +0100, KP Singh wrote:
[ ... ] 

> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e6ceac3f7d62..bb443c4f3637 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -157,6 +157,7 @@ enum bpf_map_type {
>  	BPF_MAP_TYPE_STRUCT_OPS,
>  	BPF_MAP_TYPE_RINGBUF,
>  	BPF_MAP_TYPE_INODE_STORAGE,
> +	BPF_MAP_TYPE_TASK_STORAGE,
>  };
>  
>  /* Note that tracing related programs such as
> @@ -3742,6 +3743,42 @@ union bpf_attr {
>   * 	Return
>   * 		The helper returns **TC_ACT_REDIRECT** on success or
>   * 		**TC_ACT_SHOT** on error.
> + *
> + * void *bpf_task_storage_get(struct bpf_map *map, void *task, void *value, u64 flags)
After peeking patch 2,  I think the pointer type should be
"struct task_struct *task" instead of "void *task".

Same for bpf_task_storage_delete().

> + *	Description
> + *		Get a bpf_local_storage from the *task*.
> + *
> + *		Logically, it could be thought of as getting the value from
> + *		a *map* with *task* as the **key**.  From this
> + *		perspective,  the usage is not much different from
> + *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
> + *		helper enforces the key must be an task_struct and the map must also
> + *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
> + *
> + *		Underneath, the value is stored locally at *task* instead of
> + *		the *map*.  The *map* is used as the bpf-local-storage
> + *		"type". The bpf-local-storage "type" (i.e. the *map*) is
> + *		searched against all bpf_local_storage residing at *task*.
> + *
> + *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
> + *		used such that a new bpf_local_storage will be
> + *		created if one does not exist.  *value* can be used
> + *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
> + *		the initial value of a bpf_local_storage.  If *value* is
> + *		**NULL**, the new bpf_local_storage will be zero initialized.
> + *	Return
> + *		A bpf_local_storage pointer is returned on success.
> + *
> + *		**NULL** if not found or there was an error in adding
> + *		a new bpf_local_storage.
> + *
> + * int bpf_task_storage_delete(struct bpf_map *map, void *task)
> + *	Description
> + *		Delete a bpf_local_storage from a *task*.
> + *	Return
> + *		0 on success.
> + *
> + *		**-ENOENT** if the bpf_local_storage cannot be found.
>   */
Andrii Nakryiko Oct. 29, 2020, 11:12 p.m. UTC | #3
On Wed, Oct 28, 2020 at 9:17 AM KP Singh <kpsingh@chromium.org> wrote:
>
> From: KP Singh <kpsingh@google.com>
>
> Similar to bpf_local_storage for sockets and inodes add local storage
> for task_struct.
>
> The life-cycle of storage is managed with the life-cycle of the
> task_struct.  i.e. the storage is destroyed along with the owning task
> with a callback to the bpf_task_storage_free from the task_free LSM
> hook.
>
> The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
> the security blob which are now stackable and can co-exist with other
> LSMs.
>
> The userspace map operations can be done by using a pid fd as a key
> passed to the lookup, update and delete operations.
>
> Signed-off-by: KP Singh <kpsingh@google.com>
> ---

Please also double-check all three of get_pid_task() uses, you need to
put_task_struct() in all cases.

>  include/linux/bpf_lsm.h                       |  23 ++
>  include/linux/bpf_types.h                     |   1 +
>  include/uapi/linux/bpf.h                      |  39 +++
>  kernel/bpf/Makefile                           |   1 +
>  kernel/bpf/bpf_lsm.c                          |   4 +
>  kernel/bpf/bpf_task_storage.c                 | 327 ++++++++++++++++++
>  kernel/bpf/syscall.c                          |   3 +-
>  kernel/bpf/verifier.c                         |  10 +
>  security/bpf/hooks.c                          |   2 +
>  .../bpf/bpftool/Documentation/bpftool-map.rst |   3 +-
>  tools/bpf/bpftool/bash-completion/bpftool     |   2 +-
>  tools/bpf/bpftool/map.c                       |   4 +-
>  tools/include/uapi/linux/bpf.h                |  39 +++
>  tools/lib/bpf/libbpf_probes.c                 |   2 +
>  14 files changed, 456 insertions(+), 4 deletions(-)
>  create mode 100644 kernel/bpf/bpf_task_storage.c

Please split out bpftool, bpftool documentation, and libbpf changes
into their respective patches.

[...]

> + *
> + * int bpf_task_storage_delete(struct bpf_map *map, void *task)

please use long for return type, as all other helpers (except
bpf_inode_storage_delete, which would be nice to fix as well) do.

> + *     Description
> + *             Delete a bpf_local_storage from a *task*.
> + *     Return
> + *             0 on success.
> + *
> + *             **-ENOENT** if the bpf_local_storage cannot be found.
>   */

[...]

> +
> +void bpf_task_storage_free(struct task_struct *task)
> +{
> +       struct bpf_local_storage_elem *selem;
> +       struct bpf_local_storage *local_storage;
> +       bool free_task_storage = false;
> +       struct bpf_storage_blob *bsb;
> +       struct hlist_node *n;
> +
> +       bsb = bpf_task(task);
> +       if (!bsb)
> +               return;
> +
> +       rcu_read_lock();
> +
> +       local_storage = rcu_dereference(bsb->storage);
> +       if (!local_storage) {
> +               rcu_read_unlock();
> +               return;
> +       }
> +
> +       /* Netiher the bpf_prog nor the bpf-map's syscall

typo: Neither

> +        * could be modifying the local_storage->list now.
> +        * Thus, no elem can be added-to or deleted-from the
> +        * local_storage->list by the bpf_prog or by the bpf-map's syscall.
> +        *
> +        * It is racing with bpf_local_storage_map_free() alone
> +        * when unlinking elem from the local_storage->list and
> +        * the map's bucket->list.
> +        */
> +       raw_spin_lock_bh(&local_storage->lock);
> +       hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
> +               /* Always unlink from map before unlinking from
> +                * local_storage.
> +                */
> +               bpf_selem_unlink_map(selem);
> +               free_task_storage = bpf_selem_unlink_storage_nolock(
> +                       local_storage, selem, false);

this will override the previous value of free_task_storage. Did you
intend to do || here?

> +       }
> +       raw_spin_unlock_bh(&local_storage->lock);
> +       rcu_read_unlock();
> +
> +       /* free_task_storage should always be true as long as
> +        * local_storage->list was non-empty.
> +        */
> +       if (free_task_storage)
> +               kfree_rcu(local_storage, rcu);
> +}
> +

[...]
Song Liu Oct. 29, 2020, 11:27 p.m. UTC | #4
On Wed, Oct 28, 2020 at 9:17 AM KP Singh <kpsingh@chromium.org> wrote:
>
> From: KP Singh <kpsingh@google.com>
>
> Similar to bpf_local_storage for sockets and inodes add local storage
> for task_struct.
>
> The life-cycle of storage is managed with the life-cycle of the
> task_struct.  i.e. the storage is destroyed along with the owning task
> with a callback to the bpf_task_storage_free from the task_free LSM
> hook.

It looks like task local storage is tightly coupled to LSM. As we discussed,
it will be great to use task local storage in tracing programs. Would you
like to enable it from the beginning? Alternatively, I guess we can also do
follow-up patches.

>
> The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
> the security blob which are now stackable and can co-exist with other
> LSMs.
>
> The userspace map operations can be done by using a pid fd as a key
> passed to the lookup, update and delete operations.

While testing task local storage, I noticed a limitation of pid fd:

/* Currently, the process identified by
 * @pid must be a thread-group leader. This restriction currently exists
 * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
 * be used with CLONE_THREAD) and pidfd polling (only supports thread group
 * leaders).
 */

This could be a problem for some use cases. How about we try to remove
this restriction (maybe with a new flag to pidfd_open) as part of this set?

Thanks,
Song

[...]
KP Singh Oct. 30, 2020, 10:53 a.m. UTC | #5
Thanks for taking a look!

On Wed, Oct 28, 2020 at 2:13 AM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Tue, Oct 27, 2020 at 06:03:13PM +0100, KP Singh wrote:
> [ ... ]
>
> > diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
> > new file mode 100644
> > index 000000000000..774140c458cc
> > --- /dev/null
> > +++ b/kernel/bpf/bpf_task_storage.c
> > @@ -0,0 +1,327 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (c) 2019 Facebook
> > + * Copyright 2020 Google LLC.
> > + */
> > +
> > +#include "linux/pid.h"
> > +#include "linux/sched.h"
> > +#include <linux/rculist.h>
> > +#include <linux/list.h>
> > +#include <linux/hash.h>
> > +#include <linux/types.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/bpf.h>
> > +#include <linux/bpf_local_storage.h>
> > +#include <net/sock.h>
> Is this required?

Nope. Removed.

>
> > +#include <uapi/linux/sock_diag.h>
> > +#include <uapi/linux/btf.h>
> > +#include <linux/bpf_lsm.h>
> > +#include <linux/btf_ids.h>
> > +#include <linux/fdtable.h>
> > +
> > +DEFINE_BPF_STORAGE_CACHE(task_cache);
> > +
> > +static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)

[...]

> > +             err = -EBADF;
> > +             goto out_fput;
> > +     }
> > +
> > +     pid = get_pid(f->private_data);
> n00b question. Is get_pid(f->private_data) required?
> f->private_data could be freed while holding f->f_count?

I would assume that holding a reference to the file should also
keep the private_data alive but I was not sure so I grabbed the
extra reference.

>
> > +     task = get_pid_task(pid, PIDTYPE_PID);
> Should put_task_struct() be called before returning?

If we keep using get_pid_task then, yes, I see it grabs a reference to the task.
We could also call pid_task under rcu locks but it might be cleaner to
just get_pid_task
and put_task_struct().

>
> > +     if (!task || !task_storage_ptr(task)) {
> "!task_storage_ptr(task)" is unnecessary, task_storage_lookup() should
> have taken care of it.
>
>
> > +             err = -ENOENT;
> > +             goto out;
> > +     }
> > +
> > +     sdata = task_storage_lookup(task, map, true);
> > +     put_pid(pid);

[...]

> > +     .map_lookup_elem = bpf_pid_task_storage_lookup_elem,
> > +     .map_update_elem = bpf_pid_task_storage_update_elem,
> > +     .map_delete_elem = bpf_pid_task_storage_delete_elem,
> Please exercise the syscall use cases also in the selftest.

Will do. Thanks for the nudge :)

>
> > +     .map_check_btf = bpf_local_storage_map_check_btf,
> > +     .map_btf_name = "bpf_local_storage_map",
> > +     .map_btf_id = &task_storage_map_btf_id,
> > +     .map_owner_storage_ptr = task_storage_ptr,
> > +};
> > +
KP Singh Oct. 30, 2020, 11:02 a.m. UTC | #6
On Fri, Oct 30, 2020 at 12:12 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Oct 28, 2020 at 9:17 AM KP Singh <kpsingh@chromium.org> wrote:
> >
> > From: KP Singh <kpsingh@google.com>
> >
> > Similar to bpf_local_storage for sockets and inodes add local storage
> > for task_struct.
> >
> > The life-cycle of storage is managed with the life-cycle of the
> > task_struct.  i.e. the storage is destroyed along with the owning task
> > with a callback to the bpf_task_storage_free from the task_free LSM
> > hook.
> >
> > The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
> > the security blob which are now stackable and can co-exist with other
> > LSMs.
> >
> > The userspace map operations can be done by using a pid fd as a key
> > passed to the lookup, update and delete operations.
> >
> > Signed-off-by: KP Singh <kpsingh@google.com>
> > ---
>
> Please also double-check all three of get_pid_task() uses, you need to
> put_task_struct() in all cases.

Done, Martin also pointed it out.

>
> >  include/linux/bpf_lsm.h                       |  23 ++
> >  include/linux/bpf_types.h                     |   1 +
> >  include/uapi/linux/bpf.h                      |  39 +++
> >  kernel/bpf/Makefile                           |   1 +

[...]

>
> > + *
> > + * int bpf_task_storage_delete(struct bpf_map *map, void *task)
>
> please use long for return type, as all other helpers (except
> bpf_inode_storage_delete, which would be nice to fix as well) do.

Done. Will also fix the return value of bpf_inode_storage_delete in a
separate patch.

>
> > + *     Description
> > + *             Delete a bpf_local_storage from a *task*.
> > + *     Return
> > + *             0 on success.

[...]

> > +               return;
> > +       }
> > +
> > +       /* Netiher the bpf_prog nor the bpf-map's syscall
>
> typo: Neither

Thanks. Fixed.

>
> > +        * could be modifying the local_storage->list now.
> > +        * Thus, no elem can be added-to or deleted-from the
> > +        * local_storage->list by the bpf_prog or by the bpf-map's syscall.
> > +        *
> > +        * It is racing with bpf_local_storage_map_free() alone
> > +        * when unlinking elem from the local_storage->list and
> > +        * the map's bucket->list.
> > +        */
> > +       raw_spin_lock_bh(&local_storage->lock);
> > +       hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
> > +               /* Always unlink from map before unlinking from
> > +                * local_storage.
> > +                */
> > +               bpf_selem_unlink_map(selem);
> > +               free_task_storage = bpf_selem_unlink_storage_nolock(
> > +                       local_storage, selem, false);
>
> this will override the previous value of free_task_storage. Did you
> intend to do || here?

in bpf_selem_unlink_storage_nolock:

  free_local_storage = hlist_is_singular_node(&selem->snode,
  &local_storage->list);

free_local_storage is only true when the linked list has one element, so it does
not really matter. I guess we could use the "||" here for correctness, and if
we do that, we should also update the other local storages.

>
> > +       }
> > +       raw_spin_unlock_bh(&local_storage->lock);
> > +       rcu_read_unlock();
> > +
> > +       /* free_task_storage should always be true as long as
> > +        * local_storage->list was non-empty.
> > +        */
> > +       if (free_task_storage)
> > +               kfree_rcu(local_storage, rcu);
> > +}
> > +
>
> [...]
KP Singh Oct. 30, 2020, 11:07 a.m. UTC | #7
"

On Fri, Oct 30, 2020 at 12:28 AM Song Liu <song@kernel.org> wrote:
>
> On Wed, Oct 28, 2020 at 9:17 AM KP Singh <kpsingh@chromium.org> wrote:
> >
> > From: KP Singh <kpsingh@google.com>
> >
> > Similar to bpf_local_storage for sockets and inodes add local storage
> > for task_struct.
> >
> > The life-cycle of storage is managed with the life-cycle of the
> > task_struct.  i.e. the storage is destroyed along with the owning task
> > with a callback to the bpf_task_storage_free from the task_free LSM
> > hook.
>
> It looks like task local storage is tightly coupled to LSM. As we discussed,
> it will be great to use task local storage in tracing programs. Would you
> like to enable it from the beginning? Alternatively, I guess we can also do
> follow-up patches.
>

I would prefer if we do it in follow-up patches.

> >
> > The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
> > the security blob which are now stackable and can co-exist with other
> > LSMs.
> >
> > The userspace map operations can be done by using a pid fd as a key
> > passed to the lookup, update and delete operations.
>
> While testing task local storage, I noticed a limitation of pid fd:
>
> /* Currently, the process identified by
>  * @pid must be a thread-group leader. This restriction currently exists
>  * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
>  * be used with CLONE_THREAD) and pidfd polling (only supports thread group
>  * leaders).
>  */
>
> This could be a problem for some use cases. How about we try to remove
> this restriction (maybe with a new flag to pidfd_open) as part of this set?

I would appreciate it if we could also do this in a follow-up patch.

I do see that there is a comment in fork.c:

    "CLONE_THREAD is blocked until someone really needs it."

But I don't understand the requirements well enough and would thus prefer
to do this in a follow-up series.

- KP

>
> Thanks,
> Song
>
> [...]
Song Liu Oct. 31, 2020, 12:02 a.m. UTC | #8
On Fri, Oct 30, 2020 at 4:07 AM KP Singh <kpsingh@chromium.org> wrote:
>
> "
>
> On Fri, Oct 30, 2020 at 12:28 AM Song Liu <song@kernel.org> wrote:
> >
> > On Wed, Oct 28, 2020 at 9:17 AM KP Singh <kpsingh@chromium.org> wrote:
> > >
> > > From: KP Singh <kpsingh@google.com>
> > >
> > > Similar to bpf_local_storage for sockets and inodes add local storage
> > > for task_struct.
> > >
> > > The life-cycle of storage is managed with the life-cycle of the
> > > task_struct.  i.e. the storage is destroyed along with the owning task
> > > with a callback to the bpf_task_storage_free from the task_free LSM
> > > hook.
> >
> > It looks like task local storage is tightly coupled to LSM. As we discussed,
> > it will be great to use task local storage in tracing programs. Would you
> > like to enable it from the beginning? Alternatively, I guess we can also do
> > follow-up patches.
> >
>
> I would prefer if we do it in follow-up patches.
>
> > >
> > > The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
> > > the security blob which are now stackable and can co-exist with other
> > > LSMs.
> > >
> > > The userspace map operations can be done by using a pid fd as a key
> > > passed to the lookup, update and delete operations.
> >
> > While testing task local storage, I noticed a limitation of pid fd:
> >
> > /* Currently, the process identified by
> >  * @pid must be a thread-group leader. This restriction currently exists
> >  * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
> >  * be used with CLONE_THREAD) and pidfd polling (only supports thread group
> >  * leaders).
> >  */
> >
> > This could be a problem for some use cases. How about we try to remove
> > this restriction (maybe with a new flag to pidfd_open) as part of this set?
>
> I would appreciate it if we could also do this in a follow-up patch.
>
> I do see that there is a comment in fork.c:
>
>     "CLONE_THREAD is blocked until someone really needs it."
>
> But I don't understand the requirements well enough and would thus prefer
> to do this in a follow-up series.

Sounds good. Let's work on these in follow-up patches.

Thanks,
Song
KP Singh Nov. 3, 2020, 2:46 p.m. UTC | #9
On Fri, Oct 30, 2020 at 11:53 AM KP Singh <kpsingh@chromium.org> wrote:
>
> Thanks for taking a look!
>
> On Wed, Oct 28, 2020 at 2:13 AM Martin KaFai Lau <kafai@fb.com> wrote:
> >
> > On Tue, Oct 27, 2020 at 06:03:13PM +0100, KP Singh wrote:
> > [ ... ]
> >
> > > diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
> > > new file mode 100644
> > > index 000000000000..774140c458cc
> > > --- /dev/null
> > > +++ b/kernel/bpf/bpf_task_storage.c
> > > @@ -0,0 +1,327 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * Copyright (c) 2019 Facebook
> > > + * Copyright 2020 Google LLC.
> > > + */
> > > +
> > > +#include "linux/pid.h"
> > > +#include "linux/sched.h"
> > > +#include <linux/rculist.h>
> > > +#include <linux/list.h>
> > > +#include <linux/hash.h>
> > > +#include <linux/types.h>
> > > +#include <linux/spinlock.h>
> > > +#include <linux/bpf.h>
> > > +#include <linux/bpf_local_storage.h>
> > > +#include <net/sock.h>
> > Is this required?
>
> Nope. Removed.
>
> >
> > > +#include <uapi/linux/sock_diag.h>
> > > +#include <uapi/linux/btf.h>
> > > +#include <linux/bpf_lsm.h>
> > > +#include <linux/btf_ids.h>
> > > +#include <linux/fdtable.h>
> > > +
> > > +DEFINE_BPF_STORAGE_CACHE(task_cache);
> > > +
> > > +static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
>
> [...]
>
> > > +             err = -EBADF;
> > > +             goto out_fput;
> > > +     }
> > > +
> > > +     pid = get_pid(f->private_data);
> > n00b question. Is get_pid(f->private_data) required?
> > f->private_data could be freed while holding f->f_count?
>
> I would assume that holding a reference to the file should also
> keep the private_data alive but I was not sure so I grabbed the
> extra reference.
>
> >
> > > +     task = get_pid_task(pid, PIDTYPE_PID);
> > Should put_task_struct() be called before returning?
>
> If we keep using get_pid_task then, yes, I see it grabs a reference to the task.
> We could also call pid_task under rcu locks but it might be cleaner to
> just get_pid_task
> and put_task_struct().

I refactored this to use pidfd_get_pid and it seems like we can simply call
pid_task since we are already in an RCU read side critical section.

And to be pedantic, I added a WARN_ON_ONCE(!rcu_read_lock_held());
(although this is not required as lockdep should pretty much handle it
by default)

- KP

>
> >
> > > +     if (!task || !task_storage_ptr(task)) {
> > "!task_storage_ptr(task)" is unnecessary, task_storage_lookup() should
> > have taken care of it.
> >
> >
> > > +             err = -ENOENT;
> > > +             goto out;
> > > +     }
> > > +
> > > +     sdata = task_storage_lookup(task, map, true);
> > > +     put_pid(pid);
>
> [...]
>
> > > +     .map_lookup_elem = bpf_pid_task_storage_lookup_elem,
> > > +     .map_update_elem = bpf_pid_task_storage_update_elem,
> > > +     .map_delete_elem = bpf_pid_task_storage_delete_elem,
> > Please exercise the syscall use cases also in the selftest.
>
> Will do. Thanks for the nudge :)

I also added another patch to exercise them for the other storage types too.

- KP

>
> >
> > > +     .map_check_btf = bpf_local_storage_map_check_btf,
> > > +     .map_btf_name = "bpf_local_storage_map",
> > > +     .map_btf_id = &task_storage_map_btf_id,
> > > +     .map_owner_storage_ptr = task_storage_ptr,
> > > +};
> > > +
KP Singh Nov. 3, 2020, 2:52 p.m. UTC | #10
[,,,]

> > + *
> > + * void *bpf_task_storage_get(struct bpf_map *map, void *task, void *value, u64 flags)
> After peeking patch 2,  I think the pointer type should be
> "struct task_struct *task" instead of "void *task".
>
> Same for bpf_task_storage_delete().

Done. Thanks!
diff mbox series

Patch

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index aaacb6aafc87..326cb68a3632 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -7,6 +7,7 @@ 
 #ifndef _LINUX_BPF_LSM_H
 #define _LINUX_BPF_LSM_H
 
+#include "linux/sched.h"
 #include <linux/bpf.h>
 #include <linux/lsm_hooks.h>
 
@@ -35,9 +36,21 @@  static inline struct bpf_storage_blob *bpf_inode(
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	if (unlikely(!task->security))
+		return NULL;
+
+	return task->security + bpf_lsm_blob_sizes.lbs_task;
+}
+
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
+void bpf_task_storage_free(struct task_struct *task);
 
 #else /* !CONFIG_BPF_LSM */
 
@@ -53,10 +66,20 @@  static inline struct bpf_storage_blob *bpf_inode(
 	return NULL;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	return NULL;
+}
+
 static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2e6f568377f1..99f7fd657d87 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -109,6 +109,7 @@  BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e6ceac3f7d62..bb443c4f3637 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -157,6 +157,7 @@  enum bpf_map_type {
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3742,6 +3743,42 @@  union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, void *task, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *task*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *task* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *		helper enforces the key must be an task_struct and the map must also
+ *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *task* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *task*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_task_storage_delete(struct bpf_map *map, void *task)
+ *	Description
+ *		Delete a bpf_local_storage from a *task*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3900,6 +3937,8 @@  union bpf_attr {
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index bdc8cd1b6767..f0b93ced5a7f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,6 +6,7 @@  obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM}	  += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 78ea8a7bd27f..61f8cc52fd5b 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -59,6 +59,10 @@  bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
new file mode 100644
index 000000000000..774140c458cc
--- /dev/null
+++ b/kernel/bpf/bpf_task_storage.c
@@ -0,0 +1,327 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#include "linux/pid.h"
+#include "linux/sched.h"
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+#include <linux/fdtable.h>
+
+DEFINE_BPF_STORAGE_CACHE(task_cache);
+
+static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
+{
+	struct task_struct *task = owner;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return NULL;
+	return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *
+task_storage_lookup(struct task_struct *task, struct bpf_map *map,
+		    bool cacheit_lockit)
+{
+	struct bpf_local_storage *task_storage;
+	struct bpf_local_storage_map *smap;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return NULL;
+
+	task_storage = rcu_dereference(bsb->storage);
+	if (!task_storage)
+		return NULL;
+
+	smap = (struct bpf_local_storage_map *)map;
+	return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
+}
+
+void bpf_task_storage_free(struct task_struct *task)
+{
+	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage *local_storage;
+	bool free_task_storage = false;
+	struct bpf_storage_blob *bsb;
+	struct hlist_node *n;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return;
+
+	rcu_read_lock();
+
+	local_storage = rcu_dereference(bsb->storage);
+	if (!local_storage) {
+		rcu_read_unlock();
+		return;
+	}
+
+	/* Netiher the bpf_prog nor the bpf-map's syscall
+	 * could be modifying the local_storage->list now.
+	 * Thus, no elem can be added-to or deleted-from the
+	 * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+	 *
+	 * It is racing with bpf_local_storage_map_free() alone
+	 * when unlinking elem from the local_storage->list and
+	 * the map's bucket->list.
+	 */
+	raw_spin_lock_bh(&local_storage->lock);
+	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+		/* Always unlink from map before unlinking from
+		 * local_storage.
+		 */
+		bpf_selem_unlink_map(selem);
+		free_task_storage = bpf_selem_unlink_storage_nolock(
+			local_storage, selem, false);
+	}
+	raw_spin_unlock_bh(&local_storage->lock);
+	rcu_read_unlock();
+
+	/* free_task_storage should always be true as long as
+	 * local_storage->list was non-empty.
+	 */
+	if (free_task_storage)
+		kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_local_storage_data *sdata;
+	struct task_struct *task;
+	struct pid *pid;
+	struct file *f;
+	int fd, err;
+
+	fd = *(int *)key;
+	f = fget_raw(fd);
+	if (!f)
+		return ERR_PTR(-EBADF);
+
+	if (f->f_op != &pidfd_fops) {
+		err = -EBADF;
+		goto out_fput;
+	}
+
+	pid = get_pid(f->private_data);
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task || !task_storage_ptr(task)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	sdata = task_storage_lookup(task, map, true);
+	put_pid(pid);
+	return sdata ? sdata->data : NULL;
+out:
+	put_pid(pid);
+out_fput:
+	fput(f);
+	return ERR_PTR(err);
+}
+
+static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
+					    void *value, u64 map_flags)
+{
+	struct bpf_local_storage_data *sdata;
+	struct task_struct *task;
+	struct pid *pid;
+	struct file *f;
+	int fd, err;
+
+	fd = *(int *)key;
+	f = fget_raw(fd);
+	if (!f)
+		return -EBADF;
+
+	if (f->f_op != &pidfd_fops) {
+		err = -EBADF;
+		goto out_fput;
+	}
+
+	pid = get_pid(f->private_data);
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task || !task_storage_ptr(task)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	sdata = bpf_local_storage_update(
+		task, (struct bpf_local_storage_map *)map, value, map_flags);
+
+	err = PTR_ERR_OR_ZERO(sdata);
+out:
+	put_pid(pid);
+out_fput:
+	fput(f);
+	return err;
+}
+
+static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
+{
+	struct bpf_local_storage_data *sdata;
+
+	sdata = task_storage_lookup(task, map, false);
+	if (!sdata)
+		return -ENOENT;
+
+	bpf_selem_unlink(SELEM(sdata));
+
+	return 0;
+}
+
+static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	struct task_struct *task;
+	struct pid *pid;
+	struct file *f;
+	int fd, err;
+
+	fd = *(int *)key;
+	f = fget_raw(fd);
+	if (!f)
+		return -EBADF;
+
+	if (f->f_op != &pidfd_fops) {
+		err = -EBADF;
+		goto out_fput;
+	}
+
+	pid = get_pid(f->private_data);
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task || !task_storage_ptr(task)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	err = task_storage_delete(task, map);
+
+out:
+	put_pid(pid);
+out_fput:
+	fput(f);
+	return err;
+}
+
+BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags)
+{
+	struct bpf_local_storage_data *sdata;
+
+	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+		return (unsigned long)NULL;
+
+	/* explicitly check that the task_storage_ptr is not
+	 * NULL as task_storage_lookup returns NULL in this case and
+	 * bpf_local_storage_update expects the owner to have a
+	 * valid storage pointer.
+	 */
+	if (!task_storage_ptr(task))
+		return (unsigned long)NULL;
+
+	sdata = task_storage_lookup(task, map, true);
+	if (sdata)
+		return (unsigned long)sdata->data;
+
+	/* This helper must only called from where the task is guaranteed
+	 * to have a refcount and cannot be freed.
+	 */
+	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+		sdata = bpf_local_storage_update(
+			task, (struct bpf_local_storage_map *)map, value,
+			BPF_NOEXIST);
+		return IS_ERR(sdata) ? (unsigned long)NULL :
+					     (unsigned long)sdata->data;
+	}
+
+	return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
+	   task)
+{
+	/* This helper must only called from where the task is guaranteed
+	 * to have a refcount and cannot be freed.
+	 */
+	return task_storage_delete(task, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+	return &smap->map;
+}
+
+static void task_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap);
+}
+
+static int task_storage_map_btf_id;
+const struct bpf_map_ops task_storage_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
+	.map_alloc_check = bpf_local_storage_map_alloc_check,
+	.map_alloc = task_storage_map_alloc,
+	.map_free = task_storage_map_free,
+	.map_get_next_key = notsupp_get_next_key,
+	.map_lookup_elem = bpf_pid_task_storage_lookup_elem,
+	.map_update_elem = bpf_pid_task_storage_update_elem,
+	.map_delete_elem = bpf_pid_task_storage_delete_elem,
+	.map_check_btf = bpf_local_storage_map_check_btf,
+	.map_btf_name = "bpf_local_storage_map",
+	.map_btf_id = &task_storage_map_btf_id,
+	.map_owner_storage_ptr = task_storage_ptr,
+};
+
+BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
+
+const struct bpf_func_proto bpf_task_storage_get_proto = {
+	.func = bpf_task_storage_get,
+	.gpl_only = false,
+	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
+	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type = ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_task_storage_delete_proto = {
+	.func = bpf_task_storage_delete,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8f50c9c19f1b..f3fe9f53f93c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -773,7 +773,8 @@  static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		    map->map_type != BPF_MAP_TYPE_ARRAY &&
 		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
-		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 			return -ENOTSUPP;
 		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 		    map->value_size) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6200519582a6..b0790876694f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4469,6 +4469,11 @@  static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_inode_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_TASK_STORAGE:
+		if (func_id != BPF_FUNC_task_storage_get &&
+		    func_id != BPF_FUNC_task_storage_delete)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -4547,6 +4552,11 @@  static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_task_storage_get:
+	case BPF_FUNC_task_storage_delete:
+		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index 788667d582ae..e5971fa74fd7 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -12,6 +12,7 @@  static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = {
 	#include <linux/lsm_hook_defs.h>
 	#undef LSM_HOOK
 	LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free),
+	LSM_HOOK_INIT(task_free, bpf_task_storage_free),
 };
 
 static int __init bpf_lsm_init(void)
@@ -23,6 +24,7 @@  static int __init bpf_lsm_init(void)
 
 struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = {
 	.lbs_inode = sizeof(struct bpf_storage_blob),
+	.lbs_task = sizeof(struct bpf_storage_blob),
 };
 
 DEFINE_LSM(bpf) = {
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index dade10cdf295..3d52256ba75f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -50,7 +50,8 @@  MAP COMMANDS
 |		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
 |		| **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
 |		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-|		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** }
+|		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
+		| **task_storage** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 3f1da30c4da6..fdffbc64c65c 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -705,7 +705,7 @@  _bpftool()
                                 hash_of_maps devmap devmap_hash sockmap cpumap \
                                 xskmap sockhash cgroup_storage reuseport_sockarray \
                                 percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops inode_storage' -- \
+                                struct_ops inode_storage task_storage' -- \
                                                    "$cur" ) )
                             return 0
                             ;;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a7efbd84fbcc..b400364ee054 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -51,6 +51,7 @@  const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_STRUCT_OPS]		= "struct_ops",
 	[BPF_MAP_TYPE_RINGBUF]			= "ringbuf",
 	[BPF_MAP_TYPE_INODE_STORAGE]		= "inode_storage",
+	[BPF_MAP_TYPE_TASK_STORAGE]		= "task_storage",
 };
 
 const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -1464,7 +1465,8 @@  static int do_help(int argc, char **argv)
 		"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
 		"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
 		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
-		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n"
+		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
+		"		  task_storage }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2]);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6ceac3f7d62..bb443c4f3637 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -157,6 +157,7 @@  enum bpf_map_type {
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3742,6 +3743,42 @@  union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, void *task, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *task*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *task* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *		helper enforces the key must be an task_struct and the map must also
+ *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *task* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *task*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * int bpf_task_storage_delete(struct bpf_map *map, void *task)
+ *	Description
+ *		Delete a bpf_local_storage from a *task*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3900,6 +3937,8 @@  union bpf_attr {
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 5482a9b7ae2d..bed00ca194f0 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -1,6 +1,7 @@ 
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (c) 2019 Netronome Systems, Inc. */
 
+#include "linux/bpf.h"
 #include <errno.h>
 #include <fcntl.h>
 #include <string.h>
@@ -230,6 +231,7 @@  bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 		break;
 	case BPF_MAP_TYPE_SK_STORAGE:
 	case BPF_MAP_TYPE_INODE_STORAGE:
+	case BPF_MAP_TYPE_TASK_STORAGE:
 		btf_key_type_id = 1;
 		btf_value_type_id = 3;
 		value_size = 8;