diff mbox series

[bpf-next,v6,2/8] bpf: enable detaching links of struct_ops objects.

Message ID 20240524223036.318800-3-thinker.li@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Notify user space when a struct_ops object is detached/unregistered | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next-0

Commit Message

Kui-Feng Lee May 24, 2024, 10:30 p.m. UTC
Implement the detach callback in bpf_link_ops for struct_ops so that user
programs can detach a struct_ops link. The subsystems that struct_ops
objects are registered to can also use this callback to detach the links
being passed to them.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 kernel/bpf/bpf_struct_ops.c | 53 ++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 6 deletions(-)

Comments

Martin KaFai Lau May 29, 2024, 6:17 a.m. UTC | #1
On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
> +{
> +	struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	struct bpf_struct_ops_map *st_map;
> +	struct bpf_map *map;
> +
> +	mutex_lock(&update_mutex);

update_mutex is needed to detach.

> +
> +	map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
> +	if (!map) {
> +		mutex_unlock(&update_mutex);
> +		return 0;
> +	}
> +	st_map = container_of(map, struct bpf_struct_ops_map, map);
> +
> +	st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
> +
> +	rcu_assign_pointer(st_link->map, NULL);
> +	/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
> +	 * bpf_map_inc() in bpf_struct_ops_map_link_update().
> +	 */
> +	bpf_map_put(&st_map->map);
> +
> +	mutex_unlock(&update_mutex);
> +
> +	return 0;
> +}
> +
>   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>   	.dealloc = bpf_struct_ops_map_link_dealloc,
> +	.detach = bpf_struct_ops_map_link_detach,
>   	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>   	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>   	.update_map = bpf_struct_ops_map_link_update,
> @@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>   	if (err)
>   		goto err_out;
>   
> +	/* Init link->map before calling reg() in case being detached
> +	 * immediately.
> +	 */

With update_mutex held in link_create here, the parallel detach can still happen 
before the link is fully initialized (the link->map pointer here in particular)?

> +	RCU_INIT_POINTER(link->map, map);
> +
> +	mutex_lock(&update_mutex);
>   	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
>   	if (err) {
> +		RCU_INIT_POINTER(link->map, NULL);

I was hoping by holding the update_mutex, it can avoid this link->map init 
dance, like RCU_INIT_POINTER(link->map, map) above and then resetting here on 
the error case.

> +		mutex_unlock(&update_mutex);
>   		bpf_link_cleanup(&link_primer);
> +		/* The link has been free by bpf_link_cleanup() */
>   		link = NULL;
>   		goto err_out;
>   	}
> -	RCU_INIT_POINTER(link->map, map);

If only init link->map once here like the existing code (and the init is 
protected by the update_mutex), the subsystem should not be able to detach until 
the link->map is fully initialized.

or I am missing something obvious. Can you explain why this link->map init dance 
is still needed?

> +	mutex_unlock(&update_mutex);
Kui-Feng Lee May 29, 2024, 3:04 p.m. UTC | #2
On Tue, May 28, 2024 at 11:17 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
> > +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
> > +{
> > +     struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
> > +     struct bpf_struct_ops_map *st_map;
> > +     struct bpf_map *map;
> > +
> > +     mutex_lock(&update_mutex);
>
> update_mutex is needed to detach.
>
> > +
> > +     map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
> > +     if (!map) {
> > +             mutex_unlock(&update_mutex);
> > +             return 0;
> > +     }
> > +     st_map = container_of(map, struct bpf_struct_ops_map, map);
> > +
> > +     st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
> > +
> > +     rcu_assign_pointer(st_link->map, NULL);
> > +     /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
> > +      * bpf_map_inc() in bpf_struct_ops_map_link_update().
> > +      */
> > +     bpf_map_put(&st_map->map);
> > +
> > +     mutex_unlock(&update_mutex);
> > +
> > +     return 0;
> > +}
> > +
> >   static const struct bpf_link_ops bpf_struct_ops_map_lops = {
> >       .dealloc = bpf_struct_ops_map_link_dealloc,
> > +     .detach = bpf_struct_ops_map_link_detach,
> >       .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
> >       .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
> >       .update_map = bpf_struct_ops_map_link_update,
> > @@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
> >       if (err)
> >               goto err_out;
> >
> > +     /* Init link->map before calling reg() in case being detached
> > +      * immediately.
> > +      */
>
> With update_mutex held in link_create here, the parallel detach can still happen
> before the link is fully initialized (the link->map pointer here in particular)?
>
> > +     RCU_INIT_POINTER(link->map, map);
> > +
> > +     mutex_lock(&update_mutex);
> >       err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
> >       if (err) {
> > +             RCU_INIT_POINTER(link->map, NULL);
>
> I was hoping by holding the update_mutex, it can avoid this link->map init
> dance, like RCU_INIT_POINTER(link->map, map) above and then resetting here on
> the error case.
>
> > +             mutex_unlock(&update_mutex);
> >               bpf_link_cleanup(&link_primer);
> > +             /* The link has been free by bpf_link_cleanup() */
> >               link = NULL;
> >               goto err_out;
> >       }
> > -     RCU_INIT_POINTER(link->map, map);
>
> If only init link->map once here like the existing code (and the init is
> protected by the update_mutex), the subsystem should not be able to detach until
> the link->map is fully initialized.
>
> or I am missing something obvious. Can you explain why this link->map init dance
> is still needed?

Ok, I get what you mean.

I will move RCU_INIT_POINTER() back to its original place, and move the check
on the value of "err" to the place after mutex_unlock(). Is that what you'd like?

>
> > +     mutex_unlock(&update_mutex);
>
Martin KaFai Lau May 29, 2024, 10:38 p.m. UTC | #3
On 5/29/24 8:04 AM, Kuifeng Lee wrote:
> On Tue, May 28, 2024 at 11:17 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>
>> On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
>>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
>>> +{
>>> +     struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
>>> +     struct bpf_struct_ops_map *st_map;
>>> +     struct bpf_map *map;
>>> +
>>> +     mutex_lock(&update_mutex);
>>
>> update_mutex is needed to detach.
>>
>>> +
>>> +     map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
>>> +     if (!map) {
>>> +             mutex_unlock(&update_mutex);
>>> +             return 0;
>>> +     }
>>> +     st_map = container_of(map, struct bpf_struct_ops_map, map);
>>> +
>>> +     st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
>>> +
>>> +     rcu_assign_pointer(st_link->map, NULL);
>>> +     /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
>>> +      * bpf_map_inc() in bpf_struct_ops_map_link_update().
>>> +      */
>>> +     bpf_map_put(&st_map->map);
>>> +
>>> +     mutex_unlock(&update_mutex);
>>> +
>>> +     return 0;
>>> +}
>>> +
>>>    static const struct bpf_link_ops bpf_struct_ops_map_lops = {
>>>        .dealloc = bpf_struct_ops_map_link_dealloc,
>>> +     .detach = bpf_struct_ops_map_link_detach,
>>>        .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
>>>        .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
>>>        .update_map = bpf_struct_ops_map_link_update,
>>> @@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
>>>        if (err)
>>>                goto err_out;
>>>
>>> +     /* Init link->map before calling reg() in case being detached
>>> +      * immediately.
>>> +      */
>>
>> With update_mutex held in link_create here, the parallel detach can still happen
>> before the link is fully initialized (the link->map pointer here in particular)?
>>
>>> +     RCU_INIT_POINTER(link->map, map);
>>> +
>>> +     mutex_lock(&update_mutex);
>>>        err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
>>>        if (err) {
>>> +             RCU_INIT_POINTER(link->map, NULL);
>>
>> I was hoping by holding the update_mutex, it can avoid this link->map init
>> dance, like RCU_INIT_POINTER(link->map, map) above and then resetting here on
>> the error case.
>>
>>> +             mutex_unlock(&update_mutex);
>>>                bpf_link_cleanup(&link_primer);
>>> +             /* The link has been free by bpf_link_cleanup() */
>>>                link = NULL;
>>>                goto err_out;
>>>        }
>>> -     RCU_INIT_POINTER(link->map, map);
>>
>> If only init link->map once here like the existing code (and the init is
>> protected by the update_mutex), the subsystem should not be able to detach until
>> the link->map is fully initialized.
>>
>> or I am missing something obvious. Can you explain why this link->map init dance
>> is still needed?
> 
> Ok, I get what you mean.
> 
> I will move RCU_INIT_POINTER() back to its original place, and move the check
> on the value of "err" to the place after mutex_unlock().
The RCU_INIT_POINTER(link->map, map) needs to be done with update_mutex held, and
it should be initialized after the err check, so the err check needs to be inside
the update_mutex lock as well.

Something like this (untested):

	mutex_lock(&update_mutex);

	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
	if (err) {
		mutex_unlock(&update_mutex);
		bpf_link_cleanup(&link_primer);
		link = NULL;
		goto err_out;
	}
	RCU_INIT_POINTER(link->map, map);

	mutex_unlock(&update_mutex);


> 
>>
>>> +     mutex_unlock(&update_mutex);
>>
Kui-Feng Lee May 29, 2024, 11:26 p.m. UTC | #4
On Wed, May 29, 2024 at 3:38 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 5/29/24 8:04 AM, Kuifeng Lee wrote:
> > On Tue, May 28, 2024 at 11:17 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
> >>
> >> On 5/24/24 3:30 PM, Kui-Feng Lee wrote:
> >>> +static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
> >>> +{
> >>> +     struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
> >>> +     struct bpf_struct_ops_map *st_map;
> >>> +     struct bpf_map *map;
> >>> +
> >>> +     mutex_lock(&update_mutex);
> >>
> >> update_mutex is needed to detach.
> >>
> >>> +
> >>> +     map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
> >>> +     if (!map) {
> >>> +             mutex_unlock(&update_mutex);
> >>> +             return 0;
> >>> +     }
> >>> +     st_map = container_of(map, struct bpf_struct_ops_map, map);
> >>> +
> >>> +     st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
> >>> +
> >>> +     rcu_assign_pointer(st_link->map, NULL);
> >>> +     /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
> >>> +      * bpf_map_inc() in bpf_struct_ops_map_link_update().
> >>> +      */
> >>> +     bpf_map_put(&st_map->map);
> >>> +
> >>> +     mutex_unlock(&update_mutex);
> >>> +
> >>> +     return 0;
> >>> +}
> >>> +
> >>>    static const struct bpf_link_ops bpf_struct_ops_map_lops = {
> >>>        .dealloc = bpf_struct_ops_map_link_dealloc,
> >>> +     .detach = bpf_struct_ops_map_link_detach,
> >>>        .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
> >>>        .fill_link_info = bpf_struct_ops_map_link_fill_link_info,
> >>>        .update_map = bpf_struct_ops_map_link_update,
> >>> @@ -1176,13 +1208,22 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
> >>>        if (err)
> >>>                goto err_out;
> >>>
> >>> +     /* Init link->map before calling reg() in case being detached
> >>> +      * immediately.
> >>> +      */
> >>
> >> With update_mutex held in link_create here, the parallel detach can still happen
> >> before the link is fully initialized (the link->map pointer here in particular)?
> >>
> >>> +     RCU_INIT_POINTER(link->map, map);
> >>> +
> >>> +     mutex_lock(&update_mutex);
> >>>        err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
> >>>        if (err) {
> >>> +             RCU_INIT_POINTER(link->map, NULL);
> >>
> >> I was hoping by holding the update_mutex, it can avoid this link->map init
> >> dance, like RCU_INIT_POINTER(link->map, map) above and then resetting here on
> >> the error case.
> >>
> >>> +             mutex_unlock(&update_mutex);
> >>>                bpf_link_cleanup(&link_primer);
> >>> +             /* The link has been free by bpf_link_cleanup() */
> >>>                link = NULL;
> >>>                goto err_out;
> >>>        }
> >>> -     RCU_INIT_POINTER(link->map, map);
> >>
> >> If only init link->map once here like the existing code (and the init is
> >> protected by the update_mutex), the subsystem should not be able to detach until
> >> the link->map is fully initialized.
> >>
> >> or I am missing something obvious. Can you explain why this link->map init dance
> >> is still needed?
> >
> > Ok, I get what you mean.
> >
> > I will move RCU_INIT_POINTER() back to its original place, and move the check
> > on the value of "err" to the place after mutex_unlock().
> The RCU_INIT_POINTER(link->map, map) needs to be done with update_mutex held, and
> it should be initialized after the err check, so the err check needs to be inside
> the update_mutex lock as well.
>
> Something like this (untested):
>
>         mutex_lock(&update_mutex);
>
>         err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
>         if (err) {
>                 mutex_unlock(&update_mutex);
>                 bpf_link_cleanup(&link_primer);
>                 link = NULL;
>                 goto err_out;
>         }
>         RCU_INIT_POINTER(link->map, map);
>
>         mutex_unlock(&update_mutex);
>

Sure! According to what we discussed off-line, the RCU_INIT_POINTER()
will be moved
back to its original place. Subsystems should not try to access link->map.


>
> >
> >>
> >>> +     mutex_unlock(&update_mutex);
> >>
>
diff mbox series

Patch

diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 1542dded7489..f2439acd9757 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -1057,9 +1057,6 @@  static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
 	st_map = (struct bpf_struct_ops_map *)
 		rcu_dereference_protected(st_link->map, true);
 	if (st_map) {
-		/* st_link->map can be NULL if
-		 * bpf_struct_ops_link_create() fails to register.
-		 */
 		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
 		bpf_map_put(&st_map->map);
 	}
@@ -1075,7 +1072,8 @@  static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
-	seq_printf(seq, "map_id:\t%d\n", map->id);
+	if (map)
+		seq_printf(seq, "map_id:\t%d\n", map->id);
 	rcu_read_unlock();
 }
 
@@ -1088,7 +1086,8 @@  static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
-	info->struct_ops.map_id = map->id;
+	if (map)
+		info->struct_ops.map_id = map->id;
 	rcu_read_unlock();
 	return 0;
 }
@@ -1113,6 +1112,10 @@  static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	mutex_lock(&update_mutex);
 
 	old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+	if (!old_map) {
+		err = -ENOLINK;
+		goto err_out;
+	}
 	if (expected_old_map && old_map != expected_old_map) {
 		err = -EPERM;
 		goto err_out;
@@ -1139,8 +1142,37 @@  static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	return err;
 }
 
+static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
+{
+	struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link);
+	struct bpf_struct_ops_map *st_map;
+	struct bpf_map *map;
+
+	mutex_lock(&update_mutex);
+
+	map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));
+	if (!map) {
+		mutex_unlock(&update_mutex);
+		return 0;
+	}
+	st_map = container_of(map, struct bpf_struct_ops_map, map);
+
+	st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
+
+	rcu_assign_pointer(st_link->map, NULL);
+	/* Pair with bpf_map_get() in bpf_struct_ops_link_create() or
+	 * bpf_map_inc() in bpf_struct_ops_map_link_update().
+	 */
+	bpf_map_put(&st_map->map);
+
+	mutex_unlock(&update_mutex);
+
+	return 0;
+}
+
 static const struct bpf_link_ops bpf_struct_ops_map_lops = {
 	.dealloc = bpf_struct_ops_map_link_dealloc,
+	.detach = bpf_struct_ops_map_link_detach,
 	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
 	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
 	.update_map = bpf_struct_ops_map_link_update,
@@ -1176,13 +1208,22 @@  int bpf_struct_ops_link_create(union bpf_attr *attr)
 	if (err)
 		goto err_out;
 
+	/* Init link->map before calling reg() in case being detached
+	 * immediately.
+	 */
+	RCU_INIT_POINTER(link->map, map);
+
+	mutex_lock(&update_mutex);
 	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
 	if (err) {
+		RCU_INIT_POINTER(link->map, NULL);
+		mutex_unlock(&update_mutex);
 		bpf_link_cleanup(&link_primer);
+		/* The link has been free by bpf_link_cleanup() */
 		link = NULL;
 		goto err_out;
 	}
-	RCU_INIT_POINTER(link->map, map);
+	mutex_unlock(&update_mutex);
 
 	return bpf_link_settle(&link_primer);