diff mbox series

BUG: d_path() races with do_move_mount() on ->mnt_ns, leading to use-after-free

Message ID CAG48ez2dS04ONb-EVQGOtmeU6vTpKLe4J0W1yqa+Q9j+Hg3hFw@mail.gmail.com (mailing list archive)
State New, archived
Headers show
Series BUG: d_path() races with do_move_mount() on ->mnt_ns, leading to use-after-free | expand

Commit Message

Jann Horn Sept. 13, 2022, 5:14 p.m. UTC
As the subject says, there's a race between d_path() (specifically
__prepend_path()) looking at mnt->mnt_ns with is_anon_ns(), and
do_move_mount() switching out the ->mnt_ns and freeing the old one.
This can theoretically lead to a use-after-free read, but it doesn't
seem to be very interesting from a security perspective, since all it
gets you is a comparison of a value in freed memory with zero.

KASAN splat from a kernel that's been patched to widen the race window:


 ==================================================================
 BUG: KASAN: use-after-free in prepend_path (fs/mount.h:146 fs/d_path.c:127 fs/d_path.c:177)
 Read of size 8 at addr ffff88800add2748 by task SLOWME/685

 CPU: 8 PID: 685 Comm: SLOWME Not tainted 6.0.0-rc5-00015-ge839a756012b-dirty #110
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
 Call Trace:
  <TASK>
 dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1))
 print_report.cold (mm/kasan/report.c:318 mm/kasan/report.c:433)
[...]
 kasan_report (mm/kasan/report.c:162 mm/kasan/report.c:497)
[...]
 prepend_path (fs/mount.h:146 fs/d_path.c:127 fs/d_path.c:177)
[...]
 __do_sys_getcwd (fs/d_path.c:438)
[...]
 do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
 entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
[...]
  </TASK>

 Allocated by task 685:
 kasan_save_stack (mm/kasan/common.c:39)
 __kasan_kmalloc (mm/kasan/common.c:45 mm/kasan/common.c:437 mm/kasan/common.c:516 mm/kasan/common.c:525)
 alloc_mnt_ns (./include/linux/slab.h:600 ./include/linux/slab.h:733 fs/namespace.c:3426)
 __do_sys_fsmount (fs/namespace.c:3720)
 do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
 entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)

 Freed by task 686:
 kasan_save_stack (mm/kasan/common.c:39)
 kasan_set_track (mm/kasan/common.c:45)
 kasan_set_free_info (mm/kasan/generic.c:372)
 ____kasan_slab_free (mm/kasan/common.c:369 mm/kasan/common.c:329)
 kfree (mm/slub.c:1780 mm/slub.c:3534 mm/slub.c:4562)
 do_move_mount (fs/namespace.c:2899)
 __x64_sys_move_mount (fs/namespace.c:3812 fs/namespace.c:3765 fs/namespace.c:3765)
 do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
 entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)

 The buggy address belongs to the object at ffff88800add2700
  which belongs to the cache kmalloc-128 of size 128
 The buggy address is located 72 bytes inside of
  128-byte region [ffff88800add2700, ffff88800add2780)

[...]

 Memory state around the buggy address:
  ffff88800add2600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffff88800add2680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 >ffff88800add2700: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                               ^
  ffff88800add2780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffff88800add2800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ==================================================================



To reproduce, apply this kernel patch (shown in full in the "Patch" section below) to widen the race window:


                                return 1;       // absolute root


Then run this reproducer (build with "-pthread"):


#define _GNU_SOURCE
#include <pthread.h>
#include <unistd.h>
#include <err.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/stat.h>
#include <sys/prctl.h>
#include <sys/mount.h>
#include <linux/mount.h>

/*
 * SYSCHK(x) - evaluate x exactly once and bail out if it yields -1.
 *
 * The result of x is compared against -1 converted to x's own type; on a
 * match err() prints "SYSCHK(<expression text>)" together with the errno
 * description and exits with status 1.  Otherwise the macro evaluates to
 * the result of x, so it can be used inside an expression.
 *
 * Only the -1 sentinel is detected: a call that reports failure in another
 * way (for example by returning NULL) passes through unchecked.
 *
 * Relies on GNU statement expressions and needs <err.h>.  __typeof__ is used
 * rather than typeof so the macro also builds in ISO modes such as -std=c11,
 * where the unprefixed keyword is not available.
 */
#define SYSCHK(x) ({              \
  __typeof__(x) __res = (x);      \
  if (__res == (__typeof__(x))-1) \
    err(1, "SYSCHK(" #x ")");     \
  __res;                          \
})

/*
 * Thin wrapper around the raw fsconfig system call, invoked through
 * syscall(__NR_fsconfig, ...).
 *
 * All five arguments are forwarded unchanged.  The call is wrapped in
 * SYSCHK, so a -1 return terminates the program via err(); nothing is
 * returned to the caller.
 */
void fsconfig(int fd, unsigned int cmd, char *key, void *value, int aux) {
  SYSCHK(syscall(__NR_fsconfig, fd, cmd, key, value, aux));
}

/*
 * File descriptor of the mount created by fsmount() in main(); shared with
 * thread_fn(), which moves that mount.  -1 until main() assigns it.
 */
static int mnt_fd = -1;

/*
 * Thread body: the move_mount side of the race.
 *
 * Attaches the mount referred to by mnt_fd at /dev/shm/test, waits one
 * second, then lazily detaches it again.  main() calls getcwd() on the same
 * mount while this runs.
 *
 * dummy is unused; always returns NULL.
 */
static void *thread_fn(void *dummy) {
  /* Return value deliberately not checked (the directory may already exist). */
  mkdir("/dev/shm/test", 0700);
  /* Empty source path + MOVE_MOUNT_F_EMPTY_PATH: move the mount that mnt_fd itself refers to. */
  SYSCHK(syscall(__NR_move_mount, mnt_fd, "", AT_FDCWD, "/dev/shm/test",
                 MOVE_MOUNT_F_EMPTY_PATH));
  /* Leave the mount attached for a second before tearing it down. */
  sleep(1);
  /* MNT_DETACH: lazy unmount, so it succeeds even though main() has its cwd there. */
  SYSCHK(umount2("/dev/shm/test", MNT_DETACH));
  return NULL;
}

/*
 * Reproducer entry point.
 *
 * Creates a tmpfs mount with fsopen()/fsconfig()/fsmount(), makes it the
 * current working directory, and then calls getcwd() while a second thread
 * (thread_fn) moves that mount with move_mount().
 *
 * The task name is set to "SLOWME" around the getcwd() call; the accompanying
 * kernel debug patch keys its artificial delay on that name.
 *
 * Returns 0 on completion; any SYSCHK-detected failure exits with status 1.
 */
int main(void) {
  /* Open a tmpfs filesystem context and create the superblock. */
  int fs_fd = SYSCHK(syscall(__NR_fsopen, "tmpfs", 0));
  fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
  /* Turn the context into a mount, referenced only through mnt_fd. */
  mnt_fd = SYSCHK(syscall(__NR_fsmount, fs_fd, 0, MOUNT_ATTR_NOSUID |
MOUNT_ATTR_NODEV));
  SYSCHK(close(fs_fd));
  /* Make the new mount the cwd so that getcwd() below has to walk it. */
  SYSCHK(fchdir(mnt_fd));

  /* Start the thread that performs the racing move_mount(). */
  pthread_t thread;
  if (pthread_create(&thread, NULL, thread_fn, NULL))
    errx(1, "pthread_create");

  char buf[0x10000];
  /* Rename the task to "SLOWME" only for the duration of getcwd(). */
  SYSCHK(prctl(PR_SET_NAME, "SLOWME"));
  /*
   * NOTE(review): SYSCHK only traps a -1 result, while getcwd() reports
   * failure by returning NULL, so a getcwd() failure is not detected here.
   * Presumably acceptable because only the side effect of the call matters
   * for the reproducer - confirm before tightening this check.
   */
  SYSCHK(getcwd(buf, sizeof(buf)));
  SYSCHK(prctl(PR_SET_NAME, "dummy"));
  /* Leave the test mount before waiting for the thread and closing mnt_fd. */
  SYSCHK(chdir("/"));
  if (pthread_join(thread, NULL))
    errx(1, "pthread_join");
  SYSCHK(close(mnt_fd));
  return 0;
}

Comments

Al Viro Sept. 13, 2022, 5:48 p.m. UTC | #1
On Tue, Sep 13, 2022 at 07:14:56PM +0200, Jann Horn wrote:
> As the subject says, there's a race between d_path() (specifically
> __prepend_path()) looking at mnt->mnt_ns with is_anon_ns(), and
> do_move_mount() switching out the ->mnt_ns and freeing the old one.
> This can theoretically lead to a use-after-free read, but it doesn't
> seem to be very interesting from a security perspective, since all it
> gets you is a comparison of a value in freed memory with zero.

... with d_absolute_path() being the only caller that might even
theoretically care.

	Anyway, shouldn't be hard to deal with - adding rcu_head to
struct mnt_namespace (anon-unioned with e.g. ->list) and turning kfree()
in free_mnt_ns() into kfree_rcu() ought to do it...
diff mbox series

Patch

diff --git a/fs/d_path.c b/fs/d_path.c
index e4e0ebad1f153..51fbed8deffe4 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -7,6 +7,7 @@ 
 #include <linux/slab.h>
 #include <linux/prefetch.h>
 #include "mount.h"
+#include <linux/delay.h>

 struct prepend_buffer {
        char *buf;
@@ -117,6 +118,11 @@  static int __prepend_path(const struct dentry *dentry, const struct mount *mnt,
                        }
                        /* Global root */
                        mnt_ns = READ_ONCE(mnt->mnt_ns);
+                       if (strcmp(current->comm, "SLOWME") == 0) {
+                               pr_warn("%s: begin delay\n", __func__);
+                               mdelay(1000);
+                               pr_warn("%s: end delay\n", __func__);
+                       }
                        /* open-coded is_mounted() to use local mnt_ns */
                        if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))