@@ -2179,7 +2179,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
- lockdep_assert_held_exclusive(&mm->mmap_sem);
+ lockdep_assert_held_exclusive(&mm->mmap_lock);
if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
@@ -104,7 +104,7 @@ static struct mm_struct tboot_mm = {
.pgd = swapper_pg_dir,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
- .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+ .mmap_lock = __RANGE_LOCK_TREE_INITIALIZER(init_mm.mmap_lock),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
};
@@ -1516,7 +1516,7 @@ static noinline void
__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
unsigned long address)
{
- prefetchw(&current->mm->mmap_sem);
+ prefetchw(&current->mm->mmap_lock);
if (unlikely(kmmio_fault(regs, address)))
return;
@@ -80,7 +80,7 @@ struct mm_struct efi_mm = {
.mm_rb = RB_ROOT,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
- .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
+ .mmap_lock = __RANGE_LOCK_TREE_INITIALIZER(efi_mm.mmap_lock),
.page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
.cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
@@ -2899,74 +2899,74 @@ static inline void setup_nr_node_ids(void) {}
static inline bool mm_is_locked(struct mm_struct *mm,
struct range_lock *mmrange)
{
- return rwsem_is_locked(&mm->mmap_sem);
+ return range_is_locked(&mm->mmap_lock, mmrange);
}
/* Reader wrappers */
static inline int mm_read_trylock(struct mm_struct *mm,
struct range_lock *mmrange)
{
- return down_read_trylock(&mm->mmap_sem);
+ return range_read_trylock(&mm->mmap_lock, mmrange);
}
static inline void mm_read_lock(struct mm_struct *mm,
struct range_lock *mmrange)
{
- down_read(&mm->mmap_sem);
+ range_read_lock(&mm->mmap_lock, mmrange);
}
static inline void mm_read_lock_nested(struct mm_struct *mm,
struct range_lock *mmrange, int subclass)
{
- down_read_nested(&mm->mmap_sem, subclass);
+ range_read_lock_nested(&mm->mmap_lock, mmrange, subclass);
}
static inline void mm_read_unlock(struct mm_struct *mm,
struct range_lock *mmrange)
{
- up_read(&mm->mmap_sem);
+ range_read_unlock(&mm->mmap_lock, mmrange);
}
/* Writer wrappers */
static inline int mm_write_trylock(struct mm_struct *mm,
struct range_lock *mmrange)
{
- return down_write_trylock(&mm->mmap_sem);
+ return range_write_trylock(&mm->mmap_lock, mmrange);
}
static inline void mm_write_lock(struct mm_struct *mm,
struct range_lock *mmrange)
{
- down_write(&mm->mmap_sem);
+ range_write_lock(&mm->mmap_lock, mmrange);
}
static inline int mm_write_lock_killable(struct mm_struct *mm,
struct range_lock *mmrange)
{
- return down_write_killable(&mm->mmap_sem);
+ return range_write_lock_killable(&mm->mmap_lock, mmrange);
}
static inline void mm_downgrade_write(struct mm_struct *mm,
struct range_lock *mmrange)
{
- downgrade_write(&mm->mmap_sem);
+ range_downgrade_write(&mm->mmap_lock, mmrange);
}
static inline void mm_write_unlock(struct mm_struct *mm,
struct range_lock *mmrange)
{
- up_write(&mm->mmap_sem);
+ range_write_unlock(&mm->mmap_lock, mmrange);
}
static inline void mm_write_lock_nested(struct mm_struct *mm,
struct range_lock *mmrange,
int subclass)
{
- down_write_nested(&mm->mmap_sem, subclass);
+ range_write_lock_nested(&mm->mmap_lock, mmrange, subclass);
}
-#define mm_write_nest_lock(mm, range, nest_lock) \
- down_write_nest_lock(&(mm)->mmap_sem, nest_lock)
+#define mm_write_nest_lock(mm, range, nest_lock) \
+ range_write_lock_nest_lock(&(mm)->mmap_lock, range, nest_lock)
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
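(Not part of the diff: for reference, a caller of the wrappers above ends up looking roughly like the sketch below. DEFINE_RANGE_LOCK_FULL() is assumed to come from the range lock infrastructure added earlier in the series, and example_walk_mm() is a made-up name.)

	/* Illustrative only: taking the full address-space range preserves
	 * the old mmap_sem semantics until call sites are narrowed. */
	static void example_walk_mm(struct mm_struct *mm)
	{
		DEFINE_RANGE_LOCK_FULL(mmrange);	/* assumed helper: covers the whole address space */

		mm_read_lock(mm, &mmrange);
		/* ... inspect VMAs under the (full-range) read lock ... */
		mm_read_unlock(mm, &mmrange);
	}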
@@ -8,7 +8,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
-#include <linux/rwsem.h>
+#include <linux/range_lock.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/uprobes.h>
@@ -400,7 +400,7 @@ struct mm_struct {
spinlock_t page_table_lock; /* Protects page tables and some
* counters
*/
- struct rw_semaphore mmap_sem;
+ struct range_lock_tree mmap_lock;
struct list_head mmlist; /* List of maybe swapped mm's. These
* are globally strung together off
@@ -36,7 +36,7 @@ struct bpf_stack_map {
/* irq_work to run up_read() for build_id lookup in nmi context */
struct stack_map_irq_work {
struct irq_work irq_work;
- struct rw_semaphore *sem;
+ struct range_lock_tree *lock;
struct range_lock *mmrange;
};
@@ -45,8 +45,9 @@ static void do_up_read(struct irq_work *entry)
struct stack_map_irq_work *work;
work = container_of(entry, struct stack_map_irq_work, irq_work);
- up_read_non_owner(work->sem);
- work->sem = NULL;
+ /* XXX we might have to add a non_owner to range lock/unlock */
+ range_read_unlock(work->lock, work->mmrange);
+ work->lock = NULL;
}
static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
@@ -338,7 +339,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
if (!work) {
mm_read_unlock(current->mm, &mmrange);
} else {
- work->sem = &current->mm->mmap_sem;
+ work->lock = &current->mm->mmap_lock;
work->mmrange = &mmrange;
irq_work_queue(&work->irq_work);
/*
@@ -991,7 +991,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm->vmacache_seqnum = 0;
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
- init_rwsem(&mm->mmap_sem);
+ range_lock_tree_init(&mm->mmap_lock);
INIT_LIST_HEAD(&mm->mmlist);
mm->core_state = NULL;
mm_pgtables_bytes_init(mm);
@@ -30,7 +30,7 @@ struct mm_struct init_mm = {
.pgd = swapper_pg_dir,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
- .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+ .mmap_lock = __RANGE_LOCK_TREE_INITIALIZER(init_mm.mmap_lock),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
@@ -4494,7 +4494,7 @@ void __might_fault(const char *file, int line)
__might_sleep(file, line, 0);
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
if (current->mm)
- might_lock_read(&current->mm->mmap_sem);
+ might_lock_read(&current->mm->mmap_lock);
#endif
}
EXPORT_SYMBOL(__might_fault);
With mmrange now in place and everyone using the mm locking wrappers, we
can convert the rwsem to the range locking scheme. Every single user of
mmap_sem takes a full range, which means there is no more parallelism
than what we already had; this is the worst-case scenario. Prefetching
and some lockdep annotations have been blindly converted (for now).

This lays the foundations for later mm address space locking scalability.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 arch/x86/events/core.c     |  2 +-
 arch/x86/kernel/tboot.c    |  2 +-
 arch/x86/mm/fault.c        |  2 +-
 drivers/firmware/efi/efi.c |  2 +-
 include/linux/mm.h         | 26 +++++++++++++-------------
 include/linux/mm_types.h   |  4 ++--
 kernel/bpf/stackmap.c      |  9 +++++----
 kernel/fork.c              |  2 +-
 mm/init-mm.c               |  2 +-
 mm/memory.c                |  2 +-
 10 files changed, 27 insertions(+), 26 deletions(-)
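For illustration only (not from the patch), a write-side user converted under this scheme would look roughly like the sketch below; DEFINE_RANGE_LOCK_FULL() is again assumed from the range lock infrastructure earlier in the series, and example_modify_mm() is a hypothetical caller:

	static int example_modify_mm(struct mm_struct *mm)
	{
		DEFINE_RANGE_LOCK_FULL(mmrange);	/* assumed helper: full range, i.e. old rwsem behavior */

		if (mm_write_lock_killable(mm, &mmrange))
			return -EINTR;
		/* ... modify the VMA tree under the full-range write lock ... */
		mm_write_unlock(mm, &mmrange);
		return 0;
	}

Because every caller passes a full range, the interval tree degenerates to a single range and serializes exactly like the rwsem it replaces; finer-grained ranges can then be introduced per call site.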