@@ -157,6 +157,17 @@ stable_node_chains_prune_millisecs
scan. It's a noop if not a single KSM page hit the
``max_page_sharing`` yet.
+global_force
+ specifies whether ksmd is forced to scan all eligible anonymous pages,
+ even those in areas that were not marked with MADV_MERGEABLE. When set
+ to 1, ksmd consumes more CPU time because many more pages suddenly need
+ to be scanned, so enabling it is not recommended on production systems.
+ It can be combined with ``/proc/<pid>/ksm_merging_pages`` to explore
+ the maximum merging potential of all running applications in
+ development or debugging environments (see the example below).
+ Note: an application started before global_force is set to 1 may need
+ to be restarted for the setting to take effect.
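+
+ A minimal usage sketch (the PID is only an example; ksmd must be
+ running, i.e. ``run`` set to 1, for merging to happen)::
+
+   echo 1 > /sys/kernel/mm/ksm/global_force
+   echo 1 > /sys/kernel/mm/ksm/run
+   cat /proc/1234/ksm_merging_pages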
+
The effectiveness of KSM and MADV_MERGEABLE is shown in ``/sys/kernel/mm/ksm/``:
pages_shared
@@ -202,6 +213,14 @@ ksm_swpin_copy
note that KSM page might be copied when swapping in because do_swap_page()
cannot do all the locking needed to reconstitute a cross-anon_vma KSM page.
+Boot parameter
+==============
+
+The boot-time default of ``global_force`` can be changed by passing
+``ksm_global_force=true`` on the kernel command line. All anonymous pages
+are then scanned by the ksm daemon after the kernel has started up, and
+``global_force`` reads as 1.
+
--
Izik Eidus,
Hugh Dickins, 17 Nov 2009
@@ -19,6 +19,9 @@ struct stable_node;
struct mem_cgroup;
#ifdef CONFIG_KSM
+
+extern bool ksm_global_force __read_mostly;
+
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
int __ksm_enter(struct mm_struct *mm);
@@ -37,6 +40,32 @@ static inline void ksm_exit(struct mm_struct *mm)
__ksm_exit(mm);
}
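+/*
+ * Called from the vma creation and expansion paths (vma_merge,
+ * expand_upwards/downwards, mmap_region, do_brk_flags): when
+ * ksm_global_force is enabled, register the vma's mm with KSM so its
+ * anonymous pages can be scanned and merged.
+ */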
+static inline int ksm_enter_vma(struct vm_area_struct *vma)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (!ksm_global_force)
+ return 0;
+
+ if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
+ return __ksm_enter(mm);
+
+ return 0;
+}
+
+/*
+ * On a page fault, check whether ksm_global_force is enabled; if so,
+ * make this mm enter KSM so its anonymous pages can be merged.
+ */
+static inline vm_fault_t ksm_enter_mm_fault(struct mm_struct *mm)
+{
+ if (ksm_global_force && !test_bit(MMF_VM_MERGEABLE, &mm->flags))
+ return __ksm_enter(mm) ? VM_FAULT_OOM : 0;
+ return 0;
+}
+
/*
* When do_swap_page() first faults in from swap what used to be a KSM page,
* no problem, it will be assigned to this vma's anon_vma; but thereafter,
@@ -65,6 +94,17 @@ static inline void ksm_exit(struct mm_struct *mm)
{
}
+static inline vm_fault_t ksm_enter_mm_fault(struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline int ksm_enter_vma(struct vm_area_struct *vma)
+{
+ return 0;
+}
+
#ifdef CONFIG_MMU
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags)
@@ -277,6 +277,9 @@ static unsigned int zero_checksum __read_mostly;
/* Whether to merge empty (zeroed) pages with actual zero pages */
static bool ksm_use_zero_pages __read_mostly;
+/* Whether to force all mm to be scanned in KSM */
+bool ksm_global_force __read_mostly;
+
#ifdef CONFIG_NUMA
/* Zeroed when merging across nodes is not allowed */
static unsigned int ksm_merge_across_nodes = 1;
@@ -334,6 +337,43 @@ static void __init ksm_slab_free(void)
mm_slot_cache = NULL;
}
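+/*
+ * Check whether a vma is of a type that KSM can merge at all,
+ * independent of whether MADV_MERGEABLE was requested for it.
+ */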
+static inline bool vma_eligible_for_ksm(struct vm_area_struct *vma,
+ unsigned long vm_flags)
+{
+ /*
+ * Be somewhat over-protective for now!
+ */
+ if (vm_flags & (VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_HUGETLB | VM_MIXEDMAP))
+ return false;
+
+ if (vma_is_dax(vma))
+ return false;
+
+#ifdef VM_SAO
+ if (vm_flags & VM_SAO)
+ return false;
+#endif
+#ifdef VM_SPARC_ADI
+ if (vm_flags & VM_SPARC_ADI)
+ return false;
+#endif
+
+ return true;
+}
+
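+/*
+ * A vma is scanned by ksmd if it is either explicitly marked
+ * VM_MERGEABLE (via MADV_MERGEABLE) or ksm_global_force is enabled,
+ * and KSM can handle this type of vma at all.
+ */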
+static inline bool vma_is_scannable(struct vm_area_struct *vma)
+{
+ if (!(vma->vm_flags & VM_MERGEABLE) && !ksm_global_force)
+ return false;
+
+ if (!vma_eligible_for_ksm(vma, vma->vm_flags))
+ return false;
+
+ return true;
+}
+
static __always_inline bool is_stable_node_chain(struct stable_node *chain)
{
return chain->rmap_hlist_len == STABLE_NODE_CHAIN;
@@ -523,7 +563,7 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
if (ksm_test_exit(mm))
return NULL;
vma = vma_lookup(mm, addr);
- if (!vma || !(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+ if (!vma || !vma_is_scannable(vma) || !vma->anon_vma)
return NULL;
return vma;
}
@@ -990,7 +1030,7 @@ static int unmerge_and_remove_all_rmap_items(void)
for_each_vma(vmi, vma) {
if (ksm_test_exit(mm))
break;
- if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+ if (!vma_is_scannable(vma) || !vma->anon_vma)
continue;
err = unmerge_ksm_pages(vma,
vma->vm_start, vma->vm_end);
@@ -2300,7 +2340,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
goto no_vmas;
for_each_vma(vmi, vma) {
- if (!(vma->vm_flags & VM_MERGEABLE))
+ if (!vma_is_scannable(vma))
continue;
if (ksm_scan.address < vma->vm_start)
ksm_scan.address = vma->vm_start;
@@ -2450,26 +2490,12 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
switch (advice) {
case MADV_MERGEABLE:
- /*
- * Be somewhat over-protective for now!
- */
- if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_HUGETLB | VM_MIXEDMAP))
+ if (*vm_flags & VM_MERGEABLE)
return 0; /* just ignore the advice */
- if (vma_is_dax(vma))
+ if (!vma_eligible_for_ksm(vma, *vm_flags))
return 0;
-#ifdef VM_SAO
- if (*vm_flags & VM_SAO)
- return 0;
-#endif
-#ifdef VM_SPARC_ADI
- if (*vm_flags & VM_SPARC_ADI)
- return 0;
-#endif
-
if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
err = __ksm_enter(mm);
if (err)
@@ -2508,7 +2534,6 @@ int __ksm_enter(struct mm_struct *mm)
/* Check ksm_run too? Would need tighter locking */
needs_wakeup = list_empty(&ksm_mm_head.mm_list);
-
spin_lock(&ksm_mmlist_lock);
insert_to_mm_slots_hash(mm, mm_slot);
/*
@@ -2943,6 +2968,48 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
}
KSM_ATTR(run);
+static ssize_t global_force_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ksm_global_force);
+}
+
+static ssize_t global_force_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int err;
+ unsigned long value;
+
+ err = kstrtoul(buf, 10, &value);
+ if (err || value > 1)
+ return -EINVAL;
+
+ ksm_global_force = value;
+
+ return count;
+}
+KSM_ATTR(global_force);
+
+/* Parse the ksm_global_force= kernel command line parameter */
+static int __init setup_ksm_global_force(char *str)
+{
+ int ret = 0;
+
+ if (!str)
+ goto out;
+ if (!strcmp(str, "true")) {
+ ksm_global_force = true;
+ ret = 1;
+ }
+out:
+ if (!ret)
+ pr_warn("invalid ksm_global_force= parameter, ignored\n");
+ return ret;
+}
+__setup("ksm_global_force=", setup_ksm_global_force);
+
#ifdef CONFIG_NUMA
static ssize_t merge_across_nodes_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -3153,6 +3220,7 @@ static struct attribute *ksm_attrs[] = {
&sleep_millisecs_attr.attr,
&pages_to_scan_attr.attr,
&run_attr.attr,
+ &global_force_attr.attr,
&pages_shared_attr.attr,
&pages_sharing_attr.attr,
&pages_unshared_attr.attr,
@@ -4989,6 +4989,10 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
p4d_t *p4d;
vm_fault_t ret;
+ ret = ksm_enter_mm_fault(mm);
+ if (ret)
+ return ret;
+
pgd = pgd_offset(mm, address);
p4d = p4d_alloc(mm, pgd, address);
if (!p4d)
@@ -46,6 +46,7 @@
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/sched/mm.h>
+#include <linux/ksm.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
@@ -1140,6 +1141,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
if (err)
return NULL;
khugepaged_enter_vma(res, vm_flags);
+ ksm_enter_vma(res);
return res;
}
@@ -2052,6 +2054,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma(vma, vma->vm_flags);
+ ksm_enter_vma(vma);
mas_destroy(&mas);
return error;
}
@@ -2134,6 +2137,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
}
anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma(vma, vma->vm_flags);
+ ksm_enter_vma(vma);
mas_destroy(&mas);
return error;
}
@@ -2645,6 +2649,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
if (vma &&
!vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
khugepaged_enter_vma(vma, vm_flags);
+ ksm_enter_vma(vma);
goto expanded;
}
@@ -2761,6 +2766,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
* call covers the non-merge case.
*/
khugepaged_enter_vma(vma, vma->vm_flags);
+ ksm_enter_vma(vma);
/* Once vma denies write, undo our temporary denial count */
unmap_writable:
@@ -3068,6 +3074,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
anon_vma_unlock_write(vma->anon_vma);
}
khugepaged_enter_vma(vma, flags);
+ ksm_enter_vma(vma);
goto out;
}