This makes it possible to avoid building this code on the powerpc architecture.

No functional change in this patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/vmscan.c | 462 +++++++++++++++++++++++++-------------------------
 1 file changed, 231 insertions(+), 231 deletions(-)

@@ -3325,6 +3325,237 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
}
+/******************************************************************************
+ * PID controller
+ ******************************************************************************/
+
+/*
+ * A feedback loop based on Proportional-Integral-Derivative (PID) controller.
+ *
+ * The P term is refaulted/(evicted+protected) from a tier in the generation
+ * currently being evicted; the I term is the exponential moving average of the
+ * P term over the generations previously evicted, using the smoothing factor
+ * 1/2; the D term isn't supported.
+ *
+ * The setpoint (SP) is always the first tier of one type; the process variable
+ * (PV) is either any tier of the other type or any other tier of the same
+ * type.
+ *
+ * The error is the difference between the SP and the PV; the correction is to
+ * turn off protection when SP>PV or turn on protection when SP<PV.
+ *
+ * For future optimizations:
+ * 1. The D term may discount the other two terms over time so that long-lived
+ * generations can resist stale information.
+ */
+struct ctrl_pos {
+ unsigned long refaulted;
+ unsigned long total;
+ int gain;
+};
+
+static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
+ struct ctrl_pos *pos)
+{
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ int hist = lru_hist_from_seq(lrugen->min_seq[type]);
+
+ pos->refaulted = lrugen->avg_refaulted[type][tier] +
+ atomic_long_read(&lrugen->refaulted[hist][type][tier]);
+ pos->total = lrugen->avg_total[type][tier] +
+ atomic_long_read(&lrugen->evicted[hist][type][tier]);
+ if (tier)
+ pos->total += lrugen->protected[hist][type][tier - 1];
+ pos->gain = gain;
+}
+
+static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
+{
+ int hist, tier;
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
+ unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
+
+ lockdep_assert_held(&lruvec->lru_lock);
+
+ if (!carryover && !clear)
+ return;
+
+ hist = lru_hist_from_seq(seq);
+
+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
+ if (carryover) {
+ unsigned long sum;
+
+ sum = lrugen->avg_refaulted[type][tier] +
+ atomic_long_read(&lrugen->refaulted[hist][type][tier]);
+ WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
+
+ sum = lrugen->avg_total[type][tier] +
+ atomic_long_read(&lrugen->evicted[hist][type][tier]);
+ if (tier)
+ sum += lrugen->protected[hist][type][tier - 1];
+ WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
+ }
+
+ if (clear) {
+ atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
+ atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
+ if (tier)
+ WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
+ }
+ }
+}
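/*
 * Illustration only, not part of this patch: the carryover above is an
 * exponential moving average with smoothing factor 1/2 -- on each min_seq
 * bump, avg = (avg + hist) / 2, so each older generation's contribution
 * halves per step. A standalone sketch with made-up refault counts:
 */
#include <stdio.h>

int main(void)
{
	unsigned long avg_refaulted = 0;
	unsigned long hist[] = { 400, 200, 100 };	/* hypothetical values */

	for (int i = 0; i < 3; i++) {
		avg_refaulted = (avg_refaulted + hist[i]) / 2;
		printf("after gen %d: avg = %lu\n", i, avg_refaulted);
	}
	/* prints 200, 200, 150: history decays by half each generation */
	return 0;
}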
+
+static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
+{
+ /*
+ * Return true if the PV has a limited number of refaults or a lower
+ * refaulted/total than the SP.
+ */
+ return pv->refaulted < MIN_LRU_BATCH ||
+ pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <=
+ (sp->refaulted + 1) * pv->total * pv->gain;
+}
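/*
 * Illustration only, not part of this patch: positive_ctrl_err() compares
 * the SP and PV refault ratios described in the section comment by
 * cross-multiplying to avoid division; +MIN_LRU_BATCH and +1 damp small
 * samples, and gain weights each side. A standalone sketch with made-up
 * numbers (assumes MIN_LRU_BATCH == 64, its value on 64-bit):
 */
#include <stdbool.h>
#include <stdio.h>

#define MIN_LRU_BATCH 64

struct ctrl_pos {
	unsigned long refaulted;
	unsigned long total;
	int gain;
};

static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
{
	return pv->refaulted < MIN_LRU_BATCH ||
	       pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <=
	       (sp->refaulted + 1) * pv->total * pv->gain;
}

int main(void)
{
	/* SP: 100/1000, ~10% refaulted; PV: 300/1000, ~30% refaulted */
	struct ctrl_pos sp = { .refaulted = 100, .total = 1000, .gain = 1 };
	struct ctrl_pos pv = { .refaulted = 300, .total = 1000, .gain = 1 };

	/* 300 * 1064 > 101 * 1000: PV refaults more, error is not positive */
	printf("%d\n", positive_ctrl_err(&sp, &pv));	/* prints 0 */
	return 0;
}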
+
+/******************************************************************************
+ * the aging
+ ******************************************************************************/
+
+/* protect pages accessed multiple times through file descriptors */
+static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
+{
+ int type = folio_is_file_lru(folio);
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
+
+ VM_WARN_ON_ONCE_FOLIO(!(old_flags & LRU_GEN_MASK), folio);
+
+ do {
+ new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+ /* folio_update_gen() has promoted this page? */
+ if (new_gen >= 0 && new_gen != old_gen)
+ return new_gen;
+
+ new_gen = (old_gen + 1) % MAX_NR_GENS;
+
+ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
+ new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF;
+ /* for folio_end_writeback() */
+ if (reclaiming)
+ new_flags |= BIT(PG_reclaim);
+ } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
+
+ lru_gen_update_size(lruvec, folio, old_gen, new_gen);
+
+ return new_gen;
+}
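/*
 * Illustration only, not part of this patch: the generation number lives
 * in folio->flags biased by 1, so an all-zero field means "not on a
 * multi-gen LRU list", and aging wraps modulo MAX_NR_GENS. A standalone
 * sketch of the encode/decode arithmetic (assumes the upstream value
 * MAX_NR_GENS == 4; the shift and mask here are hypothetical):
 */
#include <stdio.h>

#define MAX_NR_GENS	4
#define GEN_SHIFT	8			/* hypothetical bit position */
#define GEN_MASK	(0xfUL << GEN_SHIFT)

int main(void)
{
	unsigned long flags = 0;
	int gen = 2;

	/* encode: store gen + 1 so that 0 stays reserved for "off-LRU" */
	flags = (flags & ~GEN_MASK) | ((gen + 1UL) << GEN_SHIFT);

	/* decode: subtract the bias back out */
	int decoded = ((flags & GEN_MASK) >> GEN_SHIFT) - 1;
	printf("decoded %d\n", decoded);	/* 2 */

	/* aging: advance to the next generation, wrapping around */
	printf("next %d\n", (decoded + 1) % MAX_NR_GENS);	/* 3 */
	return 0;
}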
+
+static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
+
+ if (!pte_present(pte) || is_zero_pfn(pfn))
+ return -1;
+
+ if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
+ return -1;
+
+ if (WARN_ON_ONCE(!pfn_valid(pfn)))
+ return -1;
+
+ return pfn;
+}
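/*
 * Illustration only, not part of this patch: get_pte_pfn() returns -1 in
 * an unsigned long, i.e. ULONG_MAX, as an "invalid pfn" sentinel that no
 * real pfn can collide with. A standalone sketch of the convention:
 */
#include <limits.h>
#include <stdio.h>

static unsigned long lookup(int ok)
{
	return ok ? 42UL : -1;	/* -1 wraps to ULONG_MAX */
}

int main(void)
{
	unsigned long pfn = lookup(0);

	if (pfn == (unsigned long)-1)	/* same value as ULONG_MAX */
		printf("invalid (%lu == %lu)\n", pfn, ULONG_MAX);
	return 0;
}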
+
+static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
+ struct pglist_data *pgdat, bool can_swap)
+{
+ struct folio *folio;
+
+ /* try to avoid unnecessary memory loads */
+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+ return NULL;
+
+ folio = pfn_folio(pfn);
+ if (folio_nid(folio) != pgdat->node_id)
+ return NULL;
+
+ if (folio_memcg_rcu(folio) != memcg)
+ return NULL;
+
+ /* file VMAs can contain anon pages from COW */
+ if (!folio_is_file_lru(folio) && !can_swap)
+ return NULL;
+
+ return folio;
+}
+
+/* promote pages accessed through page tables */
+static int folio_update_gen(struct folio *folio, int gen)
+{
+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
+
+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
+ VM_WARN_ON_ONCE(!rcu_read_lock_held());
+
+ do {
+ /* lru_gen_del_folio() has isolated this page? */
+ if (!(old_flags & LRU_GEN_MASK)) {
+ /* for shrink_folio_list() */
+ new_flags = old_flags | BIT(PG_referenced);
+ continue;
+ }
+
+ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
+ new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
+ } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
+
+ return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
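/*
 * Illustration only, not part of this patch: folio_update_gen() uses the
 * usual cmpxchg retry loop -- read the old flags, compute new flags, and
 * retry if another CPU changed them in between; like the kernel's
 * try_cmpxchg(), C11's compare-exchange reloads the old value on failure.
 * The same shape in portable C11 atomics:
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long flags;

static void set_bits(unsigned long mask, unsigned long val)
{
	unsigned long old = atomic_load(&flags);
	unsigned long new;

	do {
		new = (old & ~mask) | val;
		/* on failure, 'old' is refreshed with the current value */
	} while (!atomic_compare_exchange_weak(&flags, &old, new));
}

int main(void)
{
	atomic_store(&flags, 0xff);
	set_bits(0xf0, 0x30);
	printf("%#lx\n", (unsigned long)atomic_load(&flags));	/* 0x3f */
	return 0;
}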
+
+static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio,
+ int old_gen, int new_gen)
+{
+ int type = folio_is_file_lru(folio);
+ int zone = folio_zonenum(folio);
+ int delta = folio_nr_pages(folio);
+
+ VM_WARN_ON_ONCE(old_gen >= MAX_NR_GENS);
+ VM_WARN_ON_ONCE(new_gen >= MAX_NR_GENS);
+
+ walk->batched++;
+
+ walk->nr_pages[old_gen][type][zone] -= delta;
+ walk->nr_pages[new_gen][type][zone] += delta;
+}
+
+static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
+{
+ int gen, type, zone;
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
+
+ walk->batched = 0;
+
+ for_each_gen_type_zone(gen, type, zone) {
+ enum lru_list lru = type * LRU_INACTIVE_FILE;
+ int delta = walk->nr_pages[gen][type][zone];
+
+ if (!delta)
+ continue;
+
+ walk->nr_pages[gen][type][zone] = 0;
+ WRITE_ONCE(lrugen->nr_pages[gen][type][zone],
+ lrugen->nr_pages[gen][type][zone] + delta);
+
+ if (lru_gen_is_active(lruvec, gen))
+ lru += LRU_ACTIVE;
+ __update_lru_size(lruvec, lru, zone, delta);
+ }
+}
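/*
 * Illustration only, not part of this patch: update_batch_size() and
 * reset_batch_size() follow a common batching pattern -- accumulate
 * signed per-bucket deltas privately during the walk, then fold them into
 * the shared counters in one pass (in the kernel, under lru_lock and with
 * WRITE_ONCE()). A generic standalone sketch:
 */
#include <stdio.h>

#define NR_BUCKETS 4

static long shared[NR_BUCKETS];		/* stands in for lrugen->nr_pages */

struct batch {
	long delta[NR_BUCKETS];
};

static void batch_move(struct batch *b, int from, int to, long nr)
{
	b->delta[from] -= nr;
	b->delta[to] += nr;
}

static void batch_flush(struct batch *b)
{
	for (int i = 0; i < NR_BUCKETS; i++) {
		shared[i] += b->delta[i];
		b->delta[i] = 0;
	}
}

int main(void)
{
	struct batch b = { 0 };

	shared[0] = 100;
	batch_move(&b, 0, 1, 32);	/* many cheap private updates... */
	batch_move(&b, 0, 1, 16);
	batch_flush(&b);		/* ...one flush to the shared state */
	printf("%ld %ld\n", shared[0], shared[1]);	/* 52 48 */
	return 0;
}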
+
/******************************************************************************
* Bloom filters
******************************************************************************/
@@ -3672,237 +3903,6 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)

return success;
}
-/******************************************************************************
- * PID controller
- ******************************************************************************/
-
-/*
- * A feedback loop based on Proportional-Integral-Derivative (PID) controller.
- *
- * The P term is refaulted/(evicted+protected) from a tier in the generation
- * currently being evicted; the I term is the exponential moving average of the
- * P term over the generations previously evicted, using the smoothing factor
- * 1/2; the D term isn't supported.
- *
- * The setpoint (SP) is always the first tier of one type; the process variable
- * (PV) is either any tier of the other type or any other tier of the same
- * type.
- *
- * The error is the difference between the SP and the PV; the correction is to
- * turn off protection when SP>PV or turn on protection when SP<PV.
- *
- * For future optimizations:
- * 1. The D term may discount the other two terms over time so that long-lived
- * generations can resist stale information.
- */
-struct ctrl_pos {
- unsigned long refaulted;
- unsigned long total;
- int gain;
-};
-
-static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
- struct ctrl_pos *pos)
-{
- struct lru_gen_folio *lrugen = &lruvec->lrugen;
- int hist = lru_hist_from_seq(lrugen->min_seq[type]);
-
- pos->refaulted = lrugen->avg_refaulted[type][tier] +
- atomic_long_read(&lrugen->refaulted[hist][type][tier]);
- pos->total = lrugen->avg_total[type][tier] +
- atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- pos->total += lrugen->protected[hist][type][tier - 1];
- pos->gain = gain;
-}
-
-static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
-{
- int hist, tier;
- struct lru_gen_folio *lrugen = &lruvec->lrugen;
- bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
- unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
-
- lockdep_assert_held(&lruvec->lru_lock);
-
- if (!carryover && !clear)
- return;
-
- hist = lru_hist_from_seq(seq);
-
- for (tier = 0; tier < MAX_NR_TIERS; tier++) {
- if (carryover) {
- unsigned long sum;
-
- sum = lrugen->avg_refaulted[type][tier] +
- atomic_long_read(&lrugen->refaulted[hist][type][tier]);
- WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
-
- sum = lrugen->avg_total[type][tier] +
- atomic_long_read(&lrugen->evicted[hist][type][tier]);
- if (tier)
- sum += lrugen->protected[hist][type][tier - 1];
- WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
- }
-
- if (clear) {
- atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
- atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
- if (tier)
- WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
- }
- }
-}
-
-static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
-{
- /*
- * Return true if the PV has a limited number of refaults or a lower
- * refaulted/total than the SP.
- */
- return pv->refaulted < MIN_LRU_BATCH ||
- pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <=
- (sp->refaulted + 1) * pv->total * pv->gain;
-}
-
-/******************************************************************************
- * the aging
- ******************************************************************************/
-
-/* protect pages accessed multiple times through file descriptors */
-static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
-{
- int type = folio_is_file_lru(folio);
- struct lru_gen_folio *lrugen = &lruvec->lrugen;
- int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
- unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
-
- VM_WARN_ON_ONCE_FOLIO(!(old_flags & LRU_GEN_MASK), folio);
-
- do {
- new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
- /* folio_update_gen() has promoted this page? */
- if (new_gen >= 0 && new_gen != old_gen)
- return new_gen;
-
- new_gen = (old_gen + 1) % MAX_NR_GENS;
-
- new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
- new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF;
- /* for folio_end_writeback() */
- if (reclaiming)
- new_flags |= BIT(PG_reclaim);
- } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
-
- lru_gen_update_size(lruvec, folio, old_gen, new_gen);
-
- return new_gen;
-}
-
-static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
-{
- unsigned long pfn = pte_pfn(pte);
-
- VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
-
- if (!pte_present(pte) || is_zero_pfn(pfn))
- return -1;
-
- if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
- return -1;
-
- if (WARN_ON_ONCE(!pfn_valid(pfn)))
- return -1;
-
- return pfn;
-}
-
-static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
- struct pglist_data *pgdat, bool can_swap)
-{
- struct folio *folio;
-
- /* try to avoid unnecessary memory loads */
- if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
- return NULL;
-
- folio = pfn_folio(pfn);
- if (folio_nid(folio) != pgdat->node_id)
- return NULL;
-
- if (folio_memcg_rcu(folio) != memcg)
- return NULL;
-
- /* file VMAs can contain anon pages from COW */
- if (!folio_is_file_lru(folio) && !can_swap)
- return NULL;
-
- return folio;
-}
-
-/* promote pages accessed through page tables */
-static int folio_update_gen(struct folio *folio, int gen)
-{
- unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
-
- VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
- VM_WARN_ON_ONCE(!rcu_read_lock_held());
-
- do {
- /* lru_gen_del_folio() has isolated this page? */
- if (!(old_flags & LRU_GEN_MASK)) {
- /* for shrink_folio_list() */
- new_flags = old_flags | BIT(PG_referenced);
- continue;
- }
-
- new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
- new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
- } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags));
-
- return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
-}
-
-static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio,
- int old_gen, int new_gen)
-{
- int type = folio_is_file_lru(folio);
- int zone = folio_zonenum(folio);
- int delta = folio_nr_pages(folio);
-
- VM_WARN_ON_ONCE(old_gen >= MAX_NR_GENS);
- VM_WARN_ON_ONCE(new_gen >= MAX_NR_GENS);
-
- walk->batched++;
-
- walk->nr_pages[old_gen][type][zone] -= delta;
- walk->nr_pages[new_gen][type][zone] += delta;
-}
-
-static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
-{
- int gen, type, zone;
- struct lru_gen_folio *lrugen = &lruvec->lrugen;
-
- walk->batched = 0;
-
- for_each_gen_type_zone(gen, type, zone) {
- enum lru_list lru = type * LRU_INACTIVE_FILE;
- int delta = walk->nr_pages[gen][type][zone];
-
- if (!delta)
- continue;
-
- walk->nr_pages[gen][type][zone] = 0;
- WRITE_ONCE(lrugen->nr_pages[gen][type][zone],
- lrugen->nr_pages[gen][type][zone] + delta);
-
- if (lru_gen_is_active(lruvec, gen))
- lru += LRU_ACTIVE;
- __update_lru_size(lruvec, lru, zone, delta);
- }
-}
-
static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *args)
{
struct address_space *mapping;