@@ -3827,6 +3827,7 @@ bool migrc_try_flush_free_folios(void);
void migrc_try_flush_free_folios_dirty(void);
struct migrc_req *fold_ubc_nowr_to_migrc(void);
void free_migrc_req(struct migrc_req *req);
+int migrc_pending_nr_in_zone(struct zone *z);
extern atomic_t migrc_gen;
extern struct llist_head migrc_reqs;
@@ -3842,6 +3843,7 @@ static inline bool migrc_try_flush_free_folios(void) { return false; }
static inline void migrc_try_flush_free_folios_dirty(void) {}
static inline struct migrc_req *fold_ubc_nowr_to_migrc(void) { return NULL; }
static inline void free_migrc_req(struct migrc_req *req) {}
+static inline int migrc_pending_nr_in_zone(struct zone *z) { return 0; }
#endif
#endif /* _LINUX_MM_H */
@@ -958,6 +958,9 @@ struct zone {
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
+#ifdef CONFIG_MIGRC
+ atomic_t migrc_pending_nr;
+#endif
} ____cacheline_internodealigned_in_smp;
enum pgdat_flags {
@@ -117,9 +117,12 @@ void migrc_shrink(struct llist_head *h)
llist_for_each_entry_safe(p, p2, n, migrc_node) {
if (p->migrc_state == MIGRC_SRC_PENDING) {
struct pglist_data *node;
+ struct zone *zone;
node = NODE_DATA(page_to_nid(p));
+ zone = page_zone(p);
atomic_dec(&node->migrc_pending_nr);
+ atomic_dec(&zone->migrc_pending_nr);
}
if (WARN_ON(!migrc_pending(page_folio(p))))
@@ -172,6 +175,7 @@ static void migrc_expand_req(struct folio *fsrc, struct folio *fdst)
{
struct migrc_req *req;
struct pglist_data *node;
+ struct zone *zone;
req = fold_ubc_nowr_to_migrc();
if (!req)
@@ -190,7 +194,9 @@ static void migrc_expand_req(struct folio *fsrc, struct folio *fdst)
req->last = &fsrc->page.migrc_node;
node = NODE_DATA(folio_nid(fsrc));
+ zone = page_zone(&fsrc->page);
atomic_inc(&node->migrc_pending_nr);
+ atomic_inc(&zone->migrc_pending_nr);
if (migrc_is_full(folio_nid(fsrc)))
migrc_try_flush_free_folios();
@@ -275,6 +281,12 @@ bool migrc_req_processing(void)
{
return current->mreq && current->mreq_dirty;
}
+
+int migrc_pending_nr_in_zone(struct zone *z)
+{
+	/* Snapshot of @z's count of MIGRC_SRC_PENDING pages; no lock taken. */
+	return atomic_read(&z->migrc_pending_nr);
+}
#else
static inline bool migrc_src_pending(struct folio *f) { return false; }
static inline bool migrc_dst_pending(struct folio *f) { return false; }
@@ -3179,6 +3179,11 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
long min = mark;
int o;
+ /*
+ * Count pages that migrc_try_flush_free_folios() can free on demand.
+ */
+ free_pages += migrc_pending_nr_in_zone(z);
+
/* free_pages may go negative - that's OK */
free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
@@ -4257,6 +4262,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned int zonelist_iter_cookie;
int reserve_flags;
+ migrc_try_flush_free_folios();
restart:
compaction_retries = 0;
no_progress_loops = 0;
@@ -4772,6 +4778,16 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
if (likely(page))
goto out;
+ if (order && migrc_try_flush_free_folios()) {
+ /*
+ * Try again after freeing migrc's pending pages in case
+ * of high order allocation.
+ */
+ page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
+ if (likely(page))
+ goto out;
+ }
+
alloc_gfp = gfp;
ac.spread_dirty_pages = false;
CONFIG_MIGRC duplicates folios that participated in migration to avoid TLB flushes and to provide a consistent view to CPUs that are still caching the old mappings in their TLBs. However, the duplicated folios can be freed and made available right away through appropriate TLB flushes if needed.

Adjust the watermark check routine, __zone_watermark_ok(), by the number of duplicated folios, and make the page allocation routine perform TLB flushes and free the duplicated folios when it is in trouble due to memory pressure, even more aggressively for high-order allocations.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/mm.h     |  2 ++
 include/linux/mmzone.h |  3 +++
 mm/migrate.c           | 12 ++++++++++++
 mm/page_alloc.c        | 16 ++++++++++++++++
 4 files changed, 33 insertions(+)