@@ -694,22 +694,15 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
page_list_add(pg, &heap(node, zone, order));
}
-/* Allocate 2^@order contiguous pages. */
-static struct page_info *alloc_heap_pages(
- unsigned int zone_lo, unsigned int zone_hi,
- unsigned int order, unsigned int memflags,
- struct domain *d)
+static struct page_info *get_free_buddy(unsigned int zone_lo,
+ unsigned int zone_hi,
+ unsigned int order, unsigned int memflags,
+ struct domain *d)
{
- unsigned int i, j, zone = 0, nodemask_retry = 0;
nodeid_t first_node, node = MEMF_get_node(memflags), req_node = node;
- unsigned long request = 1UL << order;
+ nodemask_t nodemask = d ? d->node_affinity : node_online_map;
+ unsigned int j, zone, nodemask_retry = 0, request = 1UL << order;
struct page_info *pg;
- nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map;
- bool need_scrub, need_tlbflush = 0;
- uint32_t tlbflush_timestamp = 0;
-
- /* Make sure there are enough bits in memflags for nodeID. */
- BUILD_BUG_ON((_MEMF_bits - _MEMF_node) < (8 * sizeof(nodeid_t)));
if ( node == NUMA_NO_NODE )
{
@@ -725,36 +718,8 @@ static struct page_info *alloc_heap_pages(
first_node = node;
ASSERT(node < MAX_NUMNODES);
- ASSERT(zone_lo <= zone_hi);
- ASSERT(zone_hi < NR_ZONES);
-
- if ( unlikely(order > MAX_ORDER) )
- return NULL;
-
- spin_lock(&heap_lock);
-
- /*
- * Claimed memory is considered unavailable unless the request
- * is made by a domain with sufficient unclaimed pages.
- */
- if ( (outstanding_claims + request >
- total_avail_pages + tmem_freeable_pages()) &&
- ((memflags & MEMF_no_refcount) ||
- !d || d->outstanding_pages < request) )
- goto not_found;
-
- /*
- * TMEM: When available memory is scarce due to tmem absorbing it, allow
- * only mid-size allocations to avoid worst of fragmentation issues.
- * Others try tmem pools then fail. This is a workaround until all
- * post-dom0-creation-multi-page allocations can be eliminated.
- */
- if ( ((order == 0) || (order >= 9)) &&
- (total_avail_pages <= midsize_alloc_zone_pages) &&
- tmem_freeable_pages() )
- goto try_tmem;
- /*
+ /*
* Start with requested node, but exhaust all node memory in requested
* zone before failing, only calc new node value if we fail to find memory
* in target node, this avoids needless computation on fast-path.
@@ -770,11 +735,11 @@ static struct page_info *alloc_heap_pages(
/* Find smallest order which can satisfy the request. */
for ( j = order; j <= MAX_ORDER; j++ )
if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
- goto found;
+ return pg;
} while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
if ( (memflags & MEMF_exact_node) && req_node != NUMA_NO_NODE )
- goto not_found;
+ return NULL;
/* Pick next node. */
if ( !node_isset(node, nodemask) )
@@ -791,42 +756,93 @@ static struct page_info *alloc_heap_pages(
{
/* When we have tried all in nodemask, we fall back to others. */
if ( (memflags & MEMF_exact_node) || nodemask_retry++ )
- goto not_found;
+ return NULL;
nodes_andnot(nodemask, node_online_map, nodemask);
first_node = node = first_node(nodemask);
if ( node >= MAX_NUMNODES )
- goto not_found;
+ return NULL;
}
}
+}
+
+/* Allocate 2^@order contiguous pages. */
+static struct page_info *alloc_heap_pages(
+ unsigned int zone_lo, unsigned int zone_hi,
+ unsigned int order, unsigned int memflags,
+ struct domain *d)
+{
+ nodeid_t node;
+ unsigned int i, buddy_order, zone;
+ unsigned long request = 1UL << order;
+ struct page_info *pg;
+ bool need_scrub, need_tlbflush = false;
+ uint32_t tlbflush_timestamp = 0;
+
+ /* Make sure there are enough bits in memflags for nodeID. */
+ BUILD_BUG_ON((_MEMF_bits - _MEMF_node) < (8 * sizeof(nodeid_t)));
+
+ ASSERT(zone_lo <= zone_hi);
+ ASSERT(zone_hi < NR_ZONES);
+
+ if ( unlikely(order > MAX_ORDER) )
+ return NULL;
+
+ spin_lock(&heap_lock);
+
+ /*
+ * Claimed memory is considered unavailable unless the request
+ * is made by a domain with sufficient unclaimed pages.
+ */
+ if ( (outstanding_claims + request >
+ total_avail_pages + tmem_freeable_pages()) &&
+ ((memflags & MEMF_no_refcount) ||
+ !d || d->outstanding_pages < request) )
+ {
+ spin_unlock(&heap_lock);
+ return NULL;
+ }
- try_tmem:
- /* Try to free memory from tmem */
- if ( (pg = tmem_relinquish_pages(order, memflags)) != NULL )
+ /*
+ * TMEM: When available memory is scarce due to tmem absorbing it, allow
+ * only mid-size allocations to avoid worst of fragmentation issues.
+ * Others try tmem pools then fail. This is a workaround until all
+ * post-dom0-creation-multi-page allocations can be eliminated.
+ */
+ if ( ((order == 0) || (order >= 9)) &&
+ (total_avail_pages <= midsize_alloc_zone_pages) &&
+ tmem_freeable_pages() )
{
- /* reassigning an already allocated anonymous heap page */
+ /* Try to free memory from tmem. */
+ pg = tmem_relinquish_pages(order, memflags);
spin_unlock(&heap_lock);
return pg;
}
- not_found:
- /* No suitable memory blocks. Fail the request. */
- spin_unlock(&heap_lock);
- return NULL;
+ pg = get_free_buddy(zone_lo, zone_hi, order, memflags, d);
+ if ( !pg )
+ {
+ /* No suitable memory blocks. Fail the request. */
+ spin_unlock(&heap_lock);
+ return NULL;
+ }
+
+ node = phys_to_nid(page_to_maddr(pg));
+ zone = page_to_zone(pg);
+ buddy_order = PFN_ORDER(pg);
- found:
need_scrub = (pg->u.free.first_dirty != INVALID_DIRTY_IDX);
/* We may have to halve the chunk a number of times. */
- while ( j != order )
+ while ( buddy_order != order )
{
/*
* Some of the sub-chunks may be clean but we will mark them
* as dirty (if need_scrub is set) to avoid traversing the
* list here.
*/
- page_list_add_scrub(pg, node, zone, --j,
+ page_list_add_scrub(pg, node, zone, --buddy_order,
need_scrub ? 0 : INVALID_DIRTY_IDX);
- pg += 1 << j;
+ pg += 1 << buddy_order;
}
ASSERT(avail[node][zone] >= request);
This will make the code a bit more readable, especially with changes that will be introduced in subsequent patches. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> --- Change in v4: * New patch xen/common/page_alloc.c | 132 +++++++++++++++++++++++++++--------------- 1 file changed, 74 insertions(+), 58 deletions(-)