@@ -384,10 +384,21 @@ int free_memtype(u64 start, u64 end)
  */
 static unsigned long lookup_memtype(u64 paddr)
 {
-	int rettype = _PAGE_CACHE_WB;
+	int rettype = -1;
 	struct memtype *entry;
 
 	if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
+		return _PAGE_CACHE_WB;
+
+	spin_lock(&memtype_lock);
+
+	entry = rbt_memtype_lookup(paddr);
+	if (entry != NULL)
+		rettype = entry->type;
+
+	spin_unlock(&memtype_lock);
+
+	if (rettype != -1)
 		return rettype;
 
 	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
@@ -404,16 +415,7 @@ static unsigned long lookup_memtype(u64 paddr)
 		return rettype;
 	}
 
-	spin_lock(&memtype_lock);
-
-	entry = rbt_memtype_lookup(paddr);
-	if (entry != NULL)
-		rettype = entry->type;
-	else
-		rettype = _PAGE_CACHE_UC_MINUS;
-
-	spin_unlock(&memtype_lock);
-	return rettype;
+	return _PAGE_CACHE_UC_MINUS;
 }
 
 /**
The PAT interval tree is only defined for non-RAM ranges, and is both a shorter list and a log-n lookup compared to the linear walk over the resource ranges looking for "System RAM". In the case of heavy vm_insert_pfn() users like the gpu drivers, which regularly modify the contents of the AGP aperture, this gives a significant reduction in the overhead of faulting in fresh addresses.

However, note that in 1f9cc3cb6a27521ed (x86, pat: Update the page flags for memtype atomically instead of using memtype_lock), the contention on the memtype_lock in lookup_memtype() was observed to be behind a 50x reduction in page fault rate for 32 cpus running vm_insert_pfn(). By performing the locked memtype lookup first, we are once again exposed to that contention for is_ram pages, though in effect we are just moving the contention from the resource_lock (rwlock) to the memtype_lock (spinlock).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Robin Holt <holt@sgi.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/pat.c |   24 +++++++++++++-----------
 1 files changed, 13 insertions(+), 11 deletions(-)
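For reference, a minimal sketch of how lookup_memtype() reads with this change applied, reconstructed from the hunks above. The body of the pat_pagerange_is_ram() branch is not touched by the patch; it is paraphrased here from the surrounding kernel code of this era and its exact wording is an assumption, not part of the diff.

/* Sketch only: reconstruction of lookup_memtype() after the patch. */
static unsigned long lookup_memtype(u64 paddr)
{
	int rettype = -1;
	struct memtype *entry;

	if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
		return _PAGE_CACHE_WB;

	/* Try the PAT rbtree first: shorter list and a log-n lookup. */
	spin_lock(&memtype_lock);

	entry = rbt_memtype_lookup(paddr);
	if (entry != NULL)
		rettype = entry->type;

	spin_unlock(&memtype_lock);

	if (rettype != -1)
		return rettype;

	/* Only fall back to the linear "System RAM" walk on a miss. */
	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
		struct page *page;

		page = pfn_to_page(paddr >> PAGE_SHIFT);
		rettype = get_page_memtype(page);
		/*
		 * -1 from get_page_memtype() implies the RAM page is in
		 * its default, unreserved state, hence of type WB.
		 */
		if (rettype == -1)
			rettype = _PAGE_CACHE_WB;

		return rettype;
	}

	return _PAGE_CACHE_UC_MINUS;
}

The net effect is that non-RAM ranges (the common case for vm_insert_pfn() users such as the gpu drivers) are resolved by the rbtree lookup alone, and only addresses absent from the tree pay for the "System RAM" resource walk.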