@@ -559,12 +559,44 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
pdp->page_directory = NULL;
}
+static struct
+i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
+{
+ struct i915_page_directory_pointer *pdp;
+ int ret = -ENOMEM;
+
+ WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
+
+ pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
+ if (!pdp)
+ return ERR_PTR(-ENOMEM);
+
+ ret = __pdp_init(dev, pdp);
+ if (ret)
+ goto fail_bitmap;
+
+ ret = setup_px(dev, pdp);
+ if (ret)
+ goto fail_page_m;
+
+ return pdp;
+
+fail_page_m:
+ __pdp_fini(pdp);
+fail_bitmap:
+ kfree(pdp);
+
+ return ERR_PTR(ret);
+}
+
static void free_pdp(struct drm_device *dev,
struct i915_page_directory_pointer *pdp)
{
__pdp_fini(pdp);
- if (USES_FULL_48BIT_PPGTT(dev))
+ if (USES_FULL_48BIT_PPGTT(dev)) {
+ cleanup_px(dev, pdp);
kfree(pdp);
+ }
}
/* Broadwell Page Directory Pointer Descriptors */
@@ -758,12 +790,9 @@ static void gen8_free_scratch(struct i915_address_space *vm)
free_scratch_page(dev, vm->scratch_page);
}
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
+ struct i915_page_directory_pointer *pdp)
{
- struct i915_hw_ppgtt *ppgtt =
- container_of(vm, struct i915_hw_ppgtt, base);
- struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
- struct drm_device *dev = ppgtt->base.dev;
int i;
for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
@@ -775,6 +804,31 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
}
free_pdp(dev, pdp);
+}
+
+static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
+{
+ int i;
+
+ for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
+ if (WARN_ON(!ppgtt->pml4.pdps[i]))
+ continue;
+
+ gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
+ }
+
+ cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+
+ if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+ gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
+ else
+ gen8_ppgtt_cleanup_4lvl(ppgtt);
gen8_free_scratch(vm);
}
@@ -957,14 +1011,15 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
}
-static int gen8_alloc_va_range(struct i915_address_space *vm,
- uint64_t start, uint64_t length)
+static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ uint64_t start,
+ uint64_t length)
{
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
unsigned long *new_page_dirs, **new_page_tables;
struct drm_device *dev = vm->dev;
- struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
struct i915_page_directory *pd;
const uint64_t orig_start = start;
const uint64_t orig_length = length;
@@ -1065,6 +1120,79 @@ err_out:
return ret;
}
+static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
+ struct i915_pml4 *pml4,
+ uint64_t start,
+ uint64_t length)
+{
+ DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+ struct i915_page_directory_pointer *pdp;
+ const uint64_t orig_start = start;
+ const uint64_t orig_length = length;
+ uint64_t temp, pml4e;
+ int ret = 0;
+
+ /* Do the pml4 allocations first, so we don't need to track the newly
+ * allocated tables below the pdp */
+ bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+
+ /* The pagedirectory and pagetable allocations are done in the shared 3
+ * and 4 level code. Just allocate the pdps.
+ */
+ gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+ if (!test_bit(pml4e, pml4->used_pml4es)) {
+ pdp = alloc_pdp(vm->dev);
+ if (IS_ERR(pdp))
+ goto err_out;
+
+ pml4->pdps[pml4e] = pdp;
+ __set_bit(pml4e, new_pdps);
+ trace_i915_page_directory_pointer_entry_alloc(vm,
+ pml4e,
+ start,
+ GEN8_PML4E_SHIFT);
+ }
+ }
+
+ WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
+ "The allocation has spanned more than 512GB. "
+ "It is highly likely this is incorrect.");
+
+ start = orig_start;
+ length = orig_length;
+
+ gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+ WARN_ON(!pdp);
+
+ ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+ if (ret)
+ goto err_out;
+ }
+
+ bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
+ GEN8_PML4ES_PER_PML4);
+
+ return 0;
+
+err_out:
+ for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+ gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
+
+ return ret;
+}
+
+static int gen8_alloc_va_range(struct i915_address_space *vm,
+ uint64_t start, uint64_t length)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+
+ if (USES_FULL_48BIT_PPGTT(vm->dev))
+ return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+ else
+ return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
+}
+
/*
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
* with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1090,9 +1218,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->switch_mm = gen8_mm_switch;
- if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
- ret = __pdp_init(false, &ppgtt->pdp);
+ if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+ ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
+ if (ret)
+ goto free_scratch;
+ ppgtt->base.total = 1ULL << 48;
+ } else {
+ ret = __pdp_init(false, &ppgtt->pdp);
if (ret)
goto free_scratch;
@@ -1104,10 +1237,10 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
* 2GiB).
*/
ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
- } else {
- ppgtt->base.total = 1ULL << 48;
- ret = -EPERM; /* Not yet implemented */
- goto free_scratch;
+
+ trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
+ 0, 0,
+ GEN8_PML4E_SHIFT);
}
return 0;
@@ -95,6 +95,7 @@ typedef uint64_t gen8_pde_t;
*/
#define GEN8_PML4ES_PER_PML4 512
#define GEN8_PML4E_SHIFT 39
+#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1)
#define GEN8_PDPE_SHIFT 30
/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
* tables */
@@ -465,6 +466,15 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
temp = min(temp, length), \
start += temp, length -= temp)
+#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter) \
+ for (iter = gen8_pml4e_index(start); \
+ pdp = (pml4)->pdps[iter], \
+ length > 0 && iter < GEN8_PML4ES_PER_PML4; \
+ iter++, \
+ temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start, \
+ temp = min(temp, length), \
+ start += temp, length -= temp)
+
static inline uint32_t gen8_pte_index(uint64_t address)
{
return i915_pte_index(address, GEN8_PDE_SHIFT);
@@ -482,8 +492,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address)
static inline uint32_t gen8_pml4e_index(uint64_t address)
{
- WARN_ON(1); /* For 64B */
- return 0;
+ return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
}
static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
@@ -221,6 +221,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc,
__entry->vm, __entry->px, __entry->start, __entry->end)
);
+DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc,
+ TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift),
+ TP_ARGS(vm, pml4e, start, pml4e_shift),
+
+ TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)",
+ __entry->vm, __entry->px, __entry->start, __entry->end)
+);
+
/* Avoid extra math because we only support two sizes. The format is defined by
* bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */
#define TRACE_PT_SIZE(bits) \