@@ -35,6 +35,9 @@
#include <linux/iomap.h>
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs_dax.h>
+
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -1311,6 +1314,16 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
loff_t pos;
int error;
+ /*
+ * Check whether offset isn't beyond end of file now. Caller is
+ * supposed to hold locks serializing us with truncate / punch hole so
+ * this is a reliable test.
+ */
+ pgoff = linear_page_index(vma, pmd_addr);
+ max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+
+ trace_dax_pmd_fault(inode, vma, address, flags, pgoff, max_pgoff, 0);
+
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
goto fallback;
@@ -1321,16 +1334,10 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
goto fallback;
- /*
- * Check whether offset isn't beyond end of file now. Caller is
- * supposed to hold locks serializing us with truncate / punch hole so
- * this is a reliable test.
- */
- pgoff = linear_page_index(vma, pmd_addr);
- max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
-
- if (pgoff > max_pgoff)
- return VM_FAULT_SIGBUS;
+ if (pgoff > max_pgoff) {
+ result = VM_FAULT_SIGBUS;
+ goto out;
+ }
/* If the PMD would extend beyond the file size */
if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
@@ -1401,6 +1408,9 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
split_huge_pmd(vma, pmd, address);
count_vm_event(THP_FAULT_FALLBACK);
}
+out:
+ trace_dax_pmd_fault_done(inode, vma, address, flags, pgoff, max_pgoff,
+ result);
return result;
}
EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
@@ -281,6 +281,17 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
+#define FAULT_FLAG_TRACE \
+ { FAULT_FLAG_WRITE, "WRITE" }, \
+ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \
+ { FAULT_FLAG_ALLOW_RETRY, "ALLOW_RETRY" }, \
+ { FAULT_FLAG_RETRY_NOWAIT, "RETRY_NOWAIT" }, \
+ { FAULT_FLAG_KILLABLE, "KILLABLE" }, \
+ { FAULT_FLAG_TRIED, "TRIED" }, \
+ { FAULT_FLAG_USER, "USER" }, \
+ { FAULT_FLAG_REMOTE, "REMOTE" }, \
+ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }
+
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
* ->fault function. The vma's ->fault is responsible for returning a bitmask
@@ -1107,6 +1118,20 @@ static inline void clear_page_pfmemalloc(struct page *page)
VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
VM_FAULT_FALLBACK)
+#define VM_FAULT_RESULT_TRACE \
+ { VM_FAULT_OOM, "OOM" }, \
+ { VM_FAULT_SIGBUS, "SIGBUS" }, \
+ { VM_FAULT_MAJOR, "MAJOR" }, \
+ { VM_FAULT_WRITE, "WRITE" }, \
+ { VM_FAULT_HWPOISON, "HWPOISON" }, \
+ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \
+ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \
+ { VM_FAULT_NOPAGE, "NOPAGE" }, \
+ { VM_FAULT_LOCKED, "LOCKED" }, \
+ { VM_FAULT_RETRY, "RETRY" }, \
+ { VM_FAULT_FALLBACK, "FALLBACK" }, \
+ { VM_FAULT_DONE_COW, "DONE_COW" }
+
/* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
new file mode 100644
@@ -0,0 +1,68 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fs_dax
+
+#if !defined(_TRACE_FS_DAX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FS_DAX_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(dax_pmd_fault_class,
+ TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags, pgoff_t pgoff,
+ pgoff_t max_pgoff, int result),
+ TP_ARGS(inode, vma, address, flags, pgoff, max_pgoff, result),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(unsigned long, vm_start)
+ __field(unsigned long, vm_end)
+ __field(unsigned long, vm_flags)
+ __field(unsigned long, address)
+ __field(pgoff_t, pgoff)
+ __field(pgoff_t, max_pgoff)
+ __field(dev_t, dev)
+ __field(unsigned int, flags)
+ __field(int, result)
+ ),
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->vm_start = vma->vm_start;
+ __entry->vm_end = vma->vm_end;
+ __entry->vm_flags = vma->vm_flags;
+ __entry->address = address;
+ __entry->flags = flags;
+ __entry->pgoff = pgoff;
+ __entry->max_pgoff = max_pgoff;
+ __entry->result = result;
+ ),
+ TP_printk("dev %d:%d ino %#lx %s %s address %#lx vm_start "
+ "%#lx vm_end %#lx pgoff %#lx max_pgoff %#lx %s",
+ MAJOR(__entry->dev),
+ MINOR(__entry->dev),
+ __entry->ino,
+ __entry->vm_flags & VM_SHARED ? "shared" : "private",
+ __print_flags(__entry->flags, "|", FAULT_FLAG_TRACE),
+ __entry->address,
+ __entry->vm_start,
+ __entry->vm_end,
+ __entry->pgoff,
+ __entry->max_pgoff,
+ __print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE)
+ )
+)
+
+#define DEFINE_PMD_FAULT_EVENT(name) \
+DEFINE_EVENT(dax_pmd_fault_class, name, \
+ TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
+ unsigned long address, unsigned int flags, pgoff_t pgoff, \
+ pgoff_t max_pgoff, int result), \
+ TP_ARGS(inode, vma, address, flags, pgoff, max_pgoff, result))
+
+DEFINE_PMD_FAULT_EVENT(dax_pmd_fault);
+DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done);
+
+
+#endif /* _TRACE_FS_DAX_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>