diff mbox

[5/9] x86/mm: Introduce _PAGE_DIRTY_SW

Message ID 20180607143705.3531-6-yu-cheng.yu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yu-cheng Yu June 7, 2018, 2:37 p.m. UTC
The PTE DIRTY bit indicates a few conditions:

(1) When the processor writes to a memory page, the page's
    PTE is (R/W + _PAGE_DIRTY_HW);
(2) When a modified page is shared from fork(), its PTE is
    (R/O + _PAGE_DIRTY_HW);
(3) When access_remote_vm() has tried to write to a read-
    only page with (FOLL_FORCE | FOLL_WRITE), the PTE is
    (R/O + _PAGE_DIRTY_HW);
(4) A shadow stack memory page is required to be set as
    (R/O + _PAGE_DIRTY_HW);

In case (1) above, the DIRTY bit is set by the processor;
in the other cases, it is set by software.  However, the
processor reads the DIRTY bit only in case (4), to ensure
that the page is a valid shadow stack page.

To make (R/O + _PAGE_DIRTY_HW) exclusively for shadow stack,
we introduce _PAGE_BIT_DIRTY_SW, a spare bit of the 64-bit
PTE, to replace _PAGE_BIT_DIRTY for cases (2) and (3), and
for a shadow stack page once it is shared.

This results in the following possible PTE settings:

Modified PTE:		  (R/W + _PAGE_DIRTY_HW)
Modified and shared PTE:  (R/O + _PAGE_DIRTY_SW)
R/O PTE written with (FOLL_FORCE | FOLL_WRITE): (R/O + _PAGE_DIRTY_SW)
Shadow stack PTE:	  (R/O + _PAGE_DIRTY_HW)
Shared shadow stack PTE:  (R/O + _PAGE_DIRTY_SW)

Note that _PAGE_BIT_DIRTY_SW is used only in R/O PTEs,
never in R/W PTEs.

When this patch is applied, there are six free bits left in
the 64-bit PTE.  There are no more free bits in the 32-bit
PTE (except for PAE), and shadow stack is not implemented
for the 32-bit kernel.

Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
---
 arch/x86/include/asm/pgtable.h       | 91 ++++++++++++++++++++++++++++++++----
 arch/x86/include/asm/pgtable_types.h | 14 +++++-
 include/asm-generic/pgtable.h        | 12 +++++
 3 files changed, 105 insertions(+), 12 deletions(-)

Comments

kernel test robot June 8, 2018, 5:15 a.m. UTC | #1
Hi Yu-cheng,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on asm-generic/master]
[also build test WARNING on v4.17 next-20180607]
[cannot apply to tip/x86/core mmotm/master]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Yu-cheng-Yu/Control-Flow-Enforcement-Part-2/20180608-111152
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: i386-randconfig-x003-201822 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/current.h:5:0,
                    from include/linux/sched.h:12,
                    from include/linux/context_tracking.h:5,
                    from arch/x86/kernel/traps.c:15:
   arch/x86/kernel/traps.c: In function 'do_control_protection':
   arch/x86/kernel/traps.c:605:27: error: 'X86_FEATURE_SHSTK' undeclared (first use in this function); did you mean 'X86_FEATURE_EST'?
     if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
                              ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> arch/x86/kernel/traps.c:605:2: note: in expansion of macro 'if'
     if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
     ^~
>> arch/x86/kernel/traps.c:605:7: note: in expansion of macro 'cpu_feature_enabled'
     if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
          ^~~~~~~~~~~~~~~~~~~
   arch/x86/kernel/traps.c:605:27: note: each undeclared identifier is reported only once for each function it appears in
     if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
                              ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> arch/x86/kernel/traps.c:605:2: note: in expansion of macro 'if'
     if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
     ^~
>> arch/x86/kernel/traps.c:605:7: note: in expansion of macro 'cpu_feature_enabled'
     if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
          ^~~~~~~~~~~~~~~~~~~
   arch/x86/kernel/traps.c:606:27: error: 'X86_FEATURE_IBT' undeclared (first use in this function); did you mean 'X86_FEATURE_IBS'?
         !cpu_feature_enabled(X86_FEATURE_IBT)) {
                              ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> arch/x86/kernel/traps.c:605:2: note: in expansion of macro 'if'
     if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
     ^~
   arch/x86/kernel/traps.c:606:7: note: in expansion of macro 'cpu_feature_enabled'
         !cpu_feature_enabled(X86_FEATURE_IBT)) {
          ^~~~~~~~~~~~~~~~~~~

vim +/if +605 arch/x86/kernel/traps.c

a74a6de2 Yu-cheng Yu 2018-06-07  589  
a74a6de2 Yu-cheng Yu 2018-06-07  590  /*
a74a6de2 Yu-cheng Yu 2018-06-07  591   * When a control protection exception occurs, send a signal
a74a6de2 Yu-cheng Yu 2018-06-07  592   * to the responsible application.  Currently, control
a74a6de2 Yu-cheng Yu 2018-06-07  593   * protection is only enabled for the user mode.  This
a74a6de2 Yu-cheng Yu 2018-06-07  594   * exception should not come from the kernel mode.
a74a6de2 Yu-cheng Yu 2018-06-07  595   */
a74a6de2 Yu-cheng Yu 2018-06-07  596  dotraplinkage void
a74a6de2 Yu-cheng Yu 2018-06-07  597  do_control_protection(struct pt_regs *regs, long error_code)
a74a6de2 Yu-cheng Yu 2018-06-07  598  {
a74a6de2 Yu-cheng Yu 2018-06-07  599  	struct task_struct *tsk;
a74a6de2 Yu-cheng Yu 2018-06-07  600  
a74a6de2 Yu-cheng Yu 2018-06-07  601  	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
a74a6de2 Yu-cheng Yu 2018-06-07  602  	cond_local_irq_enable(regs);
a74a6de2 Yu-cheng Yu 2018-06-07  603  
a74a6de2 Yu-cheng Yu 2018-06-07  604  	tsk = current;
a74a6de2 Yu-cheng Yu 2018-06-07 @605  	if (!cpu_feature_enabled(X86_FEATURE_SHSTK) &&
a74a6de2 Yu-cheng Yu 2018-06-07  606  	    !cpu_feature_enabled(X86_FEATURE_IBT)) {
a74a6de2 Yu-cheng Yu 2018-06-07  607  		goto exit;
a74a6de2 Yu-cheng Yu 2018-06-07  608  	}
a74a6de2 Yu-cheng Yu 2018-06-07  609  
a74a6de2 Yu-cheng Yu 2018-06-07  610  	if (!user_mode(regs)) {
a74a6de2 Yu-cheng Yu 2018-06-07  611  		tsk->thread.error_code = error_code;
a74a6de2 Yu-cheng Yu 2018-06-07  612  		tsk->thread.trap_nr = X86_TRAP_CP;
a74a6de2 Yu-cheng Yu 2018-06-07  613  		if (notify_die(DIE_TRAP, "control protection fault", regs,
a74a6de2 Yu-cheng Yu 2018-06-07  614  			       error_code, X86_TRAP_CP, SIGSEGV) != NOTIFY_STOP)
a74a6de2 Yu-cheng Yu 2018-06-07  615  			die("control protection fault", regs, error_code);
a74a6de2 Yu-cheng Yu 2018-06-07  616  		return;
a74a6de2 Yu-cheng Yu 2018-06-07  617  	}
a74a6de2 Yu-cheng Yu 2018-06-07  618  
a74a6de2 Yu-cheng Yu 2018-06-07  619  	tsk->thread.error_code = error_code;
a74a6de2 Yu-cheng Yu 2018-06-07  620  	tsk->thread.trap_nr = X86_TRAP_CP;
a74a6de2 Yu-cheng Yu 2018-06-07  621  
a74a6de2 Yu-cheng Yu 2018-06-07  622  	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
a74a6de2 Yu-cheng Yu 2018-06-07  623  	    printk_ratelimit()) {
a74a6de2 Yu-cheng Yu 2018-06-07  624  		unsigned int max_idx, err_idx;
a74a6de2 Yu-cheng Yu 2018-06-07  625  
a74a6de2 Yu-cheng Yu 2018-06-07  626  		max_idx = ARRAY_SIZE(control_protection_err) - 1;
a74a6de2 Yu-cheng Yu 2018-06-07  627  		err_idx = min((unsigned int)error_code - 1, max_idx);
a74a6de2 Yu-cheng Yu 2018-06-07  628  		pr_info("%s[%d] control protection ip:%lx sp:%lx error:%lx(%s)",
a74a6de2 Yu-cheng Yu 2018-06-07  629  			tsk->comm, task_pid_nr(tsk),
a74a6de2 Yu-cheng Yu 2018-06-07  630  			regs->ip, regs->sp, error_code,
a74a6de2 Yu-cheng Yu 2018-06-07  631  			control_protection_err[err_idx]);
a74a6de2 Yu-cheng Yu 2018-06-07  632  		print_vma_addr(" in ", regs->ip);
a74a6de2 Yu-cheng Yu 2018-06-07  633  		pr_cont("\n");
a74a6de2 Yu-cheng Yu 2018-06-07  634  	}
a74a6de2 Yu-cheng Yu 2018-06-07  635  
a74a6de2 Yu-cheng Yu 2018-06-07  636  exit:
a74a6de2 Yu-cheng Yu 2018-06-07  637  	force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
a74a6de2 Yu-cheng Yu 2018-06-07  638  }
a74a6de2 Yu-cheng Yu 2018-06-07  639  NOKPROBE_SYMBOL(do_control_protection);
a74a6de2 Yu-cheng Yu 2018-06-07  640  

:::::: The code at line 605 was first introduced by commit
:::::: a74a6de2a3290257798598ae1f816eddb04f63f2 x86/cet: Control protection exception handler

:::::: TO: Yu-cheng Yu <yu-cheng.yu@intel.com>
:::::: CC: 0day robot <lkp@intel.com>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 00b5e79c09a6..0996f8a6979a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -116,9 +116,9 @@  extern pmdval_t early_pmd_flags;
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
-static inline int pte_dirty(pte_t pte)
+static inline bool pte_dirty(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_DIRTY;
+	return pte_flags(pte) & _PAGE_DIRTY_BITS;
 }
 
 
@@ -140,9 +140,9 @@  static inline int pte_young(pte_t pte)
 	return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
-static inline int pmd_dirty(pmd_t pmd)
+static inline bool pmd_dirty(pmd_t pmd)
 {
-	return pmd_flags(pmd) & _PAGE_DIRTY;
+	return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
 }
 
 static inline int pmd_young(pmd_t pmd)
@@ -150,9 +150,9 @@  static inline int pmd_young(pmd_t pmd)
 	return pmd_flags(pmd) & _PAGE_ACCESSED;
 }
 
-static inline int pud_dirty(pud_t pud)
+static inline bool pud_dirty(pud_t pud)
 {
-	return pud_flags(pud) & _PAGE_DIRTY;
+	return pud_flags(pud) & _PAGE_DIRTY_BITS;
 }
 
 static inline int pud_young(pud_t pud)
@@ -281,9 +281,23 @@  static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
 	return native_make_pte(v & ~clear);
 }
 
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pte_t pte_move_flags(pte_t pte, pteval_t from, pteval_t to)
+{
+	if (pte_flags(pte) & from)
+		pte = pte_set_flags(pte_clear_flags(pte, from), to);
+	return pte;
+}
+#else
+static inline pte_t pte_move_flags(pte_t pte, pteval_t from, pteval_t to)
+{
+	return pte;
+}
+#endif
+
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return pte_clear_flags(pte, _PAGE_DIRTY);
+	return pte_clear_flags(pte, _PAGE_DIRTY_BITS);
 }
 
 static inline pte_t pte_mkold(pte_t pte)
@@ -293,6 +307,7 @@  static inline pte_t pte_mkold(pte_t pte)
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
+	pte = pte_move_flags(pte, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
 	return pte_clear_flags(pte, _PAGE_RW);
 }
 
@@ -302,9 +317,18 @@  static inline pte_t pte_mkexec(pte_t pte)
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
+{
+	pteval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+			   pte_write(pte)) ? _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+	return pte_set_flags(pte, dirty | _PAGE_SOFT_DIRTY);
+}
+
+#ifdef CONFIG_ARCH_HAS_SHSTK
+static inline pte_t pte_mkdirty_shstk(pte_t pte)
 {
 	return pte_set_flags(pte, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
 }
+#endif
 
 static inline pte_t pte_mkyoung(pte_t pte)
 {
@@ -313,6 +337,7 @@  static inline pte_t pte_mkyoung(pte_t pte)
 
 static inline pte_t pte_mkwrite(pte_t pte)
 {
+	pte = pte_move_flags(pte, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
 	return pte_set_flags(pte, _PAGE_RW);
 }
 
@@ -360,6 +385,20 @@  static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
 	return native_make_pmd(v & ~clear);
 }
 
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pmd_t pmd_move_flags(pmd_t pmd, pmdval_t from, pmdval_t to)
+{
+	if (pmd_flags(pmd) & from)
+		pmd = pmd_set_flags(pmd_clear_flags(pmd, from), to);
+	return pmd;
+}
+#else
+static inline pmd_t pmd_move_flags(pmd_t pmd, pmdval_t from, pmdval_t to)
+{
+	return pmd;
+}
+#endif
+
 static inline pmd_t pmd_mkold(pmd_t pmd)
 {
 	return pmd_clear_flags(pmd, _PAGE_ACCESSED);
@@ -367,18 +406,29 @@  static inline pmd_t pmd_mkold(pmd_t pmd)
 
 static inline pmd_t pmd_mkclean(pmd_t pmd)
 {
-	return pmd_clear_flags(pmd, _PAGE_DIRTY);
+	return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS);
 }
 
 static inline pmd_t pmd_wrprotect(pmd_t pmd)
 {
+	pmd = pmd_move_flags(pmd, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
 	return pmd_clear_flags(pmd, _PAGE_RW);
 }
 
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+	pmdval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+			  (pmd_flags(pmd) & _PAGE_RW)) ?
+			  _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+	return pmd_set_flags(pmd, dirty | _PAGE_SOFT_DIRTY);
+}
+
+#ifdef CONFIG_ARCH_HAS_SHSTK
+static inline pmd_t pmd_mkdirty_shstk(pmd_t pmd)
 {
 	return pmd_set_flags(pmd, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
 }
+#endif
 
 static inline pmd_t pmd_mkdevmap(pmd_t pmd)
 {
@@ -397,6 +447,7 @@  static inline pmd_t pmd_mkyoung(pmd_t pmd)
 
 static inline pmd_t pmd_mkwrite(pmd_t pmd)
 {
+	pmd = pmd_move_flags(pmd, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
 	return pmd_set_flags(pmd, _PAGE_RW);
 }
 
@@ -419,6 +470,20 @@  static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
 	return native_make_pud(v & ~clear);
 }
 
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+static inline pud_t pud_move_flags(pud_t pud, pudval_t from, pudval_t to)
+{
+	if (pud_flags(pud) & from)
+		pud = pud_set_flags(pud_clear_flags(pud, from), to);
+	return pud;
+}
+#else
+static inline pud_t pud_move_flags(pud_t pud, pudval_t from, pudval_t to)
+{
+	return pud;
+}
+#endif
+
 static inline pud_t pud_mkold(pud_t pud)
 {
 	return pud_clear_flags(pud, _PAGE_ACCESSED);
@@ -426,17 +491,22 @@  static inline pud_t pud_mkold(pud_t pud)
 
 static inline pud_t pud_mkclean(pud_t pud)
 {
-	return pud_clear_flags(pud, _PAGE_DIRTY);
+	return pud_clear_flags(pud, _PAGE_DIRTY_BITS);
 }
 
 static inline pud_t pud_wrprotect(pud_t pud)
 {
+	pud = pud_move_flags(pud, _PAGE_DIRTY_HW, _PAGE_DIRTY_SW);
 	return pud_clear_flags(pud, _PAGE_RW);
 }
 
 static inline pud_t pud_mkdirty(pud_t pud)
 {
-	return pud_set_flags(pud, _PAGE_DIRTY_HW | _PAGE_SOFT_DIRTY);
+	pudval_t dirty = (!IS_ENABLED(CONFIG_X86_INTEL_SHADOW_STACK_USER) ||
+			  (pud_flags(pud) & _PAGE_RW)) ?
+			  _PAGE_DIRTY_HW:_PAGE_DIRTY_SW;
+
+	return pud_set_flags(pud, dirty | _PAGE_SOFT_DIRTY);
 }
 
 static inline pud_t pud_mkdevmap(pud_t pud)
@@ -456,6 +526,7 @@  static inline pud_t pud_mkyoung(pud_t pud)
 
 static inline pud_t pud_mkwrite(pud_t pud)
 {
+	pud = pud_move_flags(pud, _PAGE_DIRTY_SW, _PAGE_DIRTY_HW);
 	return pud_set_flags(pud, _PAGE_RW);
 }
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 2ac5d46d7c49..0907adb56197 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -23,6 +23,7 @@ 
 #define _PAGE_BIT_SOFTW2	10	/* " */
 #define _PAGE_BIT_SOFTW3	11	/* " */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
+#define _PAGE_BIT_SOFTW5	57	/* available for programmer */
 #define _PAGE_BIT_SOFTW4	58	/* available for programmer */
 #define _PAGE_BIT_PKEY_BIT0	59	/* Protection Keys, bit 1/4 */
 #define _PAGE_BIT_PKEY_BIT1	60	/* Protection Keys, bit 2/4 */
@@ -34,6 +35,7 @@ 
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_SOFTW1
 #define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_DEVMAP	_PAGE_BIT_SOFTW4
+#define _PAGE_BIT_DIRTY_SW	_PAGE_BIT_SOFTW5 /* was written to */
 
 /* If _PAGE_BIT_PRESENT is clear, we use these: */
 /* - if the user mapped it with PROT_NONE; pte_present gives true */
@@ -109,6 +111,14 @@ 
 #define _PAGE_DEVMAP	(_AT(pteval_t, 0))
 #endif
 
+#if defined(CONFIG_X86_INTEL_SHADOW_STACK_USER)
+#define _PAGE_DIRTY_SW	(_AT(pteval_t, 1) << _PAGE_BIT_DIRTY_SW)
+#else
+#define _PAGE_DIRTY_SW	(_AT(pteval_t, 0))
+#endif
+
+#define _PAGE_DIRTY_BITS (_PAGE_DIRTY_HW | _PAGE_DIRTY_SW)
+
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
 #define _PAGE_TABLE_NOENC	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
@@ -122,9 +132,9 @@ 
  * instance, and is *not* included in this mask since
  * pte_modify() does modify it.
  */
-#define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
+#define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |			\
 			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY_HW |	\
-			 _PAGE_SOFT_DIRTY)
+			 _PAGE_DIRTY_SW | _PAGE_SOFT_DIRTY)
 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
 
 /*
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f59639afaa39..3f6f998509f0 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1097,4 +1097,16 @@  static inline void init_espfix_bsp(void) { }
 #endif
 #endif
 
+#ifndef CONFIG_ARCH_HAS_SHSTK
+static inline pte_t pte_mkdirty_shstk(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_mkdirty_shstk(pmd_t pmd)
+{
+	return pmd;
+}
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */