[v16,13/16] x86/sgx: implement direct reclamation for cgroups

Message ID 20240821015404.6038-14-haitao.huang@linux.intel.com (mailing list archive)
State New, archived
Series Add Cgroup support for SGX EPC memory

Commit Message

Haitao Huang Aug. 21, 2024, 1:54 a.m. UTC
sgx_reclaim_direct() was introduced to preemptively reclaim some pages
as a best effort to avoid on-demand reclamation that can stall forward
progress in some situations, e.g., when allocating pages to load a
previously reclaimed page in order to perform EDMM operations on it [1].

Currently, when global usage is close to capacity, sgx_reclaim_direct()
makes one invocation to sgx_reclaim_pages_global(), but it does not
guarantee that free pages are available for later allocations to
succeed. In other words, the only goal here is to reduce the chance of
on-demand reclamation at allocation time. On allocation failure, the
callers, the EDMM ioctls, return -EAGAIN to user space and let user
space decide whether to retry.
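
For illustration, a minimal user-space retry loop might look like the
sketch below. This is only a sketch: the helper name edmm_ioctl_retry()
and the back-off interval are assumptions for the example, not part of
this patch; any EDMM ioctl that can fail with EAGAIN would follow the
same pattern.

  #include <errno.h>
  #include <sys/ioctl.h>
  #include <unistd.h>

  /* Retry an EDMM ioctl while the kernel reports EPC pressure (EAGAIN). */
  static int edmm_ioctl_retry(int enclave_fd, unsigned long request, void *arg)
  {
          for (;;) {
                  int ret = ioctl(enclave_fd, request, arg);

                  if (ret != -1 || errno != EAGAIN)
                          return ret;
                  /* Back off briefly before retrying the operation. */
                  usleep(1000);
          }
  }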

With EPC cgroups enabled, usage of a cgroup can also reach its limit
(usually much lower than capacity) and trigger per-cgroup reclamation.
Implement a similar strategy to reduce the chance of on-demand
per-cgroup reclamation for this use case.
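
As a rough sketch of the per-cgroup trigger (the helper names
sgx_cgroup_usage() and sgx_cgroup_max_pages() below are assumed for
illustration; the actual check is defined elsewhere in this series):

  /* Reclaim once the pages charged to the cgroup reach its limit. */
  static bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg)
  {
          u64 cur = sgx_cgroup_usage(sgx_cg);     /* pages charged */
          u64 max = sgx_cgroup_max_pages(sgx_cg); /* effective limit */

          return cur >= max;
  }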

Create a wrapper, sgx_cgroup_reclaim_direct(), to perform a preemptive
reclamation at cgroup level, and have sgx_reclaim_direct() call it when
EPC cgroup is enabled.

[1] https://lore.kernel.org/all/a0d8f037c4a075d56bf79f432438412985f7ff7a.1652137848.git.reinette.chatre@intel.com/T/#u

Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>
---
 arch/x86/kernel/cpu/sgx/epc_cgroup.c | 15 +++++++++++++++
 arch/x86/kernel/cpu/sgx/epc_cgroup.h |  3 +++
 arch/x86/kernel/cpu/sgx/main.c       |  4 ++++
 3 files changed, 22 insertions(+)

Comments

Jarkko Sakkinen Aug. 27, 2024, 6:16 p.m. UTC | #1
On Wed Aug 21, 2024 at 4:54 AM EEST, Haitao Huang wrote:
> [...]

Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>

BR, Jarkko
Huang, Kai Aug. 27, 2024, 11:55 p.m. UTC | #2
On 21/08/2024 1:54 pm, Haitao Huang wrote:
> [...]
> 
> With EPC cgroups enabled, usage of a cgroup can also reach its limit
> (usually much lower than capacity) and trigger per-cgroup reclamation.
> Implement a similar strategy to reduce the chance of on-demand
> per-cgroup reclamation for this use case.

I wish there were some explanation of why we don't just try to bring
the usage down to the limit here, but I guess that's OK since what we
do is just _trying_ to increase the success rate of the later EPC
allocation.

Also, when this is invoked, it should be very rare that the limit is
way lower than the usage, so ...

> 
> Create a wrapper, sgx_cgroup_reclaim_direct(), to perform a preemptive
> reclamation at cgroup level, and have sgx_reclaim_direct() call it when
> EPC cgroup is enabled.
> 
> [1] https://lore.kernel.org/all/a0d8f037c4a075d56bf79f432438412985f7ff7a.1652137848.git.reinette.chatre@intel.com/T/#u
> 
> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com>

... feel free to add:

Reviewed-by: Kai Huang <kai.huang@intel.com>

> [...]
>   
> +/**
> + * sgx_cgroup_reclaim_direct() - Preemptive reclamation.
> + *
> + * Scan and attempt to reclaim up to %SGX_NR_TO_SCAN pages as a best effort
> + * to allow the caller to make quick progress.
> + */

Nit:

I don't think this is to allow the "caller" to make quick(er) progress?

It should be making the "later EPC allocation" quicker?

> [...]
> +	/* Reduce chance of per-cgroup reclamation for later allocation */
> +	sgx_cgroup_reclaim_direct();

See.  It says "for later allocation" here.
Huang, Kai Aug. 29, 2024, 9:34 a.m. UTC | #3
> 
> ... feel free to add:
> 
> Reviewed-by: Kai Huang <kai.huang@intel.com>
> 
> 

One more nitpick about the patch title:

"implement" -> "Implement"

Patch

diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.c b/arch/x86/kernel/cpu/sgx/epc_cgroup.c
index 23a61689e0d9..b7d60b2d878d 100644
--- a/arch/x86/kernel/cpu/sgx/epc_cgroup.c
+++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c
@@ -252,6 +252,21 @@  void sgx_cgroup_reclaim_pages_global(struct mm_struct *charge_mm)
 	sgx_cgroup_reclaim_pages(&sgx_cg_root, charge_mm, SGX_NR_TO_SCAN);
 }
 
+/**
+ * sgx_cgroup_reclaim_direct() - Preemptive reclamation.
+ *
+ * Scan and attempt to reclaim up to %SGX_NR_TO_SCAN pages as a best effort
+ * to allow the caller to make quick progress.
+ */
+void sgx_cgroup_reclaim_direct(void)
+{
+	struct sgx_cgroup *sgx_cg = sgx_get_current_cg();
+
+	if (sgx_cgroup_should_reclaim(sgx_cg))
+		sgx_cgroup_reclaim_pages(sgx_cg, current->mm, SGX_NR_TO_SCAN);
+	sgx_put_cg(sgx_cg);
+}
+
 /*
  * Asynchronous work flow to reclaim pages from the cgroup when the cgroup is
  * at/near its maximum capacity.
diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.h b/arch/x86/kernel/cpu/sgx/epc_cgroup.h
index c0390111e28c..cf2b946d993e 100644
--- a/arch/x86/kernel/cpu/sgx/epc_cgroup.h
+++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.h
@@ -38,6 +38,8 @@  static inline void __init sgx_cgroup_register(void) { }
 
 static inline void sgx_cgroup_reclaim_pages_global(struct mm_struct *charge_mm) { }
 
+static inline void sgx_cgroup_reclaim_direct(void) { }
+
 #else /* CONFIG_CGROUP_MISC */
 
 struct sgx_cgroup {
@@ -90,6 +92,7 @@  static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg)
 int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_reclaim reclaim);
 void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg);
 void sgx_cgroup_reclaim_pages_global(struct mm_struct *charge_mm);
+void sgx_cgroup_reclaim_direct(void);
 int __init sgx_cgroup_init(void);
 void __init sgx_cgroup_register(void);
 void __init sgx_cgroup_deinit(void);
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index d00cb012838b..9a8f91ebd21b 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -428,6 +428,10 @@  static void sgx_reclaim_pages_global(struct mm_struct *charge_mm)
  */
 void sgx_reclaim_direct(void)
 {
+	/* Reduce chance of per-cgroup reclamation for later allocation */
+	sgx_cgroup_reclaim_direct();
+
+	/* Reduce chance of the global reclamation for later allocation */
 	if (sgx_should_reclaim_global(SGX_NR_LOW_PAGES))
 		sgx_reclaim_pages_global(current->mm);
 }