Message ID | 20190805170451.26009-3-joel@joelfernandes.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4,1/5] mm/page_idle: Add per-pid idle page tracking using virtual indexing | expand |
On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > This bit will be used by idle page tracking code to correctly identify > if a page that was swapped out was idle before it got swapped out. > Without this PTE bit, we lose information about if a page is idle or not > since the page frame gets unmapped. And why do we need that? Why cannot we simply assume all swapped out pages to be idle? They were certainly idle enough to be reclaimed, right? Or what does idle actualy mean here? > In this patch we reuse PTE_DEVMAP bit since idle page tracking only > works on user pages in the LRU. Device pages should not consitute those > so it should be unused and safe to use. > > Cc: Robin Murphy <robin.murphy@arm.com> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> > --- > arch/arm64/Kconfig | 1 + > arch/arm64/include/asm/pgtable-prot.h | 1 + > arch/arm64/include/asm/pgtable.h | 15 +++++++++++++++ > 3 files changed, 17 insertions(+) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 3adcec05b1f6..9d1412c693d7 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -128,6 +128,7 @@ config ARM64 > select HAVE_ARCH_MMAP_RND_BITS > select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT > select HAVE_ARCH_PREL32_RELOCATIONS > + select HAVE_ARCH_PTE_SWP_PGIDLE > select HAVE_ARCH_SECCOMP_FILTER > select HAVE_ARCH_STACKLEAK > select HAVE_ARCH_THREAD_STRUCT_WHITELIST > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h > index 92d2e9f28f28..917b15c5d63a 100644 > --- a/arch/arm64/include/asm/pgtable-prot.h > +++ b/arch/arm64/include/asm/pgtable-prot.h > @@ -18,6 +18,7 @@ > #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) > #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) > #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ > +#define PTE_SWP_PGIDLE PTE_DEVMAP /* for idle page tracking during swapout */ > > #ifndef __ASSEMBLY__ > > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index 3f5461f7b560..558f5ebd81ba 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -212,6 +212,21 @@ static inline pte_t pte_mkdevmap(pte_t pte) > return set_pte_bit(pte, __pgprot(PTE_DEVMAP)); > } > > +static inline int pte_swp_page_idle(pte_t pte) > +{ > + return 0; > +} > + > +static inline pte_t pte_swp_mkpage_idle(pte_t pte) > +{ > + return set_pte_bit(pte, __pgprot(PTE_SWP_PGIDLE)); > +} > + > +static inline pte_t pte_swp_clear_page_idle(pte_t pte) > +{ > + return clear_pte_bit(pte, __pgprot(PTE_SWP_PGIDLE)); > +} > + > static inline void set_pte(pte_t *ptep, pte_t pte) > { > WRITE_ONCE(*ptep, pte); > -- > 2.22.0.770.g0f2c4a37fd-goog
On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > This bit will be used by idle page tracking code to correctly identify > > if a page that was swapped out was idle before it got swapped out. > > Without this PTE bit, we lose information about if a page is idle or not > > since the page frame gets unmapped. > > And why do we need that? Why cannot we simply assume all swapped out > pages to be idle? They were certainly idle enough to be reclaimed, > right? Or what does idle actualy mean here? Yes, but other than swapping, in Android a page can be forced to be swapped out as well using the new hints that Minchan is adding? Also, even if they were idle enough to be swapped, there is a chance that they were marked as idle and *accessed* before the swapping. Due to swapping, the "page was accessed since we last marked it as idle" information is lost. I am able to verify this. Idle in this context means the same thing as in page idle tracking terms, the page was not accessed by userspace since we last marked it as idle (using /proc/<pid>/page_idle). thanks, - Joel > > In this patch we reuse PTE_DEVMAP bit since idle page tracking only > > works on user pages in the LRU. Device pages should not consitute those > > so it should be unused and safe to use. > > > > Cc: Robin Murphy <robin.murphy@arm.com> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> > > --- > > arch/arm64/Kconfig | 1 + > > arch/arm64/include/asm/pgtable-prot.h | 1 + > > arch/arm64/include/asm/pgtable.h | 15 +++++++++++++++ > > 3 files changed, 17 insertions(+) > > > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > > index 3adcec05b1f6..9d1412c693d7 100644 > > --- a/arch/arm64/Kconfig > > +++ b/arch/arm64/Kconfig > > @@ -128,6 +128,7 @@ config ARM64 > > select HAVE_ARCH_MMAP_RND_BITS > > select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT > > select HAVE_ARCH_PREL32_RELOCATIONS > > + select HAVE_ARCH_PTE_SWP_PGIDLE > > select HAVE_ARCH_SECCOMP_FILTER > > select HAVE_ARCH_STACKLEAK > > select HAVE_ARCH_THREAD_STRUCT_WHITELIST > > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h > > index 92d2e9f28f28..917b15c5d63a 100644 > > --- a/arch/arm64/include/asm/pgtable-prot.h > > +++ b/arch/arm64/include/asm/pgtable-prot.h > > @@ -18,6 +18,7 @@ > > #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) > > #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) > > #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ > > +#define PTE_SWP_PGIDLE PTE_DEVMAP /* for idle page tracking during swapout */ > > > > #ifndef __ASSEMBLY__ > > > > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > > index 3f5461f7b560..558f5ebd81ba 100644 > > --- a/arch/arm64/include/asm/pgtable.h > > +++ b/arch/arm64/include/asm/pgtable.h > > @@ -212,6 +212,21 @@ static inline pte_t pte_mkdevmap(pte_t pte) > > return set_pte_bit(pte, __pgprot(PTE_DEVMAP)); > > } > > > > +static inline int pte_swp_page_idle(pte_t pte) > > +{ > > + return 0; > > +} > > + > > +static inline pte_t pte_swp_mkpage_idle(pte_t pte) > > +{ > > + return set_pte_bit(pte, __pgprot(PTE_SWP_PGIDLE)); > > +} > > + > > +static inline pte_t pte_swp_clear_page_idle(pte_t pte) > > +{ > > + return clear_pte_bit(pte, __pgprot(PTE_SWP_PGIDLE)); > > +} > > + > > static inline void set_pte(pte_t *ptep, pte_t pte) > > { > > WRITE_ONCE(*ptep, pte); > > -- > > 2.22.0.770.g0f2c4a37fd-goog > > -- > Michal Hocko > SUSE Labs
On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > This bit will be used by idle page tracking code to correctly identify > > > if a page that was swapped out was idle before it got swapped out. > > > Without this PTE bit, we lose information about if a page is idle or not > > > since the page frame gets unmapped. > > > > And why do we need that? Why cannot we simply assume all swapped out > > pages to be idle? They were certainly idle enough to be reclaimed, > > right? Or what does idle actualy mean here? > > Yes, but other than swapping, in Android a page can be forced to be swapped > out as well using the new hints that Minchan is adding? Yes and that is effectivelly making them idle, no? > Also, even if they were idle enough to be swapped, there is a chance that they > were marked as idle and *accessed* before the swapping. Due to swapping, the > "page was accessed since we last marked it as idle" information is lost. I am > able to verify this. > > Idle in this context means the same thing as in page idle tracking terms, the > page was not accessed by userspace since we last marked it as idle (using > /proc/<pid>/page_idle). Please describe a usecase and why that information might be useful.
On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > This bit will be used by idle page tracking code to correctly identify > > > > if a page that was swapped out was idle before it got swapped out. > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > since the page frame gets unmapped. > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > right? Or what does idle actualy mean here? > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > out as well using the new hints that Minchan is adding? > > Yes and that is effectivelly making them idle, no? 1. mark page-A idle which was present at that time. 2. run workload 3. page-A is touched several times 4. *sudden* memory pressure happen so finally page A is finally swapped out 5. now see the page A idle - but it's incorrect.
On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > This bit will be used by idle page tracking code to correctly identify > > > > if a page that was swapped out was idle before it got swapped out. > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > since the page frame gets unmapped. > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > right? Or what does idle actualy mean here? > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > out as well using the new hints that Minchan is adding? > > Yes and that is effectivelly making them idle, no? That depends on how you think of it. If you are thinking of a monitoring process like a heap profiler, then from the heap profiler's (that only cares about the process it is monitoring) perspective it will look extremely odd if pages that are recently accessed by the process appear to be idle which would falsely look like those processes are leaking memory. The reality being, Android forced those pages into swap because of other reasons. I would like for the swapping mechanism, whether forced swapping or memory reclaim, not to interfere with the idle detection. This is just an effort to make the idle tracking a little bit better. We would like to not lose the 'accessed' information of the pages. Initially, I had proposed what you are suggesting as well however the above reasons made me to do it like this. Also Minchan and Konstantin suggested this, so there are more people interested in the swap idle bit. Minchan, can you provide more thoughts here? (He is on 2-week vacation from today so hopefully replies before he vanishes ;-)). Also assuming all swap pages as idle has other "semantic" issues. It is quite odd if a swapped page is automatically marked as idle without userspace telling it to. Consider the following set of events: 1. Userspace marks only a certain memory region as idle. 2. Userspace reads back the bits corresponding to a bigger region. Part of this bigger region is swapped. Userspace expects all of the pages it did not mark, to have idle bit set to '0' because it never marked them as idle. However if it is now surprised by what it read back (not all '0' read back). Since a page is swapped, it will be now marked "automatically" as idle as per your proposal, even if userspace never marked it explicity before. This would be quite confusing/ambiguous. I will include this and other information in future commit messages. thanks, - Joel
On Tue 06-08-19 20:07:37, Minchan Kim wrote: > On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > > This bit will be used by idle page tracking code to correctly identify > > > > > if a page that was swapped out was idle before it got swapped out. > > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > > since the page frame gets unmapped. > > > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > > right? Or what does idle actualy mean here? > > > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > > out as well using the new hints that Minchan is adding? > > > > Yes and that is effectivelly making them idle, no? > > 1. mark page-A idle which was present at that time. > 2. run workload > 3. page-A is touched several times > 4. *sudden* memory pressure happen so finally page A is finally swapped out > 5. now see the page A idle - but it's incorrect. Could you expand on what you mean by idle exactly? Why pageout doesn't really qualify as "mark-idle and reclaim"? Also could you describe a usecase where the swapout distinction really matters and it would lead to incorrect behavior?
On Tue, Aug 06, 2019 at 01:14:52PM +0200, Michal Hocko wrote: > On Tue 06-08-19 20:07:37, Minchan Kim wrote: > > On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > > > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > > > This bit will be used by idle page tracking code to correctly identify > > > > > > if a page that was swapped out was idle before it got swapped out. > > > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > > > since the page frame gets unmapped. > > > > > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > > > right? Or what does idle actualy mean here? > > > > > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > > > out as well using the new hints that Minchan is adding? > > > > > > Yes and that is effectivelly making them idle, no? > > > > 1. mark page-A idle which was present at that time. > > 2. run workload > > 3. page-A is touched several times > > 4. *sudden* memory pressure happen so finally page A is finally swapped out > > 5. now see the page A idle - but it's incorrect. > > Could you expand on what you mean by idle exactly? Why pageout doesn't > really qualify as "mark-idle and reclaim"? Also could you describe a > usecase where the swapout distinction really matters and it would lead > to incorrect behavior? Michal, Did you read this post ? : https://lore.kernel.org/lkml/20190806104715.GC218260@google.com/T/#m4ece68ceaf6e54d4d29e974f5f4c1080e733f6c1 Just wanted to be sure you did not miss it. thanks, - Joel
On Tue 06-08-19 07:14:46, Joel Fernandes wrote: > On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > > This bit will be used by idle page tracking code to correctly identify > > > > > if a page that was swapped out was idle before it got swapped out. > > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > > since the page frame gets unmapped. > > > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > > right? Or what does idle actualy mean here? > > > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > > out as well using the new hints that Minchan is adding? > > > > Yes and that is effectivelly making them idle, no? > > That depends on how you think of it. I would much prefer to have it documented so that I do not have to guess ;) > If you are thinking of a monitoring > process like a heap profiler, then from the heap profiler's (that only cares > about the process it is monitoring) perspective it will look extremely odd if > pages that are recently accessed by the process appear to be idle which would > falsely look like those processes are leaking memory. The reality being, > Android forced those pages into swap because of other reasons. I would like > for the swapping mechanism, whether forced swapping or memory reclaim, not to > interfere with the idle detection. Hmm, but how are you going to handle situation when the page is unmapped and refaulted again (e.g. a normal reclaim of a pagecache)? You are losing that information same was as in the swapout case, no? Or am I missing something? > This is just an effort to make the idle tracking a little bit better. We > would like to not lose the 'accessed' information of the pages. > > Initially, I had proposed what you are suggesting as well however the above > reasons made me to do it like this. Also Minchan and Konstantin suggested > this, so there are more people interested in the swap idle bit. Minchan, can > you provide more thoughts here? (He is on 2-week vacation from today so > hopefully replies before he vanishes ;-)). We can move on with the rest of the series in the mean time but I would like to see a proper justification for the swap entries and why they should be handled special. > Also assuming all swap pages as idle has other "semantic" issues. It is quite > odd if a swapped page is automatically marked as idle without userspace > telling it to. Consider the following set of events: 1. Userspace marks only > a certain memory region as idle. 2. Userspace reads back the bits > corresponding to a bigger region. Part of this bigger region is swapped. > Userspace expects all of the pages it did not mark, to have idle bit set to > '0' because it never marked them as idle. However if it is now surprised by > what it read back (not all '0' read back). Since a page is swapped, it will > be now marked "automatically" as idle as per your proposal, even if userspace > never marked it explicity before. This would be quite confusing/ambiguous. OK, I see. I guess the primary question I have is how do you distinguish Idle page which got unmapped and faulted in again from swapped out page and refaulted - including the time the pte is not present.
On Tue, Aug 06, 2019 at 01:57:03PM +0200, Michal Hocko wrote: > On Tue 06-08-19 07:14:46, Joel Fernandes wrote: > > On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > > > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > > > This bit will be used by idle page tracking code to correctly identify > > > > > > if a page that was swapped out was idle before it got swapped out. > > > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > > > since the page frame gets unmapped. > > > > > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > > > right? Or what does idle actualy mean here? > > > > > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > > > out as well using the new hints that Minchan is adding? > > > > > > Yes and that is effectivelly making them idle, no? > > > > That depends on how you think of it. > > I would much prefer to have it documented so that I do not have to guess ;) Sure :) > > If you are thinking of a monitoring > > process like a heap profiler, then from the heap profiler's (that only cares > > about the process it is monitoring) perspective it will look extremely odd if > > pages that are recently accessed by the process appear to be idle which would > > falsely look like those processes are leaking memory. The reality being, > > Android forced those pages into swap because of other reasons. I would like > > for the swapping mechanism, whether forced swapping or memory reclaim, not to > > interfere with the idle detection. > > Hmm, but how are you going to handle situation when the page is unmapped > and refaulted again (e.g. a normal reclaim of a pagecache)? You are > losing that information same was as in the swapout case, no? Or am I > missing something? Yes you are right, it would have the same issue, thanks for bringing it up. Should we rename this bit to PTE_IDLE and do the same thing that we are doing for swap? i.e. if (page_idle(page)) and page is a file page, then we write state into the PTE of the page. Later on refault, the PTE bit would automatically get cleared (just like it does on swap-in). But before refault, the idle tracking code sees the page as still marked idle. Do you see any issue with that? > > This is just an effort to make the idle tracking a little bit better. We > > would like to not lose the 'accessed' information of the pages. > > > > Initially, I had proposed what you are suggesting as well however the above > > reasons made me to do it like this. Also Minchan and Konstantin suggested > > this, so there are more people interested in the swap idle bit. Minchan, can > > you provide more thoughts here? (He is on 2-week vacation from today so > > hopefully replies before he vanishes ;-)). > > We can move on with the rest of the series in the mean time but I would > like to see a proper justification for the swap entries and why they > should be handled special. Ok, I will improve the changelog. > > Also assuming all swap pages as idle has other "semantic" issues. It is quite > > odd if a swapped page is automatically marked as idle without userspace > > telling it to. Consider the following set of events: 1. Userspace marks only > > a certain memory region as idle. 2. Userspace reads back the bits > > corresponding to a bigger region. Part of this bigger region is swapped. > > Userspace expects all of the pages it did not mark, to have idle bit set to > > '0' because it never marked them as idle. However if it is now surprised by > > what it read back (not all '0' read back). Since a page is swapped, it will > > be now marked "automatically" as idle as per your proposal, even if userspace > > never marked it explicity before. This would be quite confusing/ambiguous. > > OK, I see. I guess the primary question I have is how do you distinguish > Idle page which got unmapped and faulted in again from swapped out page > and refaulted - including the time the pte is not present. Ok, lets discuss more. thanks Michal! - Joel
On Tue 06-08-19 09:43:21, Joel Fernandes wrote: > On Tue, Aug 06, 2019 at 01:57:03PM +0200, Michal Hocko wrote: > > On Tue 06-08-19 07:14:46, Joel Fernandes wrote: > > > On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > > > > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > > > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > > > > This bit will be used by idle page tracking code to correctly identify > > > > > > > if a page that was swapped out was idle before it got swapped out. > > > > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > > > > since the page frame gets unmapped. > > > > > > > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > > > > right? Or what does idle actualy mean here? > > > > > > > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > > > > out as well using the new hints that Minchan is adding? > > > > > > > > Yes and that is effectivelly making them idle, no? > > > > > > That depends on how you think of it. > > > > I would much prefer to have it documented so that I do not have to guess ;) > > Sure :) > > > > If you are thinking of a monitoring > > > process like a heap profiler, then from the heap profiler's (that only cares > > > about the process it is monitoring) perspective it will look extremely odd if > > > pages that are recently accessed by the process appear to be idle which would > > > falsely look like those processes are leaking memory. The reality being, > > > Android forced those pages into swap because of other reasons. I would like > > > for the swapping mechanism, whether forced swapping or memory reclaim, not to > > > interfere with the idle detection. > > > > Hmm, but how are you going to handle situation when the page is unmapped > > and refaulted again (e.g. a normal reclaim of a pagecache)? You are > > losing that information same was as in the swapout case, no? Or am I > > missing something? > > Yes you are right, it would have the same issue, thanks for bringing it up. > Should we rename this bit to PTE_IDLE and do the same thing that we are doing > for swap? What if we decide to tear the page table down as well? E.g. because we can reclaim file backed mappings and free some memory used for page tables. We do not do that right now but I can see that really large mappings might push us that direction. Sure this is mostly a theoretical concern but I am wondering whether promissing to keep the idle bit over unmapping is not too much. I am not sure how to deal with this myself, TBH. In any case the current semantic - via pfn - will lose the idle bit already so can we mimic it as well? We only have 1 bit for each address which makes it challenging. The easiest way would be to declare that the idle bit might disappear on activating or reclaiming the page. How well that suits different usecases is a different question. I would be interested in hearing from other people about this of course.
On Tue, Aug 06, 2019 at 01:57:03PM +0200, Michal Hocko wrote: > On Tue 06-08-19 07:14:46, Joel Fernandes wrote: > > On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > > > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > > > This bit will be used by idle page tracking code to correctly identify > > > > > > if a page that was swapped out was idle before it got swapped out. > > > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > > > since the page frame gets unmapped. > > > > > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > > > right? Or what does idle actualy mean here? > > > > > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > > > out as well using the new hints that Minchan is adding? > > > > > > Yes and that is effectivelly making them idle, no? > > > > That depends on how you think of it. > > I would much prefer to have it documented so that I do not have to guess ;) > > > If you are thinking of a monitoring > > process like a heap profiler, then from the heap profiler's (that only cares > > about the process it is monitoring) perspective it will look extremely odd if > > pages that are recently accessed by the process appear to be idle which would > > falsely look like those processes are leaking memory. The reality being, > > Android forced those pages into swap because of other reasons. I would like > > for the swapping mechanism, whether forced swapping or memory reclaim, not to > > interfere with the idle detection. > > Hmm, but how are you going to handle situation when the page is unmapped > and refaulted again (e.g. a normal reclaim of a pagecache)? You are > losing that information same was as in the swapout case, no? Or am I > missing something? If page is unmapped, it's not a idle memory any longer because it's free memory. We could detect the pte is not present. If page is refaulted, it's not a idle memory any longer because it's accessed again. We could detect it because the newly allocated page doesn't have a PG_idle page flag. Both case, idle page tracking couldn't report them as IDLE so it's okay.
On Tue, Aug 06, 2019 at 11:47:47PM +0900, Minchan Kim wrote: > On Tue, Aug 06, 2019 at 01:57:03PM +0200, Michal Hocko wrote: > > On Tue 06-08-19 07:14:46, Joel Fernandes wrote: > > > On Tue, Aug 06, 2019 at 12:47:55PM +0200, Michal Hocko wrote: > > > > On Tue 06-08-19 06:36:27, Joel Fernandes wrote: > > > > > On Tue, Aug 06, 2019 at 10:42:03AM +0200, Michal Hocko wrote: > > > > > > On Mon 05-08-19 13:04:49, Joel Fernandes (Google) wrote: > > > > > > > This bit will be used by idle page tracking code to correctly identify > > > > > > > if a page that was swapped out was idle before it got swapped out. > > > > > > > Without this PTE bit, we lose information about if a page is idle or not > > > > > > > since the page frame gets unmapped. > > > > > > > > > > > > And why do we need that? Why cannot we simply assume all swapped out > > > > > > pages to be idle? They were certainly idle enough to be reclaimed, > > > > > > right? Or what does idle actualy mean here? > > > > > > > > > > Yes, but other than swapping, in Android a page can be forced to be swapped > > > > > out as well using the new hints that Minchan is adding? > > > > > > > > Yes and that is effectivelly making them idle, no? > > > > > > That depends on how you think of it. > > > > I would much prefer to have it documented so that I do not have to guess ;) > > > > > If you are thinking of a monitoring > > > process like a heap profiler, then from the heap profiler's (that only cares > > > about the process it is monitoring) perspective it will look extremely odd if > > > pages that are recently accessed by the process appear to be idle which would > > > falsely look like those processes are leaking memory. The reality being, > > > Android forced those pages into swap because of other reasons. I would like > > > for the swapping mechanism, whether forced swapping or memory reclaim, not to > > > interfere with the idle detection. > > > > Hmm, but how are you going to handle situation when the page is unmapped > > and refaulted again (e.g. a normal reclaim of a pagecache)? You are > > losing that information same was as in the swapout case, no? Or am I > > missing something? > > If page is unmapped, it's not a idle memory any longer because it's > free memory. We could detect the pte is not present. I think Michal is not talking of explictly being unmapped, but about the case where a file-backed mapped page is unmapped due to memory pressure ? This is similar to the swap situation. Basically... file page is marked idle, then it is accessed by userspace. Then memory pressure drops it off the page cache so the idle information is lost. Next time we check the page_idle, we miss that it was accessed indeed. It is not an issue for the heap profiler or anonymous memory per-se. But is similar to the swap situation. > If page is refaulted, it's not a idle memory any longer because it's > accessed again. We could detect it because the newly allocated page > doesn't have a PG_idle page flag. In the refault case, yes it should not be a problem. thanks, - Joel
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3adcec05b1f6..9d1412c693d7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -128,6 +128,7 @@ config ARM64 select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_PREL32_RELOCATIONS + select HAVE_ARCH_PTE_SWP_PGIDLE select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 92d2e9f28f28..917b15c5d63a 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -18,6 +18,7 @@ #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ +#define PTE_SWP_PGIDLE PTE_DEVMAP /* for idle page tracking during swapout */ #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3f5461f7b560..558f5ebd81ba 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -212,6 +212,21 @@ static inline pte_t pte_mkdevmap(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_DEVMAP)); } +static inline int pte_swp_page_idle(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_swp_mkpage_idle(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_SWP_PGIDLE)); +} + +static inline pte_t pte_swp_clear_page_idle(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_SWP_PGIDLE)); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte);
This bit will be used by idle page tracking code to correctly identify if a page that was swapped out was idle before it got swapped out. Without this PTE bit, we lose information about if a page is idle or not since the page frame gets unmapped. In this patch we reuse PTE_DEVMAP bit since idle page tracking only works on user pages in the LRU. Device pages should not consitute those so it should be unused and safe to use. Cc: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/pgtable-prot.h | 1 + arch/arm64/include/asm/pgtable.h | 15 +++++++++++++++ 3 files changed, 17 insertions(+)