diff mbox series

[v4,4/4] mm/page_owner: Record task command name

Message ID 20220131220328.622162-1-longman@redhat.com (mailing list archive)
State New
Headers show
Series [v4,1/4] lib/vsprintf: Avoid redundant work with 0 size | expand

Commit Message

Waiman Long Jan. 31, 2022, 10:03 p.m. UTC
The page_owner information currently includes the pid of the calling
task. That is useful as long as the task is still running. Otherwise,
the number is meaningless. To have more information about the allocating
tasks that had exited by the time the page_owner information is
retrieved, we need to store the command name of the task.

Add a new comm field into page_owner structure to store the command name
and display it when the page_owner information is retrieved.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 mm/page_owner.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

Comments

Michal Hocko Feb. 1, 2022, 3:28 p.m. UTC | #1
Cc Vlastimil

On Mon 31-01-22 17:03:28, Waiman Long wrote:
> The page_owner information currently includes the pid of the calling
> task. That is useful as long as the task is still running. Otherwise,
> the number is meaningless. To have more information about the allocating
> tasks that had exited by the time the page_owner information is
> retrieved, we need to store the command name of the task.
> 
> Add a new comm field into page_owner structure to store the command name
> and display it when the page_owner information is retrieved.

I completely agree that pid is effectivelly useless (if not misleading)
but is comm really telling all that much to compensate for the
additional storage required for _each_ page in the system?

> 
> Signed-off-by: Waiman Long <longman@redhat.com>
> ---
>  mm/page_owner.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index a471c74c7fe0..485542155483 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -29,6 +29,7 @@ struct page_owner {
>  	depot_stack_handle_t free_handle;
>  	u64 ts_nsec;
>  	u64 free_ts_nsec;
> +	char comm[TASK_COMM_LEN];
>  	pid_t pid;
>  };
>  
> @@ -146,6 +147,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
>  		page_owner = get_page_owner(page_ext);
>  		page_owner->free_handle = handle;
>  		page_owner->free_ts_nsec = free_ts_nsec;
> +		page_owner->comm[0] = '\0';
>  		page_ext = page_ext_next(page_ext);
>  	}
>  }
> @@ -165,6 +167,8 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext,
>  		page_owner->last_migrate_reason = -1;
>  		page_owner->pid = current->pid;
>  		page_owner->ts_nsec = local_clock();
> +		strlcpy(page_owner->comm, current->comm,
> +			sizeof(page_owner->comm));
>  		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
>  		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
>  
> @@ -232,6 +236,7 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
>  	new_page_owner->pid = old_page_owner->pid;
>  	new_page_owner->ts_nsec = old_page_owner->ts_nsec;
>  	new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
> +	strcpy(new_page_owner->comm, old_page_owner->comm);
>  
>  	/*
>  	 * We don't clear the bit on the old folio as it's going to be freed
> @@ -376,10 +381,11 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
>  		return -ENOMEM;
>  
>  	ret = scnprintf(kbuf, count,
> -			"Page allocated via order %u, mask %#x(%pGg), pid %d, ts %llu ns, free_ts %llu ns\n",
> +			"Page allocated via order %u, mask %#x(%pGg), pid %d (%s), ts %llu ns, free_ts %llu ns\n",
>  			page_owner->order, page_owner->gfp_mask,
>  			&page_owner->gfp_mask, page_owner->pid,
> -			page_owner->ts_nsec, page_owner->free_ts_nsec);
> +			page_owner->comm, page_owner->ts_nsec,
> +			page_owner->free_ts_nsec);
>  
>  	/* Print information relevant to grouping pages by mobility */
>  	pageblock_mt = get_pageblock_migratetype(page);
> @@ -446,9 +452,10 @@ void __dump_page_owner(const struct page *page)
>  	else
>  		pr_alert("page_owner tracks the page as freed\n");
>  
> -	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu, free_ts %llu\n",
> +	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d (%s), ts %llu, free_ts %llu\n",
>  		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
> -		 page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec);
> +		 page_owner->pid, page_owner->comm, page_owner->ts_nsec,
> +		 page_owner->free_ts_nsec);
>  
>  	handle = READ_ONCE(page_owner->handle);
>  	if (!handle)
> -- 
> 2.27.0
Waiman Long Feb. 2, 2022, 4:53 p.m. UTC | #2
On 2/1/22 10:28, Michal Hocko wrote:
> Cc Vlastimil
>
> On Mon 31-01-22 17:03:28, Waiman Long wrote:
>> The page_owner information currently includes the pid of the calling
>> task. That is useful as long as the task is still running. Otherwise,
>> the number is meaningless. To have more information about the allocating
>> tasks that had exited by the time the page_owner information is
>> retrieved, we need to store the command name of the task.
>>
>> Add a new comm field into page_owner structure to store the command name
>> and display it when the page_owner information is retrieved.
> I completely agree that pid is effectivelly useless (if not misleading)
> but is comm really telling all that much to compensate for the
> additional storage required for _each_ page in the system?

Yes, it does add an extra 16 bytes per page overhead. The command name 
can be useful if one want to find out which userspace command is 
responsible for a problematic page allocation. Maybe we can remove pid 
from page_owner to save 8 bytes as you also agree that this number is 
not that useful.

Cheers,
Longman
Vlastimil Babka Feb. 3, 2022, 12:10 p.m. UTC | #3
On 2/2/22 17:53, Waiman Long wrote:
> 
> On 2/1/22 10:28, Michal Hocko wrote:
>> Cc Vlastimil
>>
>> On Mon 31-01-22 17:03:28, Waiman Long wrote:
>>> The page_owner information currently includes the pid of the calling
>>> task. That is useful as long as the task is still running. Otherwise,
>>> the number is meaningless. To have more information about the allocating
>>> tasks that had exited by the time the page_owner information is
>>> retrieved, we need to store the command name of the task.
>>>
>>> Add a new comm field into page_owner structure to store the command name
>>> and display it when the page_owner information is retrieved.
>> I completely agree that pid is effectivelly useless (if not misleading)
>> but is comm really telling all that much to compensate for the
>> additional storage required for _each_ page in the system?
> 
> Yes, it does add an extra 16 bytes per page overhead. The command name can
> be useful if one want to find out which userspace command is responsible for
> a problematic page allocation. Maybe we can remove pid from page_owner to
> save 8 bytes as you also agree that this number is not that useful.

Pid could be used to correlate command instances (not perfectly if reuse
happens), but command name could have a higher chance to be useful. In my
experience the most useful were the stacktraces and gfp/order etc. anyway.
So I wouldn't be opposed replacing pid with comm. The mild size increase
should be acceptable, this is an opt-in feature for debugging sessions with
known tradeoff for memory and cpu overhead for the extra info.

> Cheers,
> Longman
>
Waiman Long Feb. 3, 2022, 6:53 p.m. UTC | #4
On 2/3/22 07:10, Vlastimil Babka wrote:
> On 2/2/22 17:53, Waiman Long wrote:
>> On 2/1/22 10:28, Michal Hocko wrote:
>>> Cc Vlastimil
>>>
>>> On Mon 31-01-22 17:03:28, Waiman Long wrote:
>>>> The page_owner information currently includes the pid of the calling
>>>> task. That is useful as long as the task is still running. Otherwise,
>>>> the number is meaningless. To have more information about the allocating
>>>> tasks that had exited by the time the page_owner information is
>>>> retrieved, we need to store the command name of the task.
>>>>
>>>> Add a new comm field into page_owner structure to store the command name
>>>> and display it when the page_owner information is retrieved.
>>> I completely agree that pid is effectivelly useless (if not misleading)
>>> but is comm really telling all that much to compensate for the
>>> additional storage required for _each_ page in the system?
>> Yes, it does add an extra 16 bytes per page overhead. The command name can
>> be useful if one want to find out which userspace command is responsible for
>> a problematic page allocation. Maybe we can remove pid from page_owner to
>> save 8 bytes as you also agree that this number is not that useful.
> Pid could be used to correlate command instances (not perfectly if reuse
> happens), but command name could have a higher chance to be useful. In my
> experience the most useful were the stacktraces and gfp/order etc. anyway.
> So I wouldn't be opposed replacing pid with comm. The mild size increase
> should be acceptable, this is an opt-in feature for debugging sessions with
> known tradeoff for memory and cpu overhead for the extra info.

Thanks for the information.

I floated around dropping pid just as a possible way to reduce overall 
memory overhead. I did not do that in my patch and I am not planning to 
post any patch unless everybody agree.

Cheer,
Longman
diff mbox series

Patch

diff --git a/mm/page_owner.c b/mm/page_owner.c
index a471c74c7fe0..485542155483 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -29,6 +29,7 @@  struct page_owner {
 	depot_stack_handle_t free_handle;
 	u64 ts_nsec;
 	u64 free_ts_nsec;
+	char comm[TASK_COMM_LEN];
 	pid_t pid;
 };
 
@@ -146,6 +147,7 @@  void __reset_page_owner(struct page *page, unsigned short order)
 		page_owner = get_page_owner(page_ext);
 		page_owner->free_handle = handle;
 		page_owner->free_ts_nsec = free_ts_nsec;
+		page_owner->comm[0] = '\0';
 		page_ext = page_ext_next(page_ext);
 	}
 }
@@ -165,6 +167,8 @@  static inline void __set_page_owner_handle(struct page_ext *page_ext,
 		page_owner->last_migrate_reason = -1;
 		page_owner->pid = current->pid;
 		page_owner->ts_nsec = local_clock();
+		strlcpy(page_owner->comm, current->comm,
+			sizeof(page_owner->comm));
 		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
 		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
 
@@ -232,6 +236,7 @@  void __folio_copy_owner(struct folio *newfolio, struct folio *old)
 	new_page_owner->pid = old_page_owner->pid;
 	new_page_owner->ts_nsec = old_page_owner->ts_nsec;
 	new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
+	strcpy(new_page_owner->comm, old_page_owner->comm);
 
 	/*
 	 * We don't clear the bit on the old folio as it's going to be freed
@@ -376,10 +381,11 @@  print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 		return -ENOMEM;
 
 	ret = scnprintf(kbuf, count,
-			"Page allocated via order %u, mask %#x(%pGg), pid %d, ts %llu ns, free_ts %llu ns\n",
+			"Page allocated via order %u, mask %#x(%pGg), pid %d (%s), ts %llu ns, free_ts %llu ns\n",
 			page_owner->order, page_owner->gfp_mask,
 			&page_owner->gfp_mask, page_owner->pid,
-			page_owner->ts_nsec, page_owner->free_ts_nsec);
+			page_owner->comm, page_owner->ts_nsec,
+			page_owner->free_ts_nsec);
 
 	/* Print information relevant to grouping pages by mobility */
 	pageblock_mt = get_pageblock_migratetype(page);
@@ -446,9 +452,10 @@  void __dump_page_owner(const struct page *page)
 	else
 		pr_alert("page_owner tracks the page as freed\n");
 
-	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu, free_ts %llu\n",
+	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d (%s), ts %llu, free_ts %llu\n",
 		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
-		 page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec);
+		 page_owner->pid, page_owner->comm, page_owner->ts_nsec,
+		 page_owner->free_ts_nsec);
 
 	handle = READ_ONCE(page_owner->handle);
 	if (!handle)