diff mbox series

audit: add task history record

Message ID 36b65eb1-ccbf-8b81-468f-b8d88c4be5a3@I-love.SAKURA.ne.jp (mailing list archive)
State Rejected
Delegated to: Paul Moore
Headers show
Series audit: add task history record | expand

Commit Message

Tetsuo Handa Aug. 11, 2023, 10:58 a.m. UTC
When an unexpected system event occurs, the administrator may want to
identify which application triggered the event. For example, unexpected
process termination is still a real concern enough to write articles
like https://access.redhat.com/solutions/165993 .

This patch adds a record which emits TOMOYO-like task history information
into the audit logs for better understanding of unexpected system events.

  type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"

To be able to avoid bloating audit log files due to this information, this
patch uses audit_history= kernel command line parameter that controls max
length of history in bytes (default is 1024, and setting to 0 disables
recording and emitting).

Unlike execve()'s argv record, records in this history information is
emitted as one string in order to reduce bloat of the audit log files.
This information can be split into an array using => as the tokenizer.
But don't expect that you can compare array elements throughout the whole
audit logs by splitting into an array, for old records get removed from
history when history became too long to append the newest record. This
history information is meant to be interpreted by humans rather than be
analyzed by programs.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 fs/exec.c                  |   1 +
 include/linux/audit.h      |   5 ++
 include/linux/sched.h      |   1 +
 include/uapi/linux/audit.h |   1 +
 init/init_task.c           |   7 +++
 kernel/audit.c             |   1 +
 kernel/auditsc.c           | 108 +++++++++++++++++++++++++++++++++++++
 7 files changed, 124 insertions(+)

Comments

Richard Guy Briggs Aug. 11, 2023, 5:50 p.m. UTC | #1
On 2023-08-11 19:58, Tetsuo Handa wrote:
> When an unexpected system event occurs, the administrator may want to
> identify which application triggered the event. For example, unexpected
> process termination is still a real concern enough to write articles
> like https://access.redhat.com/solutions/165993 .
> 
> This patch adds a record which emits TOMOYO-like task history information
> into the audit logs for better understanding of unexpected system events.
> 
>   type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"
> 
> To be able to avoid bloating audit log files due to this information, this
> patch uses audit_history= kernel command line parameter that controls max
> length of history in bytes (default is 1024, and setting to 0 disables
> recording and emitting).

The record length limit is just below 8k.  Is it reasonable to set the
default at 4k and cap it around 7.5k to be safe?

Is proctitle also useful here?  It contains the full command line, but
is influencible by userspace.

> Unlike execve()'s argv record, records in this history information is
> emitted as one string in order to reduce bloat of the audit log files.
> This information can be split into an array using => as the tokenizer.
> But don't expect that you can compare array elements throughout the whole
> audit logs by splitting into an array, for old records get removed from
> history when history became too long to append the newest record. This
> history information is meant to be interpreted by humans rather than be
> analyzed by programs.

You say this isn't intended to be analysed by programs, but we all know
it will inevitably be attempted...  Would any of the descendants of
audit_log_untrustedstring() be of use, in particular
audit_string_contains_control()

> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
>  fs/exec.c                  |   1 +
>  include/linux/audit.h      |   5 ++
>  include/linux/sched.h      |   1 +
>  include/uapi/linux/audit.h |   1 +
>  init/init_task.c           |   7 +++
>  kernel/audit.c             |   1 +
>  kernel/auditsc.c           | 108 +++++++++++++++++++++++++++++++++++++
>  7 files changed, 124 insertions(+)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 1a827d55ba94..5c8776f692c5 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1381,6 +1381,7 @@ int begin_new_exec(struct linux_binprm * bprm)
>  	commit_creds(bprm->cred);
>  	bprm->cred = NULL;
>  
> +	audit_update_history();
>  	/*
>  	 * Disable monitoring for regular users
>  	 * when executing setuid binaries. Must
> diff --git a/include/linux/audit.h b/include/linux/audit.h
> index 6a3a9e122bb5..6291d0f76541 100644
> --- a/include/linux/audit.h
> +++ b/include/linux/audit.h
> @@ -397,6 +397,8 @@ static inline void audit_ptrace(struct task_struct *t)
>  		__audit_ptrace(t);
>  }
>  
> +extern void audit_update_history(void);
> +
>  				/* Private API (for audit.c only) */
>  extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
>  extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
> @@ -701,6 +703,9 @@ static inline void audit_ntp_log(const struct audit_ntp_data *ad)
>  static inline void audit_ptrace(struct task_struct *t)
>  { }
>  
> +static inline void audit_update_history(void)
> +{ }
> +
>  static inline void audit_log_nfcfg(const char *name, u8 af,
>  				   unsigned int nentries,
>  				   enum audit_nfcfgop op, gfp_t gfp)
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 609bde814cb0..f32076b6b733 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1112,6 +1112,7 @@ struct task_struct {
>  #ifdef CONFIG_AUDIT
>  #ifdef CONFIG_AUDITSYSCALL
>  	struct audit_context		*audit_context;
> +	char				*comm_history;
>  #endif
>  	kuid_t				loginuid;
>  	unsigned int			sessionid;
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index d676ed2b246e..186c0b5ca1b6 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -122,6 +122,7 @@
>  #define AUDIT_OPENAT2		1337	/* Record showing openat2 how args */
>  #define AUDIT_DM_CTRL		1338	/* Device Mapper target control */
>  #define AUDIT_DM_EVENT		1339	/* Device Mapper events */
> +#define AUDIT_PROCHISTORY	1340	/* Commname history emit event */
>  
>  #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
>  #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
> diff --git a/init/init_task.c b/init/init_task.c
> index ff6c4b9bfe6b..e3d481d1b010 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -57,6 +57,10 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
>  };
>  #endif
>  
> +#ifdef CONFIG_AUDITSYSCALL
> +extern char init_task_audit_history[];
> +#endif
> +
>  /*
>   * Set up the first task table, touch at your own risk!. Base=0,
>   * limit=0x1fffff (=2MB)
> @@ -137,6 +141,9 @@ struct task_struct init_task
>  #ifdef CONFIG_AUDIT
>  	.loginuid	= INVALID_UID,
>  	.sessionid	= AUDIT_SID_UNSET,
> +#ifdef CONFIG_AUDITSYSCALL
> +	.comm_history   = init_task_audit_history,
> +#endif
>  #endif
>  #ifdef CONFIG_PERF_EVENTS
>  	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 9bc0b0301198..034952abd83d 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -1674,6 +1674,7 @@ static int __init audit_init(void)
>  {
>  	int i;
>  
> +	audit_update_history();
>  	if (audit_initialized == AUDIT_DISABLED)
>  		return 0;
>  
> diff --git a/kernel/auditsc.c b/kernel/auditsc.c
> index addeed3df15d..6f1b124da2fe 100644
> --- a/kernel/auditsc.c
> +++ b/kernel/auditsc.c
> @@ -80,6 +80,9 @@
>  /* max length to print of cmdline/proctitle value during audit */
>  #define MAX_PROCTITLE_AUDIT_LEN 128
>  
> +/* max length for thread's comm name history */
> +static unsigned int audit_history_size __ro_after_init = 1024;
> +
>  /* number of audit rules */
>  int audit_n_rules;
>  
> @@ -1055,6 +1058,12 @@ int audit_alloc(struct task_struct *tsk)
>  	enum audit_state     state;
>  	char *key = NULL;
>  
> +	if (audit_history_size) {
> +		tsk->comm_history = kmemdup(current->comm_history, audit_history_size, GFP_KERNEL);
> +		if (!tsk->comm_history)
> +			return -ENOMEM;
> +	}
> +
>  	if (likely(!audit_ever_enabled))
>  		return 0;
>  
> @@ -1065,6 +1074,10 @@ int audit_alloc(struct task_struct *tsk)
>  	}
>  
>  	if (!(context = audit_alloc_context(state))) {
> +		if (audit_history_size) {
> +			kfree(tsk->comm_history);
> +			tsk->comm_history = NULL;
> +		}
>  		kfree(key);
>  		audit_log_lost("out of memory in audit_alloc");
>  		return -ENOMEM;
> @@ -1671,6 +1684,18 @@ static void audit_log_uring(struct audit_context *ctx)
>  	audit_log_end(ab);
>  }
>  
> +static void audit_log_history(struct audit_context *context)
> +{
> +	struct audit_buffer *ab;
> +
> +	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCHISTORY);
> +	if (!ab)
> +		return; /* audit_panic or being filtered */
> +	audit_log_format(ab, "history=");
> +	audit_log_untrustedstring(ab, current->comm_history);
> +	audit_log_end(ab);
> +}
> +
>  static void audit_log_exit(void)
>  {
>  	int i, call_panic = 0;
> @@ -1805,6 +1830,8 @@ static void audit_log_exit(void)
>  
>  	if (context->context == AUDIT_CTX_SYSCALL)
>  		audit_log_proctitle();
> +	if (audit_history_size)
> +		audit_log_history(context);
>  
>  	/* Send end of event record to help user space know we are finished */
>  	ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
> @@ -1824,6 +1851,10 @@ void __audit_free(struct task_struct *tsk)
>  {
>  	struct audit_context *context = tsk->audit_context;
>  
> +	if (audit_history_size) {
> +		kfree(tsk->comm_history);
> +		tsk->comm_history = NULL;
> +	}
>  	if (!context)
>  		return;
>  
> @@ -3034,3 +3065,80 @@ struct list_head *audit_killed_trees(void)
>  		return NULL;
>  	return &ctx->killed_trees;
>  }
> +
> +char init_task_audit_history[4096];
> +
> +static int __init audit_history_setup(char *str)
> +{
> +	unsigned int size;
> +
> +	if (kstrtouint(str, 10, &size))
> +		return -EINVAL;
> +	if (size > sizeof(init_task_audit_history))
> +		size = sizeof(init_task_audit_history);
> +	audit_history_size = size;
> +	return 0;
> +}
> +early_param("audit_history", audit_history_setup);
> +
> +void audit_update_history(void)
> +{
> +	int i;
> +	int required;
> +	struct tm tm;
> +	char buf[256];
> +	char *cp = buf;
> +
> +	if (!audit_history_size)
> +		return;
> +
> +	cp += snprintf(buf, sizeof(buf) - 1, "name=");
> +	for (i = 0; i < TASK_COMM_LEN; i++) {
> +		const unsigned char c = current->comm[i];
> +
> +		if (!c)
> +			break;
> +		if (isalnum(c) || c == '.' || c == '_' || c == '-' || c == '/') {
> +			*cp++ = c;
> +			continue;
> +		}
> +		*cp++ = '\\';
> +		*cp++ = (c >> 6) + '0';
> +		*cp++ = ((c >> 3) & 7) + '0';
> +		*cp++ = (c & 7) + '0';
> +	}
> +	/* Append PID. */
> +	cp += snprintf(cp, buf - cp + sizeof(buf) - 1, ";pid=%u",
> +		       current->pid);
> +	/* Append timestamp. */
> +	time64_to_tm(ktime_get_real_seconds(), 0, &tm);
> +	cp += snprintf(cp, buf - cp + sizeof(buf) - 1,
> +		       ";start=%04u%02u%02u%02u%02u%02u", (int) tm.tm_year + 1900,
> +		       tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min,
> +		       tm.tm_sec);
> +	/* Terminate the buffer. */
> +	if (cp >= buf + sizeof(buf))
> +		cp = buf + sizeof(buf) - 1;
> +	*cp = '\0';
> +	required = cp - buf + 1;
> +	/* Make some room by truncating old history. */
> +	cp = current->comm_history;
> +	i = strlen(cp);
> +	while (i + required >= audit_history_size - 3) {
> +		char *cp2 = memchr(cp, '>', i);
> +
> +		/* Reset history if audit_history_size is too small to truncate. */
> +		if (!cp2++) {
> +			*cp = '\0';
> +			return;
> +		}
> +		i -= cp2 - cp;
> +		memmove(cp, cp2, i + 1);
> +	}
> +	/* Emit the buffer. */
> +	if (i) {
> +		cp[i++] = '=';
> +		cp[i++] = '>';
> +	}
> +	memcpy(cp + i, buf, required);
> +}
> -- 
> 2.18.4
> 
> --
> Linux-audit mailing list
> Linux-audit@redhat.com
> https://listman.redhat.com/mailman/listinfo/linux-audit
> 

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
Upstream IRC: SunRaycer
Voice: +1.613.860 2354 SMS: +1.613.518.6570
Tetsuo Handa Aug. 12, 2023, 10:08 a.m. UTC | #2
On 2023/08/12 2:50, Richard Guy Briggs wrote:
> On 2023-08-11 19:58, Tetsuo Handa wrote:
>> When an unexpected system event occurs, the administrator may want to
>> identify which application triggered the event. For example, unexpected
>> process termination is still a real concern enough to write articles
>> like https://access.redhat.com/solutions/165993 .
>>
>> This patch adds a record which emits TOMOYO-like task history information
>> into the audit logs for better understanding of unexpected system events.
>>
>>   type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"
>>
>> To be able to avoid bloating audit log files due to this information, this
>> patch uses audit_history= kernel command line parameter that controls max
>> length of history in bytes (default is 1024, and setting to 0 disables
>> recording and emitting).
> 
> The record length limit is just below 8k.  Is it reasonable to set the
> default at 4k and cap it around 7.5k to be safe?

Below will do it. (I though default at 0 might be better...)

+static unsigned int audit_history_size __ro_after_init = 4096;
+char init_task_audit_history[7680];
+static int __init audit_history_setup(char *str)
+{
+	unsigned int size;
+
+	if (kstrtouint(str, 10, &size))
+		return -EINVAL;
+	if (size > sizeof(init_task_audit_history))
+		size = sizeof(init_task_audit_history);
+	audit_history_size = size;
+	return 0;
+}

> 
> Is proctitle also useful here?  It contains the full command line, but
> is influencible by userspace.

Full pathname is max 4096 bytes. But full command line effectively has no
limit, which might become very long. Shouldn't full command line arguments
be retrieved from execve()'s argv[] record? Since this history information
includes timestamp of execve(), also recording execve() requests will allow
administrators to find the full command line.

>> Unlike execve()'s argv record, records in this history information is
>> emitted as one string in order to reduce bloat of the audit log files.
>> This information can be split into an array using => as the tokenizer.
>> But don't expect that you can compare array elements throughout the whole
>> audit logs by splitting into an array, for old records get removed from
>> history when history became too long to append the newest record. This
>> history information is meant to be interpreted by humans rather than be
>> analyzed by programs.
> 
> You say this isn't intended to be analysed by programs, but we all know
> it will inevitably be attempted...  Would any of the descendants of
> audit_log_untrustedstring() be of use, in particular
> audit_string_contains_control()

I couldn't catch what you are suggesting. Are you suggesting something like

  type=UNKNOWN[1340] msg=audit(1691750738.271:108):
    n[$n]="swapper/0" n[$n+1]="init" n[$n+2]="systemd" n[$n+3]="sshd" n[$n+4]="sshd"
    p[$n]=1 p[$n+1]=1 p[$n+2]=1 p[$n+3]=3660 p[$n+4]=3767
    s[$n]=20230811194329 s[$n+1]=20230811194343 s[$n+2]=20230811194439 s[$n+3]=20230811104504 s[$n+4]=20230811104535

so that value for n[$n] is escaped only when control characters are in use?
Paul Moore Aug. 15, 2023, 6:44 p.m. UTC | #3
On Fri, Aug 11, 2023 at 6:58 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> When an unexpected system event occurs, the administrator may want to
> identify which application triggered the event. For example, unexpected
> process termination is still a real concern enough to write articles
> like https://access.redhat.com/solutions/165993 .
>
> This patch adds a record which emits TOMOYO-like task history information
> into the audit logs for better understanding of unexpected system events.
>
>   type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"

While I respect your persistence, we've talked about this quite a bit
already in other threads.  What you are trying to do is already
possible with audit and/or TOMOYO enabled and configured so I see no
reason why we want to merge this.  I understand your frustration that
TOMOYO is not enabled by your prefered distribution, but adding
additional (and arguably redundant code) code to the upstream kernel
is not a solution I am willing to support and maintain long term.

> To be able to avoid bloating audit log files due to this information, this
> patch uses audit_history= kernel command line parameter that controls max
> length of history in bytes (default is 1024, and setting to 0 disables
> recording and emitting).
>
> Unlike execve()'s argv record, records in this history information is
> emitted as one string in order to reduce bloat of the audit log files.
> This information can be split into an array using => as the tokenizer.
> But don't expect that you can compare array elements throughout the whole
> audit logs by splitting into an array, for old records get removed from
> history when history became too long to append the newest record. This
> history information is meant to be interpreted by humans rather than be
> analyzed by programs.
>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
>  fs/exec.c                  |   1 +
>  include/linux/audit.h      |   5 ++
>  include/linux/sched.h      |   1 +
>  include/uapi/linux/audit.h |   1 +
>  init/init_task.c           |   7 +++
>  kernel/audit.c             |   1 +
>  kernel/auditsc.c           | 108 +++++++++++++++++++++++++++++++++++++
>  7 files changed, 124 insertions(+)
Tetsuo Handa Aug. 16, 2023, 10:10 a.m. UTC | #4
On 2023/08/16 3:44, Paul Moore wrote:
> On Fri, Aug 11, 2023 at 6:58 AM Tetsuo Handa
> <penguin-kernel@i-love.sakura.ne.jp> wrote:
>>
>> When an unexpected system event occurs, the administrator may want to
>> identify which application triggered the event. For example, unexpected
>> process termination is still a real concern enough to write articles
>> like https://access.redhat.com/solutions/165993 .
>>
>> This patch adds a record which emits TOMOYO-like task history information
>> into the audit logs for better understanding of unexpected system events.
>>
>>   type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"
> 
> While I respect your persistence, we've talked about this quite a bit
> already in other threads.  What you are trying to do is already
> possible with audit

How?

>                     and/or TOMOYO enabled and configured

Wrong. Since not all LSM hooks allow sleeping, TOMOYO is unable to
check sending signals. Also, TOMOYO is not using audit interface.

>                                                          so I see no
> reason why we want to merge this.

This code makes it possible to record sending signals with TOMOYO-like context,
and we can avoid assigning LSM ID for this code if we can merge this code as
a part of audit.

>                                    I understand your frustration that
> TOMOYO is not enabled by your prefered distribution, but adding
> additional (and arguably redundant code) code to the upstream kernel
> is not a solution I am willing to support and maintain long term.

Never a redundant code. Absolutely no reason we don't want to merge.

The only choice is which approach (a standalone LSM module or a part of audit)
to go. Casey suggests this code as a part of audit. You must persuade Casey
if you don't want this code as a part of audit.
Paul Moore Aug. 16, 2023, 1:53 p.m. UTC | #5
On Wed, Aug 16, 2023 at 6:10 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
> On 2023/08/16 3:44, Paul Moore wrote:
> > On Fri, Aug 11, 2023 at 6:58 AM Tetsuo Handa
> > <penguin-kernel@i-love.sakura.ne.jp> wrote:
> >>
> >> When an unexpected system event occurs, the administrator may want to
> >> identify which application triggered the event. For example, unexpected
> >> process termination is still a real concern enough to write articles
> >> like https://access.redhat.com/solutions/165993 .
> >>
> >> This patch adds a record which emits TOMOYO-like task history information
> >> into the audit logs for better understanding of unexpected system events.
> >>
> >>   type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"
> >
> > While I respect your persistence, we've talked about this quite a bit
> > already in other threads.  What you are trying to do is already
> > possible with audit
>
> How?

If you configure audit to record exec() and friends you should have a
proper history of the processes started on the system.

> >                     and/or TOMOYO enabled and configured
>
> Wrong. Since not all LSM hooks allow sleeping, TOMOYO is unable to
> check sending signals. Also, TOMOYO is not using audit interface.

I said "audit and/or TOMOYO"; I believe the "and/or" is important.  If
I recall correctly, and perhaps I misunderstood you, you conceded that
a combination of audit *and/or* TOMOYO would solve this issue.

> >                                                          so I see no
> > reason why we want to merge this.
>
> This code makes it possible to record sending signals with TOMOYO-like context,
> and we can avoid assigning LSM ID for this code if we can merge this code as
> a part of audit.

If you want TOMOYO-like information, run TOMOYO.  If your preferred
distribution doesn't support TOMOYO, you need to either ask them to
support it, find a new distribution that does, or build your own
kernel.

> >                                    I understand your frustration that
> > TOMOYO is not enabled by your prefered distribution, but adding
> > additional (and arguably redundant code) code to the upstream kernel
> > is not a solution I am willing to support and maintain long term.
>
> Never a redundant code. Absolutely no reason we don't want to merge.

At this point in time, I obviously disagree.

> The only choice is which approach (a standalone LSM module or a part of audit)
> to go. Casey suggests this code as a part of audit. You must persuade Casey
> if you don't want this code as a part of audit.

To be very clear, it isn't my duty to persuade Casey about anything
(although if you've followed the LSM stacking saga you know I've
definitely tried on occasion! <g>).  My role here is to maintain the
audit subsystem and LSM layer (along with others which aren't relevant
here) to the best of my ability.  A big part of that is ensuring we
make "smart decisions" with respect to what code we merge as things
like new LSMs and new audit records are things that we have to support
*forever*.  Because of this rather extreme support burden I need to
make sure that we aren't making our jobs (current developers, current
maintainers, and those that will follow us) more difficult than
absolutely necessary.  From my current perspective, the benefits of
this patch, both in terms of unique functionality and durability of
the design/code, are not enough to outweigh the support burden.
Tetsuo Handa Aug. 18, 2023, 10:29 a.m. UTC | #6
On 2023/08/16 22:53, Paul Moore wrote:
> On Wed, Aug 16, 2023 at 6:10 AM Tetsuo Handa
> <penguin-kernel@i-love.sakura.ne.jp> wrote:
>> On 2023/08/16 3:44, Paul Moore wrote:
>>> On Fri, Aug 11, 2023 at 6:58 AM Tetsuo Handa
>>> <penguin-kernel@i-love.sakura.ne.jp> wrote:
>>>>
>>>> When an unexpected system event occurs, the administrator may want to
>>>> identify which application triggered the event. For example, unexpected
>>>> process termination is still a real concern enough to write articles
>>>> like https://access.redhat.com/solutions/165993 .
>>>>
>>>> This patch adds a record which emits TOMOYO-like task history information
>>>> into the audit logs for better understanding of unexpected system events.
>>>>
>>>>   type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"
>>>
>>> While I respect your persistence, we've talked about this quite a bit
>>> already in other threads.  What you are trying to do is already
>>> possible with audit
>>
>> How?
> 
> If you configure audit to record exec() and friends you should have a
> proper history of the processes started on the system.

That is a "No LSM modules other than SELinux is needed because SELinux can do
everything" assertion. People propose different approaches/implementations because
they can't afford utilizing/configuring existing approaches/implementations.

Your assertion is a fatal problem for merging "Re: [PATCH v13 00/11] LSM: Three basic syscalls"
at https://lkml.kernel.org/r/CAHC9VhQ4ttkSLTBCrXNZSBR1FP9UZ_gUHmo0BS37LCdyBmUeyA@mail.gmail.com .

Please please allow LSM modules like https://lkml.kernel.org/r/41d03271-ff8a-9888-11de-a7f53da47328@I-love.SAKURA.ne.jp
to obtain a stable LSM ID if you don't want to support something that possibly have an alternative.
Paul Moore Aug. 18, 2023, 2:59 p.m. UTC | #7
On Fri, Aug 18, 2023 at 6:30 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
> On 2023/08/16 22:53, Paul Moore wrote:
> > On Wed, Aug 16, 2023 at 6:10 AM Tetsuo Handa
> > <penguin-kernel@i-love.sakura.ne.jp> wrote:
> >> On 2023/08/16 3:44, Paul Moore wrote:
> >>> On Fri, Aug 11, 2023 at 6:58 AM Tetsuo Handa
> >>> <penguin-kernel@i-love.sakura.ne.jp> wrote:
> >>>>
> >>>> When an unexpected system event occurs, the administrator may want to
> >>>> identify which application triggered the event. For example, unexpected
> >>>> process termination is still a real concern enough to write articles
> >>>> like https://access.redhat.com/solutions/165993 .
> >>>>
> >>>> This patch adds a record which emits TOMOYO-like task history information
> >>>> into the audit logs for better understanding of unexpected system events.
> >>>>
> >>>>   type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"
> >>>
> >>> While I respect your persistence, we've talked about this quite a bit
> >>> already in other threads.  What you are trying to do is already
> >>> possible with audit
> >>
> >> How?
> >
> > If you configure audit to record exec() and friends you should have a
> > proper history of the processes started on the system.
>
> That is a "No LSM modules other than SELinux is needed because SELinux can do
> everything" assertion.

Except we are not talking SELinux or LSMs here, we are talking about
audit and the audit subsystem is very different from the LSM layer.
The LSM layer is designed to be pluggable with support for multiple
individual LSMs, whereas the audit subsystem is designed to support a
single audit implementation.  It is my opinion that the audit patch
you have proposed here does not provide an audit administrator with
any new capabilities that they do not currently have as an option.

There are also concerns around field formatting, record length, etc.,
but those are secondary issues compared to the more important issue of
redundant functionality.

> People propose different approaches/implementations because
> they can't afford utilizing/configuring existing approaches/implementations.

From what I've seen, both in this thread as well as the other related
threads from you, these recent efforts are due to a lack of TOMOYO
support in mainstream Linux distributions.  My advice is to stop
trying to duplicate the TOMOYO functionality in other subsystems/LSMs
and start working with the distributions to better understand why they
are not supporting TOMOYO.  I believe that if you can determine why
the distributions are not enabling TOMOYO, you should be able to
develop a plan to address those issues and eventually gain
distribution support for TOMOYO.  I understand that such an approach
will likely be time consuming and difficult, but I think that is your
best option for success.

> Your assertion is a fatal problem for merging "Re: [PATCH v13 00/11] LSM: Three basic syscalls"
> at https://lkml.kernel.org/r/CAHC9VhQ4ttkSLTBCrXNZSBR1FP9UZ_gUHmo0BS37LCdyBmUeyA@mail.gmail.com .
>
> Please please allow LSM modules like https://lkml.kernel.org/r/41d03271-ff8a-9888-11de-a7f53da47328@I-love.SAKURA.ne.jp
> to obtain a stable LSM ID

We've already discussed that in the TaskTracker thread.

> if you don't want to support something that possibly have an alternative.

We've already upstreamed an alternative approach to TaskTracker: TOMOYO.
Tetsuo Handa Aug. 19, 2023, 7:09 a.m. UTC | #8
On 2023/08/18 23:59, Paul Moore wrote:
> Except we are not talking SELinux or LSMs here, we are talking about
> audit and the audit subsystem is very different from the LSM layer.
> The LSM layer is designed to be pluggable with support for multiple
> individual LSMs, whereas the audit subsystem is designed to support a
> single audit implementation.  It is my opinion that the audit patch
> you have proposed here does not provide an audit administrator with
> any new capabilities that they do not currently have as an option.

Before explaining why an audit administrator cannot afford emulating
this patch, I explain what this patch will do.



There are three system calls for managing a process: fork()/execve()/exit().

  https://I-love.SAKURA.ne.jp/tomoyo/fork.gif
  https://I-love.SAKURA.ne.jp/tomoyo/execve.gif
  https://I-love.SAKURA.ne.jp/tomoyo/exit.gif

As a result, history of a process can be represented as a tree, where the
root of the tree is the kernel thread which is started by the boot loader.

  https://I-love.SAKURA.ne.jp/tomoyo/railway.gif

This fundamental mechanism cannot be changed as long as Linux remains as a
Unix-like OS. That is, adding this information will not cause what you call
"the support burden".



Currently, a "struct task_struct" has comm field and pid field, and
people use these fields like

  printk("[%s:%d] some msg comes here\n", current->comm, current->pid);

for giving hints for identifying a process.

What this patch does is to allow people do like

  printk("[%s] some msg comes here\n", current->comm_history);

for giving hints for identifying a process more precisely.
That is, users of this information is not limited to audit. For example,
an LSM module can use this information, an audit record can use this
information, a SystemTap script can use this information, and so on...



> 
> There are also concerns around field formatting, record length, etc.,
> but those are secondary issues compared to the more important issue of
> redundant functionality.

If someone tries to emulate this patch, we need to be able to trace all
fork()/execve()/exit() system calls. Or, the history tree will be broken.

If an audit administrator tries to emulate this patch using system call
auditing functionality, we need to make sure that

  "auditctl -D" must not clear rules for tracing fork()/execve()/exit()
  system calls. This is impossible because this change will break userspace
  programs expecting that "auditctl -D" clears all rules.

  Rules for tracing fork()/execve()/exit() system calls must be enabled
  when the kernel thread which is started by the boot loader starts.
  How can we embed such system call auditing rules into the kernel and
  tell whether to enable these rules using the kernel command line options?

  In order to avoid possibility of loosing fork()/execve()/exit() records,
  auditd must not be stopped even temporarily. Who wants to enforce such
  requirement in order to be able to obtain process history information?

It seems that Linux kernel also offers "proc connector" mechanism. But is that
reliable enough to guarantee that all fork()/execve()/exit() histories are
kept up-to-dated? Any emulation done by userspace programs is unreliable,
for programs for emulating this patch are started too late to trace all.
Only the built-in kernel code can trace all fork()/execve()/exit() events
and guarantee that all fork()/execve()/exit() histories are kept up-to-dated.



The tracing implemented by this patch needs to be done using the kernel code.

https://www.intellilink.co.jp/column/oss/2014/093001.aspx (sorry, this article
is written in Japanese language) explains 4 survey methods for finding locations
affected by CVE-2014-6271 (also known as "ShellShock"). This article was based on
https://I-love.SAKURA.ne.jp/tomoyo/LCJ2014-en.pdf . Copying P43 and P44 of this
PDF file here:

  Is SystemTap good at everything?

    SystemTap can be used for not only measuring performance
    of functionality but also tracing functionality.

      LSM interface allows probing at only locations where LSM
      callback hooks are provided, for it is designed for making
      security decision and auditing.

    SystemTap allows probing at almost everywhere (not only the
    start/end of a function but also any line number in a function in
    the source code).

      For example, you can find out the exact location in the
      source code where the errno the system call auditing would
      record was set, by writing a SystemTap script which probes
      at specific line number

    Unfortunately, SystemTap is not a tool designed for monitoring
    throughout years.

      LSM modules do not skip events nor stop working until
      shutdown, but SystemTap might skip events or stop working
      due to SystemTap's safety mechanism (and/or external
      factors like SIGKILL) before the event you want to record
      occurs.

    Please check whether SystemTap is suitable for solving your
    problem.

      There will be cases where system call auditing is better.
      There will be cases where single function LSM modules
      explained later is better.

P100, P101, P102 of this PDF file demonstrates a SystemTap script
which tries to emulate subset of what this patch can do. The difficult
part of such emulation is mainly managing the process history tree.
If the built-in kernel code offers the process history tree, writing
SystemTap scripts will become much easier. 

But I can't propose this code as a patch for SystemTap, for SystemTap
is not a in-tree kernel code. I need to propose this code as a patch
for in-tree kernel code. LSM and system call auditing are users who
can utilize this code. But you don't want this code as a patch for
audit due to unknown "the support burden".



> 
>> People propose different approaches/implementations because
>> they can't afford utilizing/configuring existing approaches/implementations.
> 
> From what I've seen, both in this thread as well as the other related
> threads from you, these recent efforts are due to a lack of TOMOYO
> support in mainstream Linux distributions.  My advice is to stop
> trying to duplicate the TOMOYO functionality in other subsystems/LSMs
> and start working with the distributions to better understand why they
> are not supporting TOMOYO.  I believe that if you can determine why
> the distributions are not enabling TOMOYO, you should be able to
> develop a plan to address those issues and eventually gain
> distribution support for TOMOYO.  I understand that such an approach
> will likely be time consuming and difficult, but I think that is your
> best option for success.

https://bugzilla.redhat.com/show_bug.cgi?id=542986 was the request for
enabling TOMOYO in Fedora. But Red Hat people do not want to support
TOMOYO due to unknown "the support burden" like you say.

TOMOYO is tested by syzbot, and quite few bugs have been found in TOMOYO.
I think that enabling TOMOYO in Fedora/RHEL kernels won't cause "the support
burden.". But I can't determine why Red Hat people do not enable TOMOYO.
How can I convince Red Hat people afraiding unprovable, unexplainable,
unknown "the support burden" ?



Anyway, enabling TOMOYO in Fedora/RHEL kernels won't solve the problem
this patch is trying to solve, for TOMOYO cannot utilize TOMOYO's process
history information because LSM hook for sending signals does not allow
TOMOYO to sleep...
Serge E. Hallyn Aug. 21, 2023, 4:04 p.m. UTC | #9
On Sat, Aug 19, 2023 at 04:09:46PM +0900, Tetsuo Handa wrote:
> Anyway, enabling TOMOYO in Fedora/RHEL kernels won't solve the problem
> this patch is trying to solve, for TOMOYO cannot utilize TOMOYO's process
> history information because LSM hook for sending signals does not allow
> TOMOYO to sleep...

Hang on a tick - I think perhaps you should have led with this.  The
main argument against this has been (iiuc) that it is a subset of
tomoyo functionality.  In that case, I'm on the fence about whether it
should be included.

But here you say that tomoyo cannot do this.  If that's the case, and
this is simply completely new functionality, that changes things.

-serge
Paul Moore Aug. 21, 2023, 4:35 p.m. UTC | #10
On Sat, Aug 19, 2023 at 3:09 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
> On 2023/08/18 23:59, Paul Moore wrote:
> > Except we are not talking SELinux or LSMs here, we are talking about
> > audit and the audit subsystem is very different from the LSM layer.
> > The LSM layer is designed to be pluggable with support for multiple
> > individual LSMs, whereas the audit subsystem is designed to support a
> > single audit implementation.  It is my opinion that the audit patch
> > you have proposed here does not provide an audit administrator with
> > any new capabilities that they do not currently have as an option.
>
> Before explaining why an audit administrator cannot afford emulating
> this patch, I explain what this patch will do.
>
> There are three system calls for managing a process: fork()/execve()/exit().
>
>   https://I-love.SAKURA.ne.jp/tomoyo/fork.gif
>   https://I-love.SAKURA.ne.jp/tomoyo/execve.gif
>   https://I-love.SAKURA.ne.jp/tomoyo/exit.gif
>
> As a result, history of a process can be represented as a tree, where the
> root of the tree is the kernel thread which is started by the boot loader.
>
>   https://I-love.SAKURA.ne.jp/tomoyo/railway.gif
>
> This fundamental mechanism cannot be changed as long as Linux remains as a
> Unix-like OS. That is, adding this information will not cause what you call
> "the support burden" ...

Any new functionality added to the kernel, especially user visible
functionality or some sort of interface, adds a support burden.
Nothing is "free".

> > There are also concerns around field formatting, record length, etc.,
> > but those are secondary issues compared to the more important issue of
> > redundant functionality.
>
> If someone tries to emulate this patch, we need to be able to trace all
> fork()/execve()/exit() system calls. Or, the history tree will be broken.
>
> If an audit administrator tries to emulate this patch using system call
> auditing functionality, we need to make sure that
>
>   "auditctl -D" must not clear rules for tracing fork()/execve()/exit()
>   system calls. This is impossible because this change will break userspace
>   programs expecting that "auditctl -D" clears all rules.

It's a good thing that 'audtictl -d ...' exists so that one can
selectively delete audit rules from the kernel.  If someone wants to
preserve specific audit rules, that is the way to do it; 'auditctl -D'
is a very coarse tool and not something that is likely very useful for
users with strict auditing requirements.

>   Rules for tracing fork()/execve()/exit() system calls must be enabled
>   when the kernel thread which is started by the boot loader starts.
>   How can we embed such system call auditing rules into the kernel and
>   tell whether to enable these rules using the kernel command line options?

I would boot the system with 'audit=1' on the kernel command line and
ensure that your desired audit rules are loaded as early in the boot
process as possible, before any long-running processes/daemons/logins
are started.  Honestly, that's simply a good best practice for anyone
who cares about maintaining a proper audit log, independent of the
specific use case here.

>   In order to avoid possibility of loosing fork()/execve()/exit() records,
>   auditd must not be stopped even temporarily. Who wants to enforce such
>   requirement in order to be able to obtain process history information?

A silly amount of work has gone into ensuring that the audit subsystem
in the kernel doesn't lose records when properly configured.  If you
haven't already, I would encourage you to read the auditctl(8) man
page and look for the parameters that adjust the audit backlog
configuration.
Serge E. Hallyn Aug. 21, 2023, 5:29 p.m. UTC | #11
On Fri, Aug 11, 2023 at 07:58:06PM +0900, Tetsuo Handa wrote:
> When an unexpected system event occurs, the administrator may want to

I think I would at times find this useful, however the audit maintainer
has said no to this patch, and the reasoning is sound (there are other
ways to do or approximate this, and maintainer support burden is
increased - which is why that decision is purely his).  So I'm giving
some technical feedback here, because the code is still out here :), but
I am not arguing with Paul's nack.

> identify which application triggered the event. For example, unexpected
> process termination is still a real concern enough to write articles
> like https://access.redhat.com/solutions/165993 .
> 
> This patch adds a record which emits TOMOYO-like task history information

I don't think you should mention TOMOYO here.  Unless I'm expected
to go read the TOMOYO docs before reviewing this patch.

> into the audit logs for better understanding of unexpected system events.
> 
>   type=UNKNOWN[1340] msg=audit(1691750738.271:108): history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;start=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=sshd;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=20230811104535"
> 
> To be able to avoid bloating audit log files due to this information, this
> patch uses audit_history= kernel command line parameter that controls max
> length of history in bytes (default is 1024, and setting to 0 disables
> recording and emitting).
> 
> Unlike execve()'s argv record, records in this history information is
> emitted as one string in order to reduce bloat of the audit log files.
> This information can be split into an array using => as the tokenizer.
> But don't expect that you can compare array elements throughout the whole
> audit logs by splitting into an array, for old records get removed from
> history when history became too long to append the newest record. This
> history information is meant to be interpreted by humans rather than be
> analyzed by programs.
>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
>  fs/exec.c                  |   1 +
>  include/linux/audit.h      |   5 ++
>  include/linux/sched.h      |   1 +
>  include/uapi/linux/audit.h |   1 +
>  init/init_task.c           |   7 +++
>  kernel/audit.c             |   1 +
>  kernel/auditsc.c           | 108 +++++++++++++++++++++++++++++++++++++
>  7 files changed, 124 insertions(+)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 1a827d55ba94..5c8776f692c5 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1381,6 +1381,7 @@ int begin_new_exec(struct linux_binprm * bprm)
>  	commit_creds(bprm->cred);
>  	bprm->cred = NULL;
>  
> +	audit_update_history();
>  	/*
>  	 * Disable monitoring for regular users
>  	 * when executing setuid binaries. Must
> diff --git a/include/linux/audit.h b/include/linux/audit.h
> index 6a3a9e122bb5..6291d0f76541 100644
> --- a/include/linux/audit.h
> +++ b/include/linux/audit.h
> @@ -397,6 +397,8 @@ static inline void audit_ptrace(struct task_struct *t)
>  		__audit_ptrace(t);
>  }
>  
> +extern void audit_update_history(void);
> +
>  				/* Private API (for audit.c only) */
>  extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
>  extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
> @@ -701,6 +703,9 @@ static inline void audit_ntp_log(const struct audit_ntp_data *ad)
>  static inline void audit_ptrace(struct task_struct *t)
>  { }
>  
> +static inline void audit_update_history(void)
> +{ }
> +
>  static inline void audit_log_nfcfg(const char *name, u8 af,
>  				   unsigned int nentries,
>  				   enum audit_nfcfgop op, gfp_t gfp)
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 609bde814cb0..f32076b6b733 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1112,6 +1112,7 @@ struct task_struct {
>  #ifdef CONFIG_AUDIT
>  #ifdef CONFIG_AUDITSYSCALL
>  	struct audit_context		*audit_context;
> +	char				*comm_history;
>  #endif
>  	kuid_t				loginuid;
>  	unsigned int			sessionid;
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index d676ed2b246e..186c0b5ca1b6 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -122,6 +122,7 @@
>  #define AUDIT_OPENAT2		1337	/* Record showing openat2 how args */
>  #define AUDIT_DM_CTRL		1338	/* Device Mapper target control */
>  #define AUDIT_DM_EVENT		1339	/* Device Mapper events */
> +#define AUDIT_PROCHISTORY	1340	/* Commname history emit event */
>  
>  #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
>  #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
> diff --git a/init/init_task.c b/init/init_task.c
> index ff6c4b9bfe6b..e3d481d1b010 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -57,6 +57,10 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
>  };
>  #endif
>  
> +#ifdef CONFIG_AUDITSYSCALL
> +extern char init_task_audit_history[];
> +#endif
> +
>  /*
>   * Set up the first task table, touch at your own risk!. Base=0,
>   * limit=0x1fffff (=2MB)
> @@ -137,6 +141,9 @@ struct task_struct init_task
>  #ifdef CONFIG_AUDIT
>  	.loginuid	= INVALID_UID,
>  	.sessionid	= AUDIT_SID_UNSET,
> +#ifdef CONFIG_AUDITSYSCALL
> +	.comm_history   = init_task_audit_history,
> +#endif
>  #endif
>  #ifdef CONFIG_PERF_EVENTS
>  	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 9bc0b0301198..034952abd83d 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -1674,6 +1674,7 @@ static int __init audit_init(void)
>  {
>  	int i;
>  
> +	audit_update_history();
>  	if (audit_initialized == AUDIT_DISABLED)
>  		return 0;
>  
> diff --git a/kernel/auditsc.c b/kernel/auditsc.c
> index addeed3df15d..6f1b124da2fe 100644
> --- a/kernel/auditsc.c
> +++ b/kernel/auditsc.c
> @@ -80,6 +80,9 @@
>  /* max length to print of cmdline/proctitle value during audit */
>  #define MAX_PROCTITLE_AUDIT_LEN 128
>  
> +/* max length for thread's comm name history */
> +static unsigned int audit_history_size __ro_after_init = 1024;
> +
>  /* number of audit rules */
>  int audit_n_rules;
>  
> @@ -1055,6 +1058,12 @@ int audit_alloc(struct task_struct *tsk)
>  	enum audit_state     state;
>  	char *key = NULL;
>  
> +	if (audit_history_size) {
> +		tsk->comm_history = kmemdup(current->comm_history, audit_history_size, GFP_KERNEL);
> +		if (!tsk->comm_history)
> +			return -ENOMEM;
> +	}
> +
>  	if (likely(!audit_ever_enabled))
>  		return 0;
>  
> @@ -1065,6 +1074,10 @@ int audit_alloc(struct task_struct *tsk)
>  	}
>  
>  	if (!(context = audit_alloc_context(state))) {
> +		if (audit_history_size) {
> +			kfree(tsk->comm_history);
> +			tsk->comm_history = NULL;
> +		}
>  		kfree(key);
>  		audit_log_lost("out of memory in audit_alloc");
>  		return -ENOMEM;
> @@ -1671,6 +1684,18 @@ static void audit_log_uring(struct audit_context *ctx)
>  	audit_log_end(ab);
>  }
>  
> +static void audit_log_history(struct audit_context *context)
> +{
> +	struct audit_buffer *ab;
> +
> +	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCHISTORY);
> +	if (!ab)
> +		return; /* audit_panic or being filtered */
> +	audit_log_format(ab, "history=");
> +	audit_log_untrustedstring(ab, current->comm_history);
> +	audit_log_end(ab);
> +}
> +
>  static void audit_log_exit(void)
>  {
>  	int i, call_panic = 0;
> @@ -1805,6 +1830,8 @@ static void audit_log_exit(void)
>  
>  	if (context->context == AUDIT_CTX_SYSCALL)
>  		audit_log_proctitle();
> +	if (audit_history_size)
> +		audit_log_history(context);
>  
>  	/* Send end of event record to help user space know we are finished */
>  	ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
> @@ -1824,6 +1851,10 @@ void __audit_free(struct task_struct *tsk)
>  {
>  	struct audit_context *context = tsk->audit_context;
>  
> +	if (audit_history_size) {
> +		kfree(tsk->comm_history);
> +		tsk->comm_history = NULL;
> +	}
>  	if (!context)
>  		return;
>  
> @@ -3034,3 +3065,80 @@ struct list_head *audit_killed_trees(void)
>  		return NULL;
>  	return &ctx->killed_trees;
>  }
> +
> +char init_task_audit_history[4096];
> +
> +static int __init audit_history_setup(char *str)
> +{
> +	unsigned int size;
> +
> +	if (kstrtouint(str, 10, &size))
> +		return -EINVAL;
> +	if (size > sizeof(init_task_audit_history))
> +		size = sizeof(init_task_audit_history);
> +	audit_history_size = size;
> +	return 0;
> +}
> +early_param("audit_history", audit_history_setup);
> +
> +void audit_update_history(void)
> +{
> +	int i;
> +	int required;
> +	struct tm tm;
> +	char buf[256];
> +	char *cp = buf;
> +
> +	if (!audit_history_size)
> +		return;
> +
> +	cp += snprintf(buf, sizeof(buf) - 1, "name=");

(i don't think -1 is really needed here, snprintf(x, 3, "abcd") will
put 0 into x[2].)

> +	for (i = 0; i < TASK_COMM_LEN; i++) {

Note that while I think it very unlikely that TASK_COMM_LEN would
be increased to > 250, this *is* kind of a timebomb.  Better to
either calculate the size of buf as a fn of TASK_COMM_LEN, or check
for (cp-buf) not getting too large here.

> +		const unsigned char c = current->comm[i];
> +
> +		if (!c)
> +			break;
> +		if (isalnum(c) || c == '.' || c == '_' || c == '-' || c == '/') {
> +			*cp++ = c;
> +			continue;
> +		}
> +		*cp++ = '\\';
> +		*cp++ = (c >> 6) + '0';
> +		*cp++ = ((c >> 3) & 7) + '0';
> +		*cp++ = (c & 7) + '0';
> +	}
> +	/* Append PID. */
> +	cp += snprintf(cp, buf - cp + sizeof(buf) - 1, ";pid=%u",
> +		       current->pid);
> +	/* Append timestamp. */
> +	time64_to_tm(ktime_get_real_seconds(), 0, &tm);
> +	cp += snprintf(cp, buf - cp + sizeof(buf) - 1,
> +		       ";start=%04u%02u%02u%02u%02u%02u", (int) tm.tm_year + 1900,
> +		       tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min,
> +		       tm.tm_sec);
> +	/* Terminate the buffer. */
> +	if (cp >= buf + sizeof(buf))
> +		cp = buf + sizeof(buf) - 1;
> +	*cp = '\0';
> +	required = cp - buf + 1;
> +	/* Make some room by truncating old history. */
> +	cp = current->comm_history;
> +	i = strlen(cp);
> +	while (i + required >= audit_history_size - 3) {
> +		char *cp2 = memchr(cp, '>', i);
> +
> +		/* Reset history if audit_history_size is too small to truncate. */
> +		if (!cp2++) {
> +			*cp = '\0';
> +			return;
> +		}
> +		i -= cp2 - cp;
> +		memmove(cp, cp2, i + 1);

Would it be better to first calculate the optimal cp2 to use,
then do the memmove, to avoid repeated calls to memmove?

> +	}
> +	/* Emit the buffer. */
> +	if (i) {
> +		cp[i++] = '=';
> +		cp[i++] = '>';
> +	}
> +	memcpy(cp + i, buf, required);
> +}
> -- 
> 2.18.4
> 
>
Tetsuo Handa Aug. 21, 2023, 10:23 p.m. UTC | #12
On 2023/08/22 1:04, Serge E. Hallyn wrote:
> On Sat, Aug 19, 2023 at 04:09:46PM +0900, Tetsuo Handa wrote:
>> Anyway, enabling TOMOYO in Fedora/RHEL kernels won't solve the problem
>> this patch is trying to solve, for TOMOYO cannot utilize TOMOYO's process
>> history information because LSM hook for sending signals does not allow
>> TOMOYO to sleep...
> 
> Hang on a tick - I think perhaps you should have led with this.  The
> main argument against this has been (iiuc) that it is a subset of
> tomoyo functionality.  In that case, I'm on the fence about whether it
> should be included.
> 
> But here you say that tomoyo cannot do this.  If that's the case, and
> this is simply completely new functionality, that changes things.

This information is duplicated upon fork() and updated upon execve().
That is how TOMOYO defines TOMOYO's process history information.
Therefore, I'm saying "TOMOYO-like task history information".
But this information provided by this patch is different from TOMOYO's
process history information provided by TOMOYO in two ways.

One is that TOMOYO embeds pathname of a program passed to execve() into
TOMOYO's task history information because it is used for describing access
control rules, whereas this patch embeds comm name and pid and time of execve()
into task history information because it is used for helping administrators
understand system events.

The other is that TOMOYO can check and generate logs with TOMOYO's task history
information for only operations that can sleep (e.g. open()/execve()), whereas
this patch can check and generate logs with task history information for both
sleepable (e.g. open()/execve()) and non-sleepable (e.g. kill()) operations.

Since one of use cases of this task history information is to identify who sent
a signal that caused an unexpected process termination, TOMOYO cannot be do it.

Also, system calls are not the only source of sending signals. There are signals
delivered without security checks via LSM modules. In that case, inserting a
SystemTap script helps catching such signals. But SystemTap scripts are loaded
too late to emulate task history information from boot.
Steve Grubb Aug. 22, 2023, 4:29 p.m. UTC | #13
On Wednesday, August 16, 2023 9:53:58 AM EDT Paul Moore wrote:
> On Wed, Aug 16, 2023 at 6:10 AM Tetsuo Handa
> <penguin-kernel@i-love.sakura.ne.jp> wrote:
> > On 2023/08/16 3:44, Paul Moore wrote:
> > > On Fri, Aug 11, 2023 at 6:58 AM Tetsuo Handa
> > > <penguin-kernel@i-love.sakura.ne.jp> wrote:
> > >> When an unexpected system event occurs, the administrator may want to
> > >> identify which application triggered the event. For example,
> > >> unexpected process termination is still a real concern enough to write
> > >> articles like https://access.redhat.com/solutions/165993 .
> > >> 
> > >> This patch adds a record which emits TOMOYO-like task history
> > >> information into the audit logs for better understanding of unexpected
> > >> system events.
> > >> 
> > >> type=UNKNOWN[1340] msg=audit(1691750738.271:108):
> > >> history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;s
> > >> tart=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=ssh
> > >> d;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=202308111045
> > >> 35"
> > >
> > > While I respect your persistence, we've talked about this quite a bit
> > > already in other threads.  What you are trying to do is already
> > > possible with audit
> > 
> > How?
> 
> If you configure audit to record exec() and friends you should have a
> proper history of the processes started on the system.

This is not a practical solution. Yes, technically this could be done. But it 
would be a huge burden on the system to keep up with this. And it would bury 
events you truly wanted to see effectively DoS'ing the audit system.

-Steve
Paul Moore Aug. 22, 2023, 5:58 p.m. UTC | #14
On Tue, Aug 22, 2023 at 12:29 PM Steve Grubb <sgrubb@redhat.com> wrote:
> On Wednesday, August 16, 2023 9:53:58 AM EDT Paul Moore wrote:
> > On Wed, Aug 16, 2023 at 6:10 AM Tetsuo Handa
> > <penguin-kernel@i-love.sakura.ne.jp> wrote:
> > > On 2023/08/16 3:44, Paul Moore wrote:
> > > > On Fri, Aug 11, 2023 at 6:58 AM Tetsuo Handa
> > > > <penguin-kernel@i-love.sakura.ne.jp> wrote:
> > > >> When an unexpected system event occurs, the administrator may want to
> > > >> identify which application triggered the event. For example,
> > > >> unexpected process termination is still a real concern enough to write
> > > >> articles like https://access.redhat.com/solutions/165993 .
> > > >>
> > > >> This patch adds a record which emits TOMOYO-like task history
> > > >> information into the audit logs for better understanding of unexpected
> > > >> system events.
> > > >>
> > > >> type=UNKNOWN[1340] msg=audit(1691750738.271:108):
> > > >> history="name=swapper/0;pid=1;start=20230811194329=>name=init;pid=1;s
> > > >> tart=20230811194343=>name=systemd;pid=1;start=20230811194439=>name=ssh
> > > >> d;pid=3660;start=20230811104504=>name=sshd;pid=3767;start=202308111045
> > > >> 35"
> > > >
> > > > While I respect your persistence, we've talked about this quite a bit
> > > > already in other threads.  What you are trying to do is already
> > > > possible with audit
> > >
> > > How?
> >
> > If you configure audit to record exec() and friends you should have a
> > proper history of the processes started on the system.
>
> This is not a practical solution. Yes, technically this could be done. But it
> would be a huge burden on the system to keep up with this. And it would bury
> events you truly wanted to see effectively DoS'ing the audit system.

If the audit subsystem can't handle the load, that is a separate issue.
Tetsuo Handa Aug. 23, 2023, 2:18 p.m. UTC | #15
On 2023/08/22 1:35, Paul Moore wrote:
>>   "auditctl -D" must not clear rules for tracing fork()/execve()/exit()
>>   system calls. This is impossible because this change will break userspace
>>   programs expecting that "auditctl -D" clears all rules.
> 
> It's a good thing that 'audtictl -d ...' exists so that one can
> selectively delete audit rules from the kernel.  If someone wants to
> preserve specific audit rules, that is the way to do it; 'auditctl -D'
> is a very coarse tool and not something that is likely very useful for
> users with strict auditing requirements.

In most systems, "auditctl -D" is the first command done via /etc/audit/audit.rules file.
You are asking all administrators who want to emulate this patch's functionality using
auditd to customize that line. We can't afford asking such administrators to become
specialist of strict auditing configurations, as well as we can't afford asking
every administrator to become specialist of strict SELinux policy configurations.

Like Steve Grubb mentions, technically possible and practically affordable are
different. The audit subsystem could handle the load, but the system administrator 
can't handle the load. That's why I said

  That is a "No LSM modules other than SELinux is needed because SELinux can do
  everything" assertion.

and your response

  Except we are not talking SELinux or LSMs here, we are talking about
  audit and the audit subsystem is very different from the LSM layer.
  The LSM layer is designed to be pluggable with support for multiple
  individual LSMs, whereas the audit subsystem is designed to support a
  single audit implementation.

is totally missing the point.

For example, doing

  auditctl -a exit,always -F arch=b64 -S exit,exit_group

(in order to allow userspace daemon which tries to emulate this patch's
functionality) will let auditd to generate process termination logs via exit()
system call. This command alone can generate too much stress on a busy system
(performance DoS and storage DoS). And moreover, this command will not let
auditd to generate process termination logs via kill() system call.

  kill -9 $$

Auditing kill system call may generate more stress than auditing exit system call.
Too much noisy logs for catching the exact one event we want to know.

Also, despite too much logs, system calls are not the only source of sending
signals. There are signals delivered without security checks via LSM modules.



The requirements for emulating functionality provided by this patch will be

  (1) Catch _all_ process creations (both via fork()/clone() system calls and
      kthread_create() from the kernel), and duplicate the history upon process
      creation.

  (2) Catch _all_ execve(), and update the history upon successful execve().

  (3) Catch _all_ process terminations (both exit()/exit_group()/kill() system
      calls and internal reasons such as OOM killer), and erase the history upon
      process termination.

  (4) Do the above things without asking administrators to become a specialist of
      system management and without asking administrators to drastically change
      system configurations and without consuming too much CPU and storage.

. Simply we can't emulate functionality provided by this patch using audit rules.
Paul Moore Aug. 23, 2023, 2:48 p.m. UTC | #16
On Wed, Aug 23, 2023 at 10:18 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
> On 2023/08/22 1:35, Paul Moore wrote:
> >>   "auditctl -D" must not clear rules for tracing fork()/execve()/exit()
> >>   system calls. This is impossible because this change will break userspace
> >>   programs expecting that "auditctl -D" clears all rules.
> >
> > It's a good thing that 'audtictl -d ...' exists so that one can
> > selectively delete audit rules from the kernel.  If someone wants to
> > preserve specific audit rules, that is the way to do it; 'auditctl -D'
> > is a very coarse tool and not something that is likely very useful for
> > users with strict auditing requirements.
>
> In most systems, "auditctl -D" is the first command done via /etc/audit/audit.rules file.
> You are asking all administrators who want to emulate this patch's functionality using
> auditd to customize that line. We can't afford asking such administrators to become
> specialist of strict auditing configurations, as well as we can't afford asking
> every administrator to become specialist of strict SELinux policy configurations.

If an administrator/user needs to configure the audit subsystem to do
something, removing one line in a configuration file seems like a very
reasonable thing to do.  You can expect the default configuration of
every Linux distribution to fit every conceivable use case.

> Like Steve Grubb mentions, technically possible and practically affordable are
> different. The audit subsystem could handle the load, but the system administrator
> can't handle the load.

If an administrator/user wants this type of information in their audit
logs they need to be prepared to handle that information.

> That's why I said
>
>   That is a "No LSM modules other than SELinux is needed because SELinux can do
>   everything" assertion.

What?  That doesn't make any sense following what you said above.
You're starting to cherry pick quotes and apply them out of context,
which only hurts your argument further.

> and your response
>
>   Except we are not talking SELinux or LSMs here, we are talking about
>   audit and the audit subsystem is very different from the LSM layer.
>   The LSM layer is designed to be pluggable with support for multiple
>   individual LSMs, whereas the audit subsystem is designed to support a
>   single audit implementation.
>
> is totally missing the point.

It makes perfect sense in the original context, see my comment above,
and perhaps go re-read that original email.

> For example, doing
>
>   auditctl -a exit,always -F arch=b64 -S exit,exit_group
>
> (in order to allow userspace daemon which tries to emulate this patch's
> functionality) will let auditd to generate process termination logs via exit()
> system call. This command alone can generate too much stress on a busy system
> (performance DoS and storage DoS).

However, in this very same email, a few paragraphs above you conceded
that "The audit subsystem could handle the load".

> And moreover, this command will not let
> auditd to generate process termination logs via kill() system call.
>
>   kill -9 $$
>
> Auditing kill system call may generate more stress than auditing exit system call.
> Too much noisy logs for catching the exact one event we want to know.

We've already discussed this both from a kernel load perspective (it
should be able to handle the load, if not that is a separate problem
to address) as well as the human perspective (if you want auditing,
you need to be able to handle auditing).

Tetsuo, as should be apparent at this point, I'm finding your
arguments unconvincing at best, and confusing at worst.  If you or
someone else wants to take a different approach towards arguing for
this patch I'll entertain the discussion further, but please do not
reply back with the same approach; it simply isn't constructive at
this point.
Tetsuo Handa Aug. 24, 2023, 1:21 p.m. UTC | #17
On 2023/08/23 23:48, Paul Moore wrote:
> We've already discussed this both from a kernel load perspective (it
> should be able to handle the load, if not that is a separate problem
> to address) as well as the human perspective (if you want auditing,
> you need to be able to handle auditing).

No. You haven't shown us audit rules that can satisfy requirements shown below.

  (1) Catch _all_ process creations (both via fork()/clone() system calls and
      kthread_create() from the kernel), and duplicate the history upon process
      creation.

  (2) Catch _all_ execve(), and update the history upon successful execve().

  (3) Catch _all_ process terminations (both exit()/exit_group()/kill() system
      calls and internal reasons such as OOM killer), and erase the history upon
      process termination.

  (4) Do the above things without asking administrators to become a specialist of
      system management and without asking administrators to drastically change
      system configurations and without consuming too much CPU and storage.

We know we can't satisfy requirements shown above using audit rules. That's why
this patch maintains process history information without using audit rules.

> 
> Tetsuo, as should be apparent at this point, I'm finding your
> arguments unconvincing at best, and confusing at worst.  If you or
> someone else wants to take a different approach towards arguing for
> this patch I'll entertain the discussion further, but please do not
> reply back with the same approach; it simply isn't constructive at
> this point.

Although it will be nice if we can fetch this process history information
directly from "current", I can live with fetching this process history
information from "current->security" (that is, moving what you call "the
support burden" from audit subsystem to LSM module authors).
Paul Moore Aug. 24, 2023, 1:30 p.m. UTC | #18
On Thu, Aug 24, 2023 at 9:21 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
>
> On 2023/08/23 23:48, Paul Moore wrote:
> > We've already discussed this both from a kernel load perspective (it
> > should be able to handle the load, if not that is a separate problem
> > to address) as well as the human perspective (if you want auditing,
> > you need to be able to handle auditing).
>
> No. You haven't shown us audit rules that can satisfy requirements shown below.
>
>   (1) Catch _all_ process creations (both via fork()/clone() system calls and
>       kthread_create() from the kernel), and duplicate the history upon process
>       creation.

Create an audit filter rule to record the syscalls you are interested
in logging.

>   (2) Catch _all_ execve(), and update the history upon successful execve().

Create an audit filter rule to record the syscalls you are interested
in logging.

>   (3) Catch _all_ process terminations (both exit()/exit_group()/kill() system
>       calls and internal reasons such as OOM killer), and erase the history upon
>       process termination.

Create an audit filter rule to record the events you are interested in
logging, if there is an event which isn't being recorded feel free to
submit a patch to generate an audit record.
Tetsuo Handa Aug. 24, 2023, 1:39 p.m. UTC | #19
On 2023/08/24 22:30, Paul Moore wrote:
> On Thu, Aug 24, 2023 at 9:21 AM Tetsuo Handa
> <penguin-kernel@i-love.sakura.ne.jp> wrote:
>>
>> On 2023/08/23 23:48, Paul Moore wrote:
>>> We've already discussed this both from a kernel load perspective (it
>>> should be able to handle the load, if not that is a separate problem
>>> to address) as well as the human perspective (if you want auditing,
>>> you need to be able to handle auditing).
>>
>> No. You haven't shown us audit rules that can satisfy requirements shown below.
>>
>>   (1) Catch _all_ process creations (both via fork()/clone() system calls and
>>       kthread_create() from the kernel), and duplicate the history upon process
>>       creation.
> 
> Create an audit filter rule to record the syscalls you are interested
> in logging.

I can't interpret what you are talking about. Please show me using command line.

> 
>>   (2) Catch _all_ execve(), and update the history upon successful execve().
> 
> Create an audit filter rule to record the syscalls you are interested
> in logging.
> 
>>   (3) Catch _all_ process terminations (both exit()/exit_group()/kill() system
>>       calls and internal reasons such as OOM killer), and erase the history upon
>>       process termination.
> 
> Create an audit filter rule to record the events you are interested in
> logging, if there is an event which isn't being recorded feel free to
> submit a patch to generate an audit record.
>
Tetsuo Handa Aug. 24, 2023, 1:47 p.m. UTC | #20
On 2023/08/24 22:39, Tetsuo Handa wrote:
>>>   (1) Catch _all_ process creations (both via fork()/clone() system calls and
>>>       kthread_create() from the kernel), and duplicate the history upon process
>>>       creation.
>>
>> Create an audit filter rule to record the syscalls you are interested
>> in logging.
> 
> I can't interpret what you are talking about. Please show me using command line.

I'm not interested in logging the syscalls just for maintaining process history
information. I want you to explain using command line how we can trace process
creation/termination (both via syscalls and via kernel internal reasons).
How can auditd generate logs that are not triggered via syscalls?
Paul Moore Aug. 24, 2023, 2:24 p.m. UTC | #21
On Thu, Aug 24, 2023 at 9:39 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
> On 2023/08/24 22:30, Paul Moore wrote:
> > On Thu, Aug 24, 2023 at 9:21 AM Tetsuo Handa
> > <penguin-kernel@i-love.sakura.ne.jp> wrote:
> >>
> >> On 2023/08/23 23:48, Paul Moore wrote:
> >>> We've already discussed this both from a kernel load perspective (it
> >>> should be able to handle the load, if not that is a separate problem
> >>> to address) as well as the human perspective (if you want auditing,
> >>> you need to be able to handle auditing).
> >>
> >> No. You haven't shown us audit rules that can satisfy requirements shown below.
> >>
> >>   (1) Catch _all_ process creations (both via fork()/clone() system calls and
> >>       kthread_create() from the kernel), and duplicate the history upon process
> >>       creation.
> >
> > Create an audit filter rule to record the syscalls you are interested
> > in logging.
>
> I can't interpret what you are talking about. Please show me using command line.

I'm sorry Tetsuo, but I've already spent far too much time going in
circles with you on this topic.  As you are capable of submitting
kernel patches, you should be capable of reading a manpage and
experimenting yourself:

https://man7.org/linux/man-pages/man8/auditctl.8.html
Paul Moore Aug. 24, 2023, 2:26 p.m. UTC | #22
On Thu, Aug 24, 2023 at 9:47 AM Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
> On 2023/08/24 22:39, Tetsuo Handa wrote:
> >>>   (1) Catch _all_ process creations (both via fork()/clone() system calls and
> >>>       kthread_create() from the kernel), and duplicate the history upon process
> >>>       creation.
> >>
> >> Create an audit filter rule to record the syscalls you are interested
> >> in logging.
> >
> > I can't interpret what you are talking about. Please show me using command line.
>
> I'm not interested in logging the syscalls just for maintaining process history
> information.

That's unfortunate because I'm not interested in merging your patch
when we already have an audit log which can be used to trace process
history information.
Steve Grubb Aug. 24, 2023, 3:55 p.m. UTC | #23
Hello Paul,

On Thursday, August 24, 2023 9:30:10 AM EDT Paul Moore wrote:
> On Thu, Aug 24, 2023 at 9:21 AM Tetsuo Handa
> <penguin-kernel@i-love.sakura.ne.jp> wrote:
> > On 2023/08/23 23:48, Paul Moore wrote:
> > > We've already discussed this both from a kernel load perspective (it
> > > should be able to handle the load, if not that is a separate problem
> > > to address) as well as the human perspective (if you want auditing,
> > > you need to be able to handle auditing).
> > 
> > No. You haven't shown us audit rules that can satisfy requirements shown
> > below.> 
> >   (1) Catch _all_ process creations (both via fork()/clone() system calls
> >   and kthread_create() from the kernel), and duplicate the history upon
> >   process creation.
> 
> Create an audit filter rule to record the syscalls you are interested
> in logging.
> 
> >   (2) Catch _all_ execve(), and update the history upon successful
> >   execve().
> 
> Create an audit filter rule to record the syscalls you are interested
> in logging.
> 
> >   (3) Catch _all_ process terminations (both exit()/exit_group()/kill()
> >   system  calls and internal reasons such as OOM killer), and erase the
> >   history upon process termination.
> 
> Create an audit filter rule to record the events you are interested in
> logging, if there is an event which isn't being recorded feel free to
> submit a patch to generate an audit record.

I'm not for or against this or a similar patch. The information Tetsuo is 
looking for cannot be recreated from logs. What if it were a daemon that's 
been running for a year? With the amount of data you are suggesting to log, 
it would have rotated away months ago. To log all of the system calls you 
mention would be abusive of the audit system, hurt performance, wear out SSD 
drives, and ultimately fail.

There may be other reasons you don't like the patch and that's fine. But 
saying it can be done from user space after the fact is not helpful.

-Steve
Paul Moore Aug. 24, 2023, 5:02 p.m. UTC | #24
On Thu, Aug 24, 2023 at 11:55 AM Steve Grubb <sgrubb@redhat.com> wrote:
> On Thursday, August 24, 2023 9:30:10 AM EDT Paul Moore wrote:
> > On Thu, Aug 24, 2023 at 9:21 AM Tetsuo Handa
> > <penguin-kernel@i-love.sakura.ne.jp> wrote:
> > > On 2023/08/23 23:48, Paul Moore wrote:
> > > > We've already discussed this both from a kernel load perspective (it
> > > > should be able to handle the load, if not that is a separate problem
> > > > to address) as well as the human perspective (if you want auditing,
> > > > you need to be able to handle auditing).
> > >
> > > No. You haven't shown us audit rules that can satisfy requirements shown
> > > below.>
> > >   (1) Catch _all_ process creations (both via fork()/clone() system calls
> > >   and kthread_create() from the kernel), and duplicate the history upon
> > >   process creation.
> >
> > Create an audit filter rule to record the syscalls you are interested
> > in logging.
> >
> > >   (2) Catch _all_ execve(), and update the history upon successful
> > >   execve().
> >
> > Create an audit filter rule to record the syscalls you are interested
> > in logging.
> >
> > >   (3) Catch _all_ process terminations (both exit()/exit_group()/kill()
> > >   system  calls and internal reasons such as OOM killer), and erase the
> > >   history upon process termination.
> >
> > Create an audit filter rule to record the events you are interested in
> > logging, if there is an event which isn't being recorded feel free to
> > submit a patch to generate an audit record.
>
> I'm not for or against this or a similar patch.

That was my impression based on your previous comments, my opinion
remains unchanged.

> The information Tetsuo is
> looking for cannot be recreated from logs. What if it were a daemon that's
> been running for a year? With the amount of data you are suggesting to log,
> it would have rotated away months ago.

Just because it requires configuration and/or a way of maintaining log
information over a period of time does not mean it "cannot" be done.
I also suspect that the number of well managed, and properly updated
systems that have uptimes over a year are increasingly rare.  Yes,
there are systems with uptimes much longer than that, but my argument
is that those systems are not likely as security focused as they may
claim.

> To log all of the system calls you
> mention would be abusive of the audit system, hurt performance, wear out SSD
> drives, and ultimately fail.

Thank you for your input.  It is clear that we have different opinions
on this matter.

> There may be other reasons you don't like the patch and that's fine. But
> saying it can be done from user space after the fact is not helpful.

Arguably your choice to reintroduce arguments you have previously
made, which I believe I've answered, is also not helpful, yet here we
are.
Tetsuo Handa Aug. 24, 2023, 10:24 p.m. UTC | #25
On 2023/08/24 23:26, Paul Moore wrote:
> On Thu, Aug 24, 2023 at 9:47 AM Tetsuo Handa
> <penguin-kernel@i-love.sakura.ne.jp> wrote:
>> On 2023/08/24 22:39, Tetsuo Handa wrote:
>>>>>   (1) Catch _all_ process creations (both via fork()/clone() system calls and
>>>>>       kthread_create() from the kernel), and duplicate the history upon process
>>>>>       creation.
>>>>
>>>> Create an audit filter rule to record the syscalls you are interested
>>>> in logging.
>>>
>>> I can't interpret what you are talking about. Please show me using command line.
>>
>> I'm not interested in logging the syscalls just for maintaining process history
>> information.
> 
> That's unfortunate because I'm not interested in merging your patch
> when we already have an audit log which can be used to trace process
> history information.
> 

It is unfortunate that you continue ignoring the

  How can auditd generate logs that are not triggered via syscalls?

line. I know how to configure syscall rules using "-S" option. But I do
not know how to configure non syscall rules (such as process creation via
kthread_create(), process termination due to tty hangup or OOM killer).

It is unfortunate that you continue ignoring the

  What this patch does is to allow people do like

    printk("[%s] some msg comes here\n", current->comm_history);

  for giving hints for identifying a process more precisely.
  That is, users of this information is not limited to audit. For example,
  an LSM module can use this information, an audit record can use this
  information, a SystemTap script can use this information, and so on...

block. The logs generated via system call auditing is just an example.
I want to use this information from e.g. SystemTap scripts, but you care
about only system call auditing.

There are cases where the logs generated via system call auditing can solve
problems. But there are cases we need to hook different locations using e.g.
SystemTap scripts in order to catch the event.

I repeat:

  The auditd is not capable of generating _all_ records needed for maintaining
  this information.

  The logs generated via system call auditing is just an example user
  of this information.
Paul Moore Aug. 25, 2023, 3:36 a.m. UTC | #26
On August 24, 2023 6:24:47 PM Tetsuo Handa 
<penguin-kernel@I-love.SAKURA.ne.jp> wrote:
> On 2023/08/24 23:26, Paul Moore wrote:
>> On Thu, Aug 24, 2023 at 9:47 AM Tetsuo Handa
>> <penguin-kernel@i-love.sakura.ne.jp> wrote:
>>> On 2023/08/24 22:39, Tetsuo Handa wrote:
>>>>>> (1) Catch _all_ process creations (both via fork()/clone() system calls and
>>>>>>  kthread_create() from the kernel), and duplicate the history upon process
>>>>>>  creation.
>>>>>
>>>>> Create an audit filter rule to record the syscalls you are interested
>>>>> in logging.
>>>>
>>>> I can't interpret what you are talking about. Please show me using command 
>>>> line.
>>>
>>> I'm not interested in logging the syscalls just for maintaining process history
>>> information.
>>
>> That's unfortunate because I'm not interested in merging your patch
>> when we already have an audit log which can be used to trace process
>> history information.
>
> It is unfortunate that you continue ignoring the
>
>  How can auditd generate logs that are not triggered via syscalls?
>
> line. I know how to configure syscall rules using "-S" option. But I do
> not know how to configure non syscall rules (such as process creation via
> kthread_create(), process termination due to tty hangup or OOM killer).

At this point you've exhausted my goodwill so I would suggest simply 
reading the audit code, manages, and experimenting with a running system to 
understand how things work, especially for non-syscall records.

> I repeat:
>
>  The auditd is not capable of generating _all_ records needed for maintaining
>  this information.
>
>  The logs generated via system call auditing is just an example user
>  of this information.

I repeat:

If you find a place in the code where you believe there should be an audit 
record, post a patch and we can discuss it.

--
paul-moore.com
Tetsuo Handa Aug. 26, 2023, 6:38 a.m. UTC | #27
On 2023/08/25 12:36, Paul Moore wrote:
>> It is unfortunate that you continue ignoring the
>>
>>  How can auditd generate logs that are not triggered via syscalls?
>>
>> line. I know how to configure syscall rules using "-S" option. But I do
>> not know how to configure non syscall rules (such as process creation via
>> kthread_create(), process termination due to tty hangup or OOM killer).
> 
> At this point you've exhausted my goodwill so I would suggest simply reading
> the audit code, manages, and experimenting with a running system to understand
> how things work, especially for non-syscall records.

Are we on the same page that non-syscall records include process creation via
kthread_create() and process termination via send_sig() ?

I tried "make M=audit_test/" with below example.

audit_test/audit_test.c
----------------------------------------
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched/signal.h>

static int test_kthread(void *unused)
{
	char *argv[3] = { "/bin/sleep", "10", NULL };
	char *envp[1] = { NULL };
	struct task_struct *p;
	
	printk("test_kthread is running with PID=%d\n", current->pid);
	call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
	rcu_read_lock();
	for_each_process(p) {
		if (!(p->flags & PF_KTHREAD) && !strcmp(p->comm, "sleep")) {
			printk("Sending signal to PID=%d\n", p->pid);
			send_sig(SIGKILL, p, 1);
		}
	}
	rcu_read_unlock();
	return 0;
}

static int __init test_init(void)
{
	struct task_struct *task = kthread_create(test_kthread, NULL, "test_kthread");

	if (!IS_ERR(task)) {
		wake_up_process(task);
		schedule_timeout_uninterruptible(5 * HZ);
	}
	return -EINVAL;
}

module_init(test_init);
MODULE_LICENSE("GPL");
----------------------------------------

audit_test/Makefile
----------------------------------------
obj-m += audit_test.o
----------------------------------------

I tried below steps in order to generate all possible records using auditd.

----------------------------------------
# auditctl -D
No rules
# auditctl -a exit,always
# auditctl -a task,always
# insmod audit_test/audit_test.ko
insmod: ERROR: could not insert module audit_test/audit_test.ko: Invalid parameters
# auditctl -D
No rules
# dmesg
[  219.826840] test_kthread is running with PID=4044
[  219.832367] Sending signal to PID=4045
# ausearch -p 4044
<no matches>
# ausearch -p 4045 | sed -e 's/ /\n/g' | grep syscall= | sort -uV
syscall=0
syscall=2
syscall=3
syscall=5
syscall=9
syscall=10
syscall=11
syscall=12
syscall=21
syscall=35
syscall=158
----------------------------------------

Only records issued by system calls (read(),open(),close(),fstat(),mmap(),
mprotect(),munmap(),brk(),access(),nanosleep(),arch_prctl()) are generated.
Neither records issued by process creation via kthread_create() nor records
issued by process termination via send_sig() are generated.

Are you confident that auditd is already capable of generating records for e.g.
process creation via kthread_create() and process termination via send_sig() ?

> If you find a place in the code where you believe there should be an audit record,
> post a patch and we can discuss it.

I believe that auditd needs to be able to generate records for e.g. process creation
via kthread_create() and process termination via send_sig(), if you insist that we can
emulate process history information offered by this patch from user space using records
generated by auditd. (That sounds beyond CONFIG_AUDITSYSCALL=y though...)
Paul Moore Aug. 26, 2023, 2:47 p.m. UTC | #28
On August 26, 2023 2:38:55 AM Tetsuo Handa 
<penguin-kernel@I-love.SAKURA.ne.jp> wrote:
> On 2023/08/25 12:36, Paul Moore wrote:
>>> It is unfortunate that you continue ignoring the
>>>
>>> How can auditd generate logs that are not triggered via syscalls?
>>>
>>> line. I know how to configure syscall rules using "-S" option. But I do
>>> not know how to configure non syscall rules (such as process creation via
>>> kthread_create(), process termination due to tty hangup or OOM killer).
>>
>> At this point you've exhausted my goodwill so I would suggest simply reading
>> the audit code, manages, and experimenting with a running system to understand
>> how things work, especially for non-syscall records.
>
> Are we on the same page that non-syscall records include process creation via
> kthread_create() and process termination via send_sig() ?

I'm not confident that we are in agreement on many of the issues in this 
thread, some of that is likely due to disagreements in what functionality 
is worthwhile, but I believe some of that is due to misunderstandings.  I'm 
not going to merge the patch you posted at the start of this thread, but as 
I've mentioned *several* times now, if you find a kernel code path that is 
missing an audit logging call submit a patch with an explanation of why the 
audit call is needed and we can discuss it.

My initial thinking is that we do not need, or want, an audit call in 
kthread_create(). I'm less sure about send_sig(), and as my network access 
is limited at the moment I don't have the ability to look into it further 
right now. If you feel we do need an additional audit call, create a patch, 
test it, write a good commit description explaining why it is necessary and 
we can review it.

--
paul-moore.com
>
diff mbox series

Patch

diff --git a/fs/exec.c b/fs/exec.c
index 1a827d55ba94..5c8776f692c5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1381,6 +1381,7 @@  int begin_new_exec(struct linux_binprm * bprm)
 	commit_creds(bprm->cred);
 	bprm->cred = NULL;
 
+	audit_update_history();
 	/*
 	 * Disable monitoring for regular users
 	 * when executing setuid binaries. Must
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 6a3a9e122bb5..6291d0f76541 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -397,6 +397,8 @@  static inline void audit_ptrace(struct task_struct *t)
 		__audit_ptrace(t);
 }
 
+extern void audit_update_history(void);
+
 				/* Private API (for audit.c only) */
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
@@ -701,6 +703,9 @@  static inline void audit_ntp_log(const struct audit_ntp_data *ad)
 static inline void audit_ptrace(struct task_struct *t)
 { }
 
+static inline void audit_update_history(void)
+{ }
+
 static inline void audit_log_nfcfg(const char *name, u8 af,
 				   unsigned int nentries,
 				   enum audit_nfcfgop op, gfp_t gfp)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 609bde814cb0..f32076b6b733 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1112,6 +1112,7 @@  struct task_struct {
 #ifdef CONFIG_AUDIT
 #ifdef CONFIG_AUDITSYSCALL
 	struct audit_context		*audit_context;
+	char				*comm_history;
 #endif
 	kuid_t				loginuid;
 	unsigned int			sessionid;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index d676ed2b246e..186c0b5ca1b6 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -122,6 +122,7 @@ 
 #define AUDIT_OPENAT2		1337	/* Record showing openat2 how args */
 #define AUDIT_DM_CTRL		1338	/* Device Mapper target control */
 #define AUDIT_DM_EVENT		1339	/* Device Mapper events */
+#define AUDIT_PROCHISTORY	1340	/* Commname history emit event */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
diff --git a/init/init_task.c b/init/init_task.c
index ff6c4b9bfe6b..e3d481d1b010 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -57,6 +57,10 @@  unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
 };
 #endif
 
+#ifdef CONFIG_AUDITSYSCALL
+extern char init_task_audit_history[];
+#endif
+
 /*
  * Set up the first task table, touch at your own risk!. Base=0,
  * limit=0x1fffff (=2MB)
@@ -137,6 +141,9 @@  struct task_struct init_task
 #ifdef CONFIG_AUDIT
 	.loginuid	= INVALID_UID,
 	.sessionid	= AUDIT_SID_UNSET,
+#ifdef CONFIG_AUDITSYSCALL
+	.comm_history   = init_task_audit_history,
+#endif
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
diff --git a/kernel/audit.c b/kernel/audit.c
index 9bc0b0301198..034952abd83d 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1674,6 +1674,7 @@  static int __init audit_init(void)
 {
 	int i;
 
+	audit_update_history();
 	if (audit_initialized == AUDIT_DISABLED)
 		return 0;
 
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index addeed3df15d..6f1b124da2fe 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -80,6 +80,9 @@ 
 /* max length to print of cmdline/proctitle value during audit */
 #define MAX_PROCTITLE_AUDIT_LEN 128
 
+/* max length for thread's comm name history */
+static unsigned int audit_history_size __ro_after_init = 1024;
+
 /* number of audit rules */
 int audit_n_rules;
 
@@ -1055,6 +1058,12 @@  int audit_alloc(struct task_struct *tsk)
 	enum audit_state     state;
 	char *key = NULL;
 
+	if (audit_history_size) {
+		tsk->comm_history = kmemdup(current->comm_history, audit_history_size, GFP_KERNEL);
+		if (!tsk->comm_history)
+			return -ENOMEM;
+	}
+
 	if (likely(!audit_ever_enabled))
 		return 0;
 
@@ -1065,6 +1074,10 @@  int audit_alloc(struct task_struct *tsk)
 	}
 
 	if (!(context = audit_alloc_context(state))) {
+		if (audit_history_size) {
+			kfree(tsk->comm_history);
+			tsk->comm_history = NULL;
+		}
 		kfree(key);
 		audit_log_lost("out of memory in audit_alloc");
 		return -ENOMEM;
@@ -1671,6 +1684,18 @@  static void audit_log_uring(struct audit_context *ctx)
 	audit_log_end(ab);
 }
 
+static void audit_log_history(struct audit_context *context)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCHISTORY);
+	if (!ab)
+		return; /* audit_panic or being filtered */
+	audit_log_format(ab, "history=");
+	audit_log_untrustedstring(ab, current->comm_history);
+	audit_log_end(ab);
+}
+
 static void audit_log_exit(void)
 {
 	int i, call_panic = 0;
@@ -1805,6 +1830,8 @@  static void audit_log_exit(void)
 
 	if (context->context == AUDIT_CTX_SYSCALL)
 		audit_log_proctitle();
+	if (audit_history_size)
+		audit_log_history(context);
 
 	/* Send end of event record to help user space know we are finished */
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
@@ -1824,6 +1851,10 @@  void __audit_free(struct task_struct *tsk)
 {
 	struct audit_context *context = tsk->audit_context;
 
+	if (audit_history_size) {
+		kfree(tsk->comm_history);
+		tsk->comm_history = NULL;
+	}
 	if (!context)
 		return;
 
@@ -3034,3 +3065,80 @@  struct list_head *audit_killed_trees(void)
 		return NULL;
 	return &ctx->killed_trees;
 }
+
+char init_task_audit_history[4096];
+
+static int __init audit_history_setup(char *str)
+{
+	unsigned int size;
+
+	if (kstrtouint(str, 10, &size))
+		return -EINVAL;
+	if (size > sizeof(init_task_audit_history))
+		size = sizeof(init_task_audit_history);
+	audit_history_size = size;
+	return 0;
+}
+early_param("audit_history", audit_history_setup);
+
+void audit_update_history(void)
+{
+	int i;
+	int required;
+	struct tm tm;
+	char buf[256];
+	char *cp = buf;
+
+	if (!audit_history_size)
+		return;
+
+	cp += snprintf(buf, sizeof(buf) - 1, "name=");
+	for (i = 0; i < TASK_COMM_LEN; i++) {
+		const unsigned char c = current->comm[i];
+
+		if (!c)
+			break;
+		if (isalnum(c) || c == '.' || c == '_' || c == '-' || c == '/') {
+			*cp++ = c;
+			continue;
+		}
+		*cp++ = '\\';
+		*cp++ = (c >> 6) + '0';
+		*cp++ = ((c >> 3) & 7) + '0';
+		*cp++ = (c & 7) + '0';
+	}
+	/* Append PID. */
+	cp += snprintf(cp, buf - cp + sizeof(buf) - 1, ";pid=%u",
+		       current->pid);
+	/* Append timestamp. */
+	time64_to_tm(ktime_get_real_seconds(), 0, &tm);
+	cp += snprintf(cp, buf - cp + sizeof(buf) - 1,
+		       ";start=%04u%02u%02u%02u%02u%02u", (int) tm.tm_year + 1900,
+		       tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min,
+		       tm.tm_sec);
+	/* Terminate the buffer. */
+	if (cp >= buf + sizeof(buf))
+		cp = buf + sizeof(buf) - 1;
+	*cp = '\0';
+	required = cp - buf + 1;
+	/* Make some room by truncating old history. */
+	cp = current->comm_history;
+	i = strlen(cp);
+	while (i + required >= audit_history_size - 3) {
+		char *cp2 = memchr(cp, '>', i);
+
+		/* Reset history if audit_history_size is too small to truncate. */
+		if (!cp2++) {
+			*cp = '\0';
+			return;
+		}
+		i -= cp2 - cp;
+		memmove(cp, cp2, i + 1);
+	}
+	/* Emit the buffer. */
+	if (i) {
+		cp[i++] = '=';
+		cp[i++] = '>';
+	}
+	memcpy(cp + i, buf, required);
+}