diff mbox series

[RFC,10/13] x86/uintr: Introduce user IPI sender syscalls

Message ID 20210913200132.3396598-11-sohil.mehta@intel.com (mailing list archive)
State New
Headers show
Series x86 User Interrupts support | expand

Commit Message

Sohil Mehta Sept. 13, 2021, 8:01 p.m. UTC
Add a registration syscall for a task to register itself as a user
interrupt sender using the uintr_fd generated by the receiver. A task
can register multiple uintr_fds. Each unique successful connection
creates a new entry in the User Interrupt Target Table (UITT).

Each entry in the UITT table is referred by the UITT index (uipi_index).
The uipi_index returned during the registration syscall lets a sender
generate a user IPI using the 'SENDUIPI <uipi_index>' instruction.

Also, add a sender unregister syscall to unregister a particular task
from the uintr_fd. Calling close on the uintr_fd will disconnect all
threads in a sender process from that FD.

Currently, the UITT size is arbitrarily chosen as 256 entries
corresponding to a 4KB page. Based on feedback and usage data this can
either be increased/decreased or made dynamic later.

Architecturally, the UITT table can be unique for each thread or shared
across threads of the same thread group. The current implementation
keeps the UITT as unique for the each thread. This makes the kernel
implementation relatively simple and only threads that use uintr get
setup with the related structures. However, this means that the
uipi_index for each thread would be inconsistent wrt to other threads.
(Executing 'SENDUIPI 2' on threads of the same process could generate
different user interrupts.)

Alternatively, the benefit of sharing the UITT table is that all threads
would see the same view of the UITT table. Also the kernel UITT memory
allocation would be more efficient if multiple threads connect to the
same uintr_fd. However, this would mean the kernel needs to keep the
UITT table size MISC_MSR[] in sync across these threads. Also the
UPID/UITT teardown flows might need additional consideration.

Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
---
 arch/x86/include/asm/processor.h |   2 +
 arch/x86/include/asm/uintr.h     |  15 ++
 arch/x86/kernel/process.c        |   1 +
 arch/x86/kernel/uintr_core.c     | 355 ++++++++++++++++++++++++++++++-
 arch/x86/kernel/uintr_fd.c       | 133 ++++++++++++
 5 files changed, 495 insertions(+), 11 deletions(-)

Comments

Greg Kroah-Hartman Sept. 23, 2021, 12:28 p.m. UTC | #1
On Mon, Sep 13, 2021 at 01:01:29PM -0700, Sohil Mehta wrote:
> Add a registration syscall for a task to register itself as a user
> interrupt sender using the uintr_fd generated by the receiver. A task
> can register multiple uintr_fds. Each unique successful connection
> creates a new entry in the User Interrupt Target Table (UITT).
> 
> Each entry in the UITT table is referred by the UITT index (uipi_index).
> The uipi_index returned during the registration syscall lets a sender
> generate a user IPI using the 'SENDUIPI <uipi_index>' instruction.
> 
> Also, add a sender unregister syscall to unregister a particular task
> from the uintr_fd. Calling close on the uintr_fd will disconnect all
> threads in a sender process from that FD.
> 
> Currently, the UITT size is arbitrarily chosen as 256 entries
> corresponding to a 4KB page. Based on feedback and usage data this can
> either be increased/decreased or made dynamic later.
> 
> Architecturally, the UITT table can be unique for each thread or shared
> across threads of the same thread group. The current implementation
> keeps the UITT as unique for the each thread. This makes the kernel
> implementation relatively simple and only threads that use uintr get
> setup with the related structures. However, this means that the
> uipi_index for each thread would be inconsistent wrt to other threads.
> (Executing 'SENDUIPI 2' on threads of the same process could generate
> different user interrupts.)
> 
> Alternatively, the benefit of sharing the UITT table is that all threads
> would see the same view of the UITT table. Also the kernel UITT memory
> allocation would be more efficient if multiple threads connect to the
> same uintr_fd. However, this would mean the kernel needs to keep the
> UITT table size MISC_MSR[] in sync across these threads. Also the
> UPID/UITT teardown flows might need additional consideration.
> 
> Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
> ---
>  arch/x86/include/asm/processor.h |   2 +
>  arch/x86/include/asm/uintr.h     |  15 ++
>  arch/x86/kernel/process.c        |   1 +
>  arch/x86/kernel/uintr_core.c     | 355 ++++++++++++++++++++++++++++++-
>  arch/x86/kernel/uintr_fd.c       | 133 ++++++++++++
>  5 files changed, 495 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index d229bfac8b4f..3482c3182e39 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -10,6 +10,7 @@ struct mm_struct;
>  struct io_bitmap;
>  struct vm86;
>  struct uintr_receiver;
> +struct uintr_sender;
>  
>  #include <asm/math_emu.h>
>  #include <asm/segment.h>
> @@ -533,6 +534,7 @@ struct thread_struct {
>  #ifdef CONFIG_X86_USER_INTERRUPTS
>  	/* User Interrupt state*/
>  	struct uintr_receiver	*ui_recv;
> +	struct uintr_sender	*ui_send;
>  #endif
>  
>  	/* Floating point and extended processor state */
> diff --git a/arch/x86/include/asm/uintr.h b/arch/x86/include/asm/uintr.h
> index 1f00e2a63da4..ef3521dd7fb9 100644
> --- a/arch/x86/include/asm/uintr.h
> +++ b/arch/x86/include/asm/uintr.h
> @@ -8,6 +8,7 @@ struct uintr_upid_ctx {
>  	struct task_struct *task;	/* Receiver task */
>  	struct uintr_upid *upid;
>  	refcount_t refs;
> +	bool receiver_active;		/* Flag for UPID being mapped to a receiver */
>  };
>  
>  struct uintr_receiver_info {
> @@ -16,12 +17,26 @@ struct uintr_receiver_info {
>  	u64 uvec;				/* Vector number */
>  };
>  
> +struct uintr_sender_info {
> +	struct list_head node;
> +	struct uintr_uitt_ctx *uitt_ctx;
> +	struct task_struct *task;
> +	struct uintr_upid_ctx *r_upid_ctx;	/* Receiver's UPID context */
> +	struct callback_head twork;		/* Task work head */
> +	unsigned int uitt_index;
> +};
> +
>  bool uintr_arch_enabled(void);
>  int do_uintr_register_handler(u64 handler);
>  int do_uintr_unregister_handler(void);
>  int do_uintr_register_vector(struct uintr_receiver_info *r_info);
>  void do_uintr_unregister_vector(struct uintr_receiver_info *r_info);
>  
> +int do_uintr_register_sender(struct uintr_receiver_info *r_info,
> +			     struct uintr_sender_info *s_info);
> +void do_uintr_unregister_sender(struct uintr_receiver_info *r_info,
> +				struct uintr_sender_info *s_info);
> +
>  void uintr_free(struct task_struct *task);
>  
>  /* TODO: Inline the context switch related functions */
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 83677f76bd7b..9db33e467b30 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -92,6 +92,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>  #ifdef CONFIG_X86_USER_INTERRUPTS
>  	/* User Interrupt state is unique for each task */
>  	dst->thread.ui_recv = NULL;
> +	dst->thread.ui_send = NULL;
>  #endif
>  
>  	return fpu_clone(dst);
> diff --git a/arch/x86/kernel/uintr_core.c b/arch/x86/kernel/uintr_core.c
> index 9dcb9f60e5bc..8f331c5fe0cf 100644
> --- a/arch/x86/kernel/uintr_core.c
> +++ b/arch/x86/kernel/uintr_core.c
> @@ -21,6 +21,11 @@
>  #include <asm/msr-index.h>
>  #include <asm/uintr.h>
>  
> +/*
> + * Each UITT entry is 16 bytes in size.
> + * Current UITT table size is set as 4KB (256 * 16 bytes)
> + */
> +#define UINTR_MAX_UITT_NR 256
>  #define UINTR_MAX_UVEC_NR 64
>  
>  /* User Posted Interrupt Descriptor (UPID) */
> @@ -44,6 +49,27 @@ struct uintr_receiver {
>  	u64 uvec_mask;	/* track active vector per bit */
>  };
>  
> +/* User Interrupt Target Table Entry (UITTE) */
> +struct uintr_uitt_entry {
> +	u8	valid;			/* bit 0: valid, bit 1-7: reserved */

Do you check that the other bits are set to 0?

> +	u8	user_vec;
> +	u8	reserved[6];

What is this reserved for?

> +	u64	target_upid_addr;

If this is a pointer, why not say it is a pointer?

> +} __packed __aligned(16);
> +
> +struct uintr_uitt_ctx {
> +	struct uintr_uitt_entry *uitt;
> +	/* Protect UITT */
> +	spinlock_t uitt_lock;
> +	refcount_t refs;

Again, a kref please.

thanks,

greg k-h
Thomas Gleixner Sept. 24, 2021, 10:54 a.m. UTC | #2
On Mon, Sep 13 2021 at 13:01, Sohil Mehta wrote:
> +/*
> + * No lock is needed to read the active flag. Writes only happen from
> + * r_info->task that owns the UPID. Everyone else would just read this flag.
> + *
> + * This only provides a static check. The receiver may become inactive right
> + * after this check. The primary reason to have this check is to prevent future
> + * senders from connecting with this UPID, since the receiver task has already
> + * made this UPID inactive.

How is that not racy?

> +static void free_uitt(struct uintr_uitt_ctx *uitt_ctx)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&uitt_ctx->uitt_lock, flags);
> +	kfree(uitt_ctx->uitt);

Again. Please move kfree() outside of the lock held region. But aside of
that what is this lock protecting here?

> +	uitt_ctx->uitt = NULL;
> +	spin_unlock_irqrestore(&uitt_ctx->uitt_lock, flags);

If there is concurrency then the other task which is blocked on
uitt_lock will operate on uitt_ctx while the same is freed.

Again, this lacks any life time and serialization rules. Just sprinkling
locks all over the place does not make it magically correct.

> +	kfree(uitt_ctx);
> +}

> +static void put_uitt_ref(struct uintr_uitt_ctx *uitt_ctx)
> +{
> +	if (refcount_dec_and_test(&uitt_ctx->refs))
> +		free_uitt(uitt_ctx);
> +}


> +static struct uintr_uitt_ctx *get_uitt_ref(struct uintr_uitt_ctx *uitt_ctx)
> +{
> +	refcount_inc(&uitt_ctx->refs);
> +	return uitt_ctx;
> +}
> +
> +static inline void mark_uitte_invalid(struct uintr_sender_info *s_info)
> +{
> +	struct uintr_uitt_entry *uitte;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&s_info->uitt_ctx->uitt_lock, flags);
> +	uitte = &s_info->uitt_ctx->uitt[s_info->uitt_index];
> +	uitte->valid = 0;
> +	spin_unlock_irqrestore(&s_info->uitt_ctx->uitt_lock, flags);
> +}
> +
>  static void __clear_vector_from_upid(u64 uvec, struct uintr_upid *upid)
>  {
>  	clear_bit(uvec, (unsigned long *)&upid->puir);
> @@ -175,6 +290,210 @@ static void receiver_clear_uvec(struct callback_head *head)
>  	kfree(r_info);
>  }
>  
> +static void teardown_uitt(void)
> +{
> +	struct task_struct *t = current;
> +	struct fpu *fpu = &t->thread.fpu;
> +	u64 msr64;
> +
> +	put_uitt_ref(t->thread.ui_send->uitt_ctx);
> +	kfree(t->thread.ui_send);
> +	t->thread.ui_send = NULL;
> +
> +	fpregs_lock();
> +
> +	if (fpregs_state_valid(fpu, smp_processor_id())) {
> +		/* Modify only the relevant bits of the MISC MSR */
> +		rdmsrl(MSR_IA32_UINTR_MISC, msr64);
> +		msr64 &= GENMASK_ULL(63, 32);

More magic numbers.

> +		wrmsrl(MSR_IA32_UINTR_MISC, msr64);
> +		wrmsrl(MSR_IA32_UINTR_TT, 0ULL);

> +static void __free_uitt_entry(unsigned int entry)
> +{
> +	struct task_struct *t = current;
> +	unsigned long flags;
> +
> +	if (entry >= UINTR_MAX_UITT_NR)
> +		return;
> +
> +	if (!is_uintr_sender(t))
> +		return;
> +
> +	pr_debug("send: Freeing UITTE entry %d for task=%d\n", entry, t->pid);
> +
> +	spin_lock_irqsave(&t->thread.ui_send->uitt_ctx->uitt_lock, flags);
> +	memset(&t->thread.ui_send->uitt_ctx->uitt[entry], 0,
> +	       sizeof(struct uintr_uitt_entry));
> +	spin_unlock_irqrestore(&t->thread.ui_send->uitt_ctx->uitt_lock,
> flags);

What's the spinlock protecting here?

> +	clear_bit(entry, (unsigned long *)t->thread.ui_send->uitt_mask);
> +
> +	if (is_uitt_empty(t)) {
> +		pr_debug("send: UITT mask is empty. Dereference and teardown UITT\n");
> +		teardown_uitt();
> +	}
> +}

> +void do_uintr_unregister_sender(struct uintr_receiver_info *r_info,
> +				struct uintr_sender_info *s_info)
> +{
> +	int ret;
> +
> +	/*
> +	 * To make sure any new senduipi result in a #GP fault.
> +	 * The task work might take non-zero time to kick the process out.

-ENOPARSE

> +	 */
> +	mark_uitte_invalid(s_info);
> +
> +	pr_debug("send: Adding Free UITTE %d task work for task=%d\n",
> +		 s_info->uitt_index, s_info->task->pid);
> +
> +	init_task_work(&s_info->twork, sender_free_uitte);
> +	ret = task_work_add(s_info->task, &s_info->twork, true);
> +	if (ret) {
> +		/*
> +		 * Dereferencing the UITT and UPID here since the task has
> +		 * exited.
> +		 */
> +		pr_debug("send: Free UITTE %d task=%d has already exited\n",
> +			 s_info->uitt_index, s_info->task->pid);
> +		put_upid_ref(s_info->r_upid_ctx);
> +		put_uitt_ref(s_info->uitt_ctx);
> +		put_task_struct(s_info->task);
> +		kfree(s_info);
> +		return;
> +	}
> +}
> +
> +int do_uintr_register_sender(struct uintr_receiver_info *r_info,
> +			     struct uintr_sender_info *s_info)
> +{
> +	struct uintr_uitt_entry *uitte = NULL;
> +	struct uintr_sender *ui_send;
> +	struct task_struct *t = current;
> +	unsigned long flags;
> +	int entry;
> +	int ret;
> +
> +	/*
> +	 * Only a static check. Receiver could exit anytime after this check.
> +	 * This check only prevents connections using uintr_fd after the
> +	 * receiver has already exited/unregistered.
> +	 */
> +	if (!uintr_is_receiver_active(r_info))
> +		return -ESHUTDOWN;

How is this safe against a concurrent unregister/exit operation?

Thanks,

        tglx
Sohil Mehta Sept. 28, 2021, 6:01 p.m. UTC | #3
On 9/23/2021 5:28 AM, Greg KH wrote:
> On Mon, Sep 13, 2021 at 01:01:29PM -0700, Sohil Mehta wrote:
>> +/* User Interrupt Target Table Entry (UITTE) */
>> +struct uintr_uitt_entry {
>> +	u8	valid;			/* bit 0: valid, bit 1-7: reserved */
> Do you check that the other bits are set to 0?

I don't have a check but kzalloc() in alloc_uitt() should set it to 0.

>> +	u8	user_vec;
>> +	u8	reserved[6];
> What is this reserved for?

This is hardware defined structure as well. I should probably mention 
this it in the comment above.

>> +	u64	target_upid_addr;
> If this is a pointer, why not say it is a pointer?

I used a u64 to get the size and alignment of this structure as required 
by the hardware. I wasn't sure if using a struct upid * would complicate 
that.

Also this field is never used as a pointer by the kernel. It is only 
used to program an entry that is read by the hardware.

Is this reasonable or would you still prefer a pointer?


>> +} __packed __aligned(16);
>> +
>> +struct uintr_uitt_ctx {
>> +	struct uintr_uitt_entry *uitt;
>> +	/* Protect UITT */
>> +	spinlock_t uitt_lock;
>> +	refcount_t refs;
> Again, a kref please.

Will do.

Thanks,

Sohil
Greg Kroah-Hartman Sept. 29, 2021, 7:04 a.m. UTC | #4
On Tue, Sep 28, 2021 at 11:01:54AM -0700, Sohil Mehta wrote:
> On 9/23/2021 5:28 AM, Greg KH wrote:
> > On Mon, Sep 13, 2021 at 01:01:29PM -0700, Sohil Mehta wrote:
> > > +/* User Interrupt Target Table Entry (UITTE) */
> > > +struct uintr_uitt_entry {
> > > +	u8	valid;			/* bit 0: valid, bit 1-7: reserved */
> > Do you check that the other bits are set to 0?
> 
> I don't have a check but kzalloc() in alloc_uitt() should set it to 0.
> 
> > > +	u8	user_vec;
> > > +	u8	reserved[6];
> > What is this reserved for?
> 
> This is hardware defined structure as well. I should probably mention this
> it in the comment above.
> 
> > > +	u64	target_upid_addr;
> > If this is a pointer, why not say it is a pointer?
> 
> I used a u64 to get the size and alignment of this structure as required by
> the hardware. I wasn't sure if using a struct upid * would complicate that.
> 
> Also this field is never used as a pointer by the kernel. It is only used to
> program an entry that is read by the hardware.
> 
> Is this reasonable or would you still prefer a pointer?

Ok, just document it really well that this is NOT a real address used by
the kernel.  As it is, that's not obvious at all.

And if this crosses the user/kernel boundry, it needs to be __u64 right?

thanks,

greg k-h
Sohil Mehta Sept. 29, 2021, 2:27 p.m. UTC | #5
On 9/29/2021 12:04 AM, Greg KH wrote:
> On Tue, Sep 28, 2021 at 11:01:54AM -0700, Sohil Mehta wrote:
>>
>> Is this reasonable or would you still prefer a pointer?
> Ok, just document it really well that this is NOT a real address used by
> the kernel.  As it is, that's not obvious at all.


Thanks. I'll do that.

>
> And if this crosses the user/kernel boundry, it needs to be __u64 right?


This one doesn't cross the user/kernel boundary. The kernel programs a 
value in this struct for the hardware to consume.

But there might be other places where I have missed that. I'll fix those.

Thanks,
Sohil
diff mbox series

Patch

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d229bfac8b4f..3482c3182e39 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -10,6 +10,7 @@  struct mm_struct;
 struct io_bitmap;
 struct vm86;
 struct uintr_receiver;
+struct uintr_sender;
 
 #include <asm/math_emu.h>
 #include <asm/segment.h>
@@ -533,6 +534,7 @@  struct thread_struct {
 #ifdef CONFIG_X86_USER_INTERRUPTS
 	/* User Interrupt state*/
 	struct uintr_receiver	*ui_recv;
+	struct uintr_sender	*ui_send;
 #endif
 
 	/* Floating point and extended processor state */
diff --git a/arch/x86/include/asm/uintr.h b/arch/x86/include/asm/uintr.h
index 1f00e2a63da4..ef3521dd7fb9 100644
--- a/arch/x86/include/asm/uintr.h
+++ b/arch/x86/include/asm/uintr.h
@@ -8,6 +8,7 @@  struct uintr_upid_ctx {
 	struct task_struct *task;	/* Receiver task */
 	struct uintr_upid *upid;
 	refcount_t refs;
+	bool receiver_active;		/* Flag for UPID being mapped to a receiver */
 };
 
 struct uintr_receiver_info {
@@ -16,12 +17,26 @@  struct uintr_receiver_info {
 	u64 uvec;				/* Vector number */
 };
 
+struct uintr_sender_info {
+	struct list_head node;
+	struct uintr_uitt_ctx *uitt_ctx;
+	struct task_struct *task;
+	struct uintr_upid_ctx *r_upid_ctx;	/* Receiver's UPID context */
+	struct callback_head twork;		/* Task work head */
+	unsigned int uitt_index;
+};
+
 bool uintr_arch_enabled(void);
 int do_uintr_register_handler(u64 handler);
 int do_uintr_unregister_handler(void);
 int do_uintr_register_vector(struct uintr_receiver_info *r_info);
 void do_uintr_unregister_vector(struct uintr_receiver_info *r_info);
 
+int do_uintr_register_sender(struct uintr_receiver_info *r_info,
+			     struct uintr_sender_info *s_info);
+void do_uintr_unregister_sender(struct uintr_receiver_info *r_info,
+				struct uintr_sender_info *s_info);
+
 void uintr_free(struct task_struct *task);
 
 /* TODO: Inline the context switch related functions */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 83677f76bd7b..9db33e467b30 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -92,6 +92,7 @@  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 #ifdef CONFIG_X86_USER_INTERRUPTS
 	/* User Interrupt state is unique for each task */
 	dst->thread.ui_recv = NULL;
+	dst->thread.ui_send = NULL;
 #endif
 
 	return fpu_clone(dst);
diff --git a/arch/x86/kernel/uintr_core.c b/arch/x86/kernel/uintr_core.c
index 9dcb9f60e5bc..8f331c5fe0cf 100644
--- a/arch/x86/kernel/uintr_core.c
+++ b/arch/x86/kernel/uintr_core.c
@@ -21,6 +21,11 @@ 
 #include <asm/msr-index.h>
 #include <asm/uintr.h>
 
+/*
+ * Each UITT entry is 16 bytes in size.
+ * Current UITT table size is set as 4KB (256 * 16 bytes)
+ */
+#define UINTR_MAX_UITT_NR 256
 #define UINTR_MAX_UVEC_NR 64
 
 /* User Posted Interrupt Descriptor (UPID) */
@@ -44,6 +49,27 @@  struct uintr_receiver {
 	u64 uvec_mask;	/* track active vector per bit */
 };
 
+/* User Interrupt Target Table Entry (UITTE) */
+struct uintr_uitt_entry {
+	u8	valid;			/* bit 0: valid, bit 1-7: reserved */
+	u8	user_vec;
+	u8	reserved[6];
+	u64	target_upid_addr;
+} __packed __aligned(16);
+
+struct uintr_uitt_ctx {
+	struct uintr_uitt_entry *uitt;
+	/* Protect UITT */
+	spinlock_t uitt_lock;
+	refcount_t refs;
+};
+
+struct uintr_sender {
+	struct uintr_uitt_ctx *uitt_ctx;
+	/* track active uitt entries per bit */
+	u64 uitt_mask[BITS_TO_U64(UINTR_MAX_UITT_NR)];
+};
+
 inline bool uintr_arch_enabled(void)
 {
 	return static_cpu_has(X86_FEATURE_UINTR);
@@ -54,6 +80,36 @@  static inline bool is_uintr_receiver(struct task_struct *t)
 	return !!t->thread.ui_recv;
 }
 
+static inline bool is_uintr_sender(struct task_struct *t)
+{
+	return !!t->thread.ui_send;
+}
+
+static inline bool is_uintr_task(struct task_struct *t)
+{
+	return(is_uintr_receiver(t) || is_uintr_sender(t));
+}
+
+static inline bool is_uitt_empty(struct task_struct *t)
+{
+	return !!bitmap_empty((unsigned long *)t->thread.ui_send->uitt_mask,
+			      UINTR_MAX_UITT_NR);
+}
+
+/*
+ * No lock is needed to read the active flag. Writes only happen from
+ * r_info->task that owns the UPID. Everyone else would just read this flag.
+ *
+ * This only provides a static check. The receiver may become inactive right
+ * after this check. The primary reason to have this check is to prevent future
+ * senders from connecting with this UPID, since the receiver task has already
+ * made this UPID inactive.
+ */
+static bool uintr_is_receiver_active(struct uintr_receiver_info *r_info)
+{
+	return r_info->upid_ctx->receiver_active;
+}
+
 static inline u32 cpu_to_ndst(int cpu)
 {
 	u32 apicid = (u32)apic->cpu_present_to_apicid(cpu);
@@ -94,6 +150,7 @@  static struct uintr_upid_ctx *alloc_upid(void)
 	upid_ctx->upid = upid;
 	refcount_set(&upid_ctx->refs, 1);
 	upid_ctx->task = get_task_struct(current);
+	upid_ctx->receiver_active = true;
 
 	return upid_ctx;
 }
@@ -110,6 +167,64 @@  static struct uintr_upid_ctx *get_upid_ref(struct uintr_upid_ctx *upid_ctx)
 	return upid_ctx;
 }
 
+static void free_uitt(struct uintr_uitt_ctx *uitt_ctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&uitt_ctx->uitt_lock, flags);
+	kfree(uitt_ctx->uitt);
+	uitt_ctx->uitt = NULL;
+	spin_unlock_irqrestore(&uitt_ctx->uitt_lock, flags);
+
+	kfree(uitt_ctx);
+}
+
+/* TODO: Replace UITT allocation with KPTI compatible memory allocator */
+static struct uintr_uitt_ctx *alloc_uitt(void)
+{
+	struct uintr_uitt_ctx *uitt_ctx;
+	struct uintr_uitt_entry *uitt;
+
+	uitt_ctx = kzalloc(sizeof(*uitt_ctx), GFP_KERNEL);
+	if (!uitt_ctx)
+		return NULL;
+
+	uitt = kzalloc(sizeof(*uitt) * UINTR_MAX_UITT_NR, GFP_KERNEL);
+	if (!uitt) {
+		kfree(uitt_ctx);
+		return NULL;
+	}
+
+	uitt_ctx->uitt = uitt;
+	spin_lock_init(&uitt_ctx->uitt_lock);
+	refcount_set(&uitt_ctx->refs, 1);
+
+	return uitt_ctx;
+}
+
+static void put_uitt_ref(struct uintr_uitt_ctx *uitt_ctx)
+{
+	if (refcount_dec_and_test(&uitt_ctx->refs))
+		free_uitt(uitt_ctx);
+}
+
+static struct uintr_uitt_ctx *get_uitt_ref(struct uintr_uitt_ctx *uitt_ctx)
+{
+	refcount_inc(&uitt_ctx->refs);
+	return uitt_ctx;
+}
+
+static inline void mark_uitte_invalid(struct uintr_sender_info *s_info)
+{
+	struct uintr_uitt_entry *uitte;
+	unsigned long flags;
+
+	spin_lock_irqsave(&s_info->uitt_ctx->uitt_lock, flags);
+	uitte = &s_info->uitt_ctx->uitt[s_info->uitt_index];
+	uitte->valid = 0;
+	spin_unlock_irqrestore(&s_info->uitt_ctx->uitt_lock, flags);
+}
+
 static void __clear_vector_from_upid(u64 uvec, struct uintr_upid *upid)
 {
 	clear_bit(uvec, (unsigned long *)&upid->puir);
@@ -175,6 +290,210 @@  static void receiver_clear_uvec(struct callback_head *head)
 	kfree(r_info);
 }
 
+static void teardown_uitt(void)
+{
+	struct task_struct *t = current;
+	struct fpu *fpu = &t->thread.fpu;
+	u64 msr64;
+
+	put_uitt_ref(t->thread.ui_send->uitt_ctx);
+	kfree(t->thread.ui_send);
+	t->thread.ui_send = NULL;
+
+	fpregs_lock();
+
+	if (fpregs_state_valid(fpu, smp_processor_id())) {
+		/* Modify only the relevant bits of the MISC MSR */
+		rdmsrl(MSR_IA32_UINTR_MISC, msr64);
+		msr64 &= GENMASK_ULL(63, 32);
+		wrmsrl(MSR_IA32_UINTR_MISC, msr64);
+		wrmsrl(MSR_IA32_UINTR_TT, 0ULL);
+	} else {
+		struct uintr_state *p;
+
+		p = get_xsave_addr(&fpu->state.xsave, XFEATURE_UINTR);
+		if (p) {
+			p->uitt_size = 0;
+			p->uitt_addr = 0;
+		}
+	}
+
+	fpregs_unlock();
+}
+
+static int init_uitt(void)
+{
+	struct task_struct *t = current;
+	struct fpu *fpu = &t->thread.fpu;
+	struct uintr_sender *ui_send;
+	u64 msr64;
+
+	ui_send = kzalloc(sizeof(*t->thread.ui_send), GFP_KERNEL);
+	if (!ui_send)
+		return -ENOMEM;
+
+	ui_send->uitt_ctx = alloc_uitt();
+	if (!ui_send->uitt_ctx) {
+		pr_debug("send: Alloc UITT failed for task=%d\n", t->pid);
+		kfree(ui_send);
+		return -ENOMEM;
+	}
+
+	fpregs_lock();
+
+	if (fpregs_state_valid(fpu, smp_processor_id())) {
+		wrmsrl(MSR_IA32_UINTR_TT, (u64)ui_send->uitt_ctx->uitt | 1);
+		/* Modify only the relevant bits of the MISC MSR */
+		rdmsrl(MSR_IA32_UINTR_MISC, msr64);
+		msr64 &= GENMASK_ULL(63, 32);
+		msr64 |= UINTR_MAX_UITT_NR;
+		wrmsrl(MSR_IA32_UINTR_MISC, msr64);
+	} else {
+		struct xregs_state *xsave;
+		struct uintr_state *p;
+
+		xsave = &fpu->state.xsave;
+		xsave->header.xfeatures |= XFEATURE_MASK_UINTR;
+		p = get_xsave_addr(&fpu->state.xsave, XFEATURE_UINTR);
+		if (p) {
+			p->uitt_size = UINTR_MAX_UITT_NR;
+			p->uitt_addr = (u64)ui_send->uitt_ctx->uitt | 1;
+		}
+	}
+
+	fpregs_unlock();
+
+	pr_debug("send: Setup a new UITT=%px for task=%d with size %d\n",
+		 ui_send->uitt_ctx->uitt, t->pid, UINTR_MAX_UITT_NR * 16);
+
+	t->thread.ui_send = ui_send;
+
+	return 0;
+}
+
+static void __free_uitt_entry(unsigned int entry)
+{
+	struct task_struct *t = current;
+	unsigned long flags;
+
+	if (entry >= UINTR_MAX_UITT_NR)
+		return;
+
+	if (!is_uintr_sender(t))
+		return;
+
+	pr_debug("send: Freeing UITTE entry %d for task=%d\n", entry, t->pid);
+
+	spin_lock_irqsave(&t->thread.ui_send->uitt_ctx->uitt_lock, flags);
+	memset(&t->thread.ui_send->uitt_ctx->uitt[entry], 0,
+	       sizeof(struct uintr_uitt_entry));
+	spin_unlock_irqrestore(&t->thread.ui_send->uitt_ctx->uitt_lock, flags);
+
+	clear_bit(entry, (unsigned long *)t->thread.ui_send->uitt_mask);
+
+	if (is_uitt_empty(t)) {
+		pr_debug("send: UITT mask is empty. Dereference and teardown UITT\n");
+		teardown_uitt();
+	}
+}
+
+static void sender_free_uitte(struct callback_head *head)
+{
+	struct uintr_sender_info *s_info;
+
+	s_info = container_of(head, struct uintr_sender_info, twork);
+
+	__free_uitt_entry(s_info->uitt_index);
+	put_uitt_ref(s_info->uitt_ctx);
+	put_upid_ref(s_info->r_upid_ctx);
+	put_task_struct(s_info->task);
+	kfree(s_info);
+}
+
+void do_uintr_unregister_sender(struct uintr_receiver_info *r_info,
+				struct uintr_sender_info *s_info)
+{
+	int ret;
+
+	/*
+	 * To make sure any new senduipi result in a #GP fault.
+	 * The task work might take non-zero time to kick the process out.
+	 */
+	mark_uitte_invalid(s_info);
+
+	pr_debug("send: Adding Free UITTE %d task work for task=%d\n",
+		 s_info->uitt_index, s_info->task->pid);
+
+	init_task_work(&s_info->twork, sender_free_uitte);
+	ret = task_work_add(s_info->task, &s_info->twork, true);
+	if (ret) {
+		/*
+		 * Dereferencing the UITT and UPID here since the task has
+		 * exited.
+		 */
+		pr_debug("send: Free UITTE %d task=%d has already exited\n",
+			 s_info->uitt_index, s_info->task->pid);
+		put_upid_ref(s_info->r_upid_ctx);
+		put_uitt_ref(s_info->uitt_ctx);
+		put_task_struct(s_info->task);
+		kfree(s_info);
+		return;
+	}
+}
+
+int do_uintr_register_sender(struct uintr_receiver_info *r_info,
+			     struct uintr_sender_info *s_info)
+{
+	struct uintr_uitt_entry *uitte = NULL;
+	struct uintr_sender *ui_send;
+	struct task_struct *t = current;
+	unsigned long flags;
+	int entry;
+	int ret;
+
+	/*
+	 * Only a static check. Receiver could exit anytime after this check.
+	 * This check only prevents connections using uintr_fd after the
+	 * receiver has already exited/unregistered.
+	 */
+	if (!uintr_is_receiver_active(r_info))
+		return -ESHUTDOWN;
+
+	if (is_uintr_sender(t)) {
+		entry = find_first_zero_bit((unsigned long *)t->thread.ui_send->uitt_mask,
+					    UINTR_MAX_UITT_NR);
+		if (entry >= UINTR_MAX_UITT_NR)
+			return -ENOSPC;
+	} else {
+		BUILD_BUG_ON(UINTR_MAX_UITT_NR < 1);
+		entry = 0;
+		ret = init_uitt();
+		if (ret)
+			return ret;
+	}
+
+	ui_send = t->thread.ui_send;
+
+	set_bit(entry, (unsigned long *)ui_send->uitt_mask);
+
+	spin_lock_irqsave(&ui_send->uitt_ctx->uitt_lock, flags);
+	uitte = &ui_send->uitt_ctx->uitt[entry];
+	pr_debug("send: sender=%d receiver=%d UITTE entry %d address %px\n",
+		 current->pid, r_info->upid_ctx->task->pid, entry, uitte);
+
+	uitte->user_vec = r_info->uvec;
+	uitte->target_upid_addr = (u64)r_info->upid_ctx->upid;
+	uitte->valid = 1;
+	spin_unlock_irqrestore(&ui_send->uitt_ctx->uitt_lock, flags);
+
+	s_info->r_upid_ctx = get_upid_ref(r_info->upid_ctx);
+	s_info->uitt_ctx = get_uitt_ref(ui_send->uitt_ctx);
+	s_info->task = get_task_struct(current);
+	s_info->uitt_index = entry;
+
+	return 0;
+}
+
 int do_uintr_unregister_handler(void)
 {
 	struct task_struct *t = current;
@@ -222,6 +541,8 @@  int do_uintr_unregister_handler(void)
 	}
 
 	ui_recv = t->thread.ui_recv;
+	ui_recv->upid_ctx->receiver_active = false;
+
 	/*
 	 * Suppress notifications so that no further interrupts are generated
 	 * based on this UPID.
@@ -437,14 +758,14 @@  void switch_uintr_return(void)
  * This should only be called from exit_thread().
  * exit_thread() can happen in current context when the current thread is
  * exiting or it can happen for a new thread that is being created.
- * For new threads is_uintr_receiver() should fail.
+ * For new threads is_uintr_task() should fail.
  */
 void uintr_free(struct task_struct *t)
 {
 	struct uintr_receiver *ui_recv;
 	struct fpu *fpu;
 
-	if (!static_cpu_has(X86_FEATURE_UINTR) || !is_uintr_receiver(t))
+	if (!static_cpu_has(X86_FEATURE_UINTR) || !is_uintr_task(t))
 		return;
 
 	if (WARN_ON_ONCE(t != current))
@@ -456,6 +777,7 @@  void uintr_free(struct task_struct *t)
 
 	if (fpregs_state_valid(fpu, smp_processor_id())) {
 		wrmsrl(MSR_IA32_UINTR_MISC, 0ULL);
+		wrmsrl(MSR_IA32_UINTR_TT, 0ULL);
 		wrmsrl(MSR_IA32_UINTR_PD, 0ULL);
 		wrmsrl(MSR_IA32_UINTR_RR, 0ULL);
 		wrmsrl(MSR_IA32_UINTR_STACKADJUST, 0ULL);
@@ -470,20 +792,31 @@  void uintr_free(struct task_struct *t)
 			p->upid_addr = 0;
 			p->stack_adjust = 0;
 			p->uinv = 0;
+			p->uitt_addr = 0;
+			p->uitt_size = 0;
 		}
 	}
 
 	/* Check: Can a thread be context switched while it is exiting? */
-	ui_recv = t->thread.ui_recv;
+	if (is_uintr_receiver(t)) {
+		ui_recv = t->thread.ui_recv;
 
-	/*
-	 * Suppress notifications so that no further interrupts are
-	 * generated based on this UPID.
-	 */
-	set_bit(UPID_SN, (unsigned long *)&ui_recv->upid_ctx->upid->nc.status);
-	put_upid_ref(ui_recv->upid_ctx);
-	kfree(ui_recv);
-	t->thread.ui_recv = NULL;
+		/*
+		 * Suppress notifications so that no further interrupts are
+		 * generated based on this UPID.
+		 */
+		set_bit(UPID_SN, (unsigned long *)&ui_recv->upid_ctx->upid->nc.status);
+		ui_recv->upid_ctx->receiver_active = false;
+		put_upid_ref(ui_recv->upid_ctx);
+		kfree(ui_recv);
+		t->thread.ui_recv = NULL;
+	}
 
 	fpregs_unlock();
+
+	if (is_uintr_sender(t)) {
+		put_uitt_ref(t->thread.ui_send->uitt_ctx);
+		kfree(t->thread.ui_send);
+		t->thread.ui_send = NULL;
+	}
 }
diff --git a/arch/x86/kernel/uintr_fd.c b/arch/x86/kernel/uintr_fd.c
index f0548bbac776..3c82c032c0b9 100644
--- a/arch/x86/kernel/uintr_fd.c
+++ b/arch/x86/kernel/uintr_fd.c
@@ -15,6 +15,9 @@ 
 
 struct uintrfd_ctx {
 	struct uintr_receiver_info *r_info;
+	/* Protect sender_list */
+	spinlock_t sender_lock;
+	struct list_head sender_list;
 };
 
 #ifdef CONFIG_PROC_FS
@@ -30,11 +33,20 @@  static void uintrfd_show_fdinfo(struct seq_file *m, struct file *file)
 static int uintrfd_release(struct inode *inode, struct file *file)
 {
 	struct uintrfd_ctx *uintrfd_ctx = file->private_data;
+	struct uintr_sender_info *s_info, *tmp;
+	unsigned long flags;
 
 	pr_debug("recv: Release uintrfd for r_task %d uvec %llu\n",
 		 uintrfd_ctx->r_info->upid_ctx->task->pid,
 		 uintrfd_ctx->r_info->uvec);
 
+	spin_lock_irqsave(&uintrfd_ctx->sender_lock, flags);
+	list_for_each_entry_safe(s_info, tmp, &uintrfd_ctx->sender_list, node) {
+		list_del(&s_info->node);
+		do_uintr_unregister_sender(uintrfd_ctx->r_info, s_info);
+	}
+	spin_unlock_irqrestore(&uintrfd_ctx->sender_lock, flags);
+
 	do_uintr_unregister_vector(uintrfd_ctx->r_info);
 	kfree(uintrfd_ctx);
 
@@ -81,6 +93,9 @@  SYSCALL_DEFINE2(uintr_create_fd, u64, vector, unsigned int, flags)
 		goto out_free_ctx;
 	}
 
+	INIT_LIST_HEAD(&uintrfd_ctx->sender_list);
+	spin_lock_init(&uintrfd_ctx->sender_lock);
+
 	/* TODO: Get user input for flags - UFD_CLOEXEC */
 	/* Check: Do we need O_NONBLOCK? */
 	uintrfd = anon_inode_getfd("[uintrfd]", &uintrfd_fops, uintrfd_ctx,
@@ -150,3 +165,121 @@  SYSCALL_DEFINE1(uintr_unregister_handler, unsigned int, flags)
 
 	return ret;
 }
+
+/*
+ * sys_uintr_register_sender - setup user inter-processor interrupt sender.
+ */
+SYSCALL_DEFINE2(uintr_register_sender, int, uintrfd, unsigned int, flags)
+{
+	struct uintr_sender_info *s_info;
+	struct uintrfd_ctx *uintrfd_ctx;
+	unsigned long lock_flags;
+	struct file *uintr_f;
+	struct fd f;
+	int ret = 0;
+
+	if (!uintr_arch_enabled())
+		return -EOPNOTSUPP;
+
+	if (flags)
+		return -EINVAL;
+
+	f = fdget(uintrfd);
+	uintr_f = f.file;
+	if (!uintr_f)
+		return -EBADF;
+
+	if (uintr_f->f_op != &uintrfd_fops) {
+		ret = -EOPNOTSUPP;
+		goto out_fdput;
+	}
+
+	uintrfd_ctx = (struct uintrfd_ctx *)uintr_f->private_data;
+
+	spin_lock_irqsave(&uintrfd_ctx->sender_lock, lock_flags);
+	list_for_each_entry(s_info, &uintrfd_ctx->sender_list, node) {
+		if (s_info->task == current) {
+			ret = -EISCONN;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&uintrfd_ctx->sender_lock, lock_flags);
+
+	if (ret)
+		goto out_fdput;
+
+	s_info = kzalloc(sizeof(*s_info), GFP_KERNEL);
+	if (!s_info) {
+		ret = -ENOMEM;
+		goto out_fdput;
+	}
+
+	ret = do_uintr_register_sender(uintrfd_ctx->r_info, s_info);
+	if (ret) {
+		kfree(s_info);
+		goto out_fdput;
+	}
+
+	spin_lock_irqsave(&uintrfd_ctx->sender_lock, lock_flags);
+	list_add(&s_info->node, &uintrfd_ctx->sender_list);
+	spin_unlock_irqrestore(&uintrfd_ctx->sender_lock, lock_flags);
+
+	ret = s_info->uitt_index;
+
+out_fdput:
+	pr_debug("send: register sender task=%d flags %d ret(uipi_id)=%d\n",
+		 current->pid, flags, ret);
+
+	fdput(f);
+	return ret;
+}
+
+/*
+ * sys_uintr_unregister_sender - Unregister user inter-processor interrupt sender.
+ */
+SYSCALL_DEFINE2(uintr_unregister_sender, int, uintrfd, unsigned int, flags)
+{
+	struct uintr_sender_info *s_info;
+	struct uintrfd_ctx *uintrfd_ctx;
+	struct file *uintr_f;
+	unsigned long lock_flags;
+	struct fd f;
+	int ret;
+
+	if (!uintr_arch_enabled())
+		return -EOPNOTSUPP;
+
+	if (flags)
+		return -EINVAL;
+
+	f = fdget(uintrfd);
+	uintr_f = f.file;
+	if (!uintr_f)
+		return -EBADF;
+
+	if (uintr_f->f_op != &uintrfd_fops) {
+		ret = -EOPNOTSUPP;
+		goto out_fdput;
+	}
+
+	uintrfd_ctx = (struct uintrfd_ctx *)uintr_f->private_data;
+
+	ret = -EINVAL;
+	spin_lock_irqsave(&uintrfd_ctx->sender_lock, lock_flags);
+	list_for_each_entry(s_info, &uintrfd_ctx->sender_list, node) {
+		if (s_info->task == current) {
+			ret = 0;
+			list_del(&s_info->node);
+			do_uintr_unregister_sender(uintrfd_ctx->r_info, s_info);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&uintrfd_ctx->sender_lock, lock_flags);
+
+	pr_debug("send: unregister sender uintrfd %d for task=%d ret %d\n",
+		 uintrfd, current->pid, ret);
+
+out_fdput:
+	fdput(f);
+	return ret;
+}