diff mbox series

[RFC,v4,5/8] hisi_acc_vfio_pci: Restrict access to VF dev BAR2 migration region

Message ID 20220208133425.1096-6-shameerali.kolothum.thodi@huawei.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series vfio/hisilicon: add ACC live migration driver | expand

Commit Message

Shameerali Kolothum Thodi Feb. 8, 2022, 1:34 p.m. UTC
HiSilicon ACC VF device BAR2 region consists of both functional
register space and migration control register space. From a
security point of view, it's not advisable to export the migration
control region to Guest.

Hence, override the ioctl/read/write/mmap methods to hide the
migration region and limit the access only to the functional register
space.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
 drivers/vfio/pci/hisi_acc_vfio_pci.c | 122 ++++++++++++++++++++++++++-
 1 file changed, 118 insertions(+), 4 deletions(-)

Comments

Alex Williamson Feb. 9, 2022, 9:41 p.m. UTC | #1
On Tue, 8 Feb 2022 13:34:22 +0000
Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:

> HiSilicon ACC VF device BAR2 region consists of both functional
> register space and migration control register space. From a
> security point of view, it's not advisable to export the migration
> control region to Guest.
> 
> Hence, override the ioctl/read/write/mmap methods to hide the
> migration region and limit the access only to the functional register
> space.
> 
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  drivers/vfio/pci/hisi_acc_vfio_pci.c | 122 ++++++++++++++++++++++++++-
>  1 file changed, 118 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vfio/pci/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisi_acc_vfio_pci.c
> index 8b59e628110e..563ed2cc861f 100644
> --- a/drivers/vfio/pci/hisi_acc_vfio_pci.c
> +++ b/drivers/vfio/pci/hisi_acc_vfio_pci.c
> @@ -13,6 +13,120 @@
>  #include <linux/vfio.h>
>  #include <linux/vfio_pci_core.h>
>  
> +static int hisi_acc_pci_rw_access_check(struct vfio_device *core_vdev,
> +					size_t count, loff_t *ppos,
> +					size_t *new_count)
> +{
> +	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
> +	struct vfio_pci_core_device *vdev =
> +		container_of(core_vdev, struct vfio_pci_core_device, vdev);
> +
> +	if (index == VFIO_PCI_BAR2_REGION_INDEX) {
> +		loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> +		resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;

Be careful here, there are nested assignment use cases.  This can only
survive one level of assignment before we've restricted more than we
intended.  If migration support is dependent on PF access, can we use
that to determine when to when to expose only half the BAR and when to
expose the full BAR?

We should also follow the mlx5 lead to use a vendor sub-directory below
drivers/vfio/pci/  Thanks,

Alex

> +
> +		/* Check if access is for migration control region */
> +		if (pos >= end)
> +			return -EINVAL;
> +
> +		*new_count = min(count, (size_t)(end - pos));
> +	}
> +
> +	return 0;
> +}
> +
> +static int hisi_acc_vfio_pci_mmap(struct vfio_device *core_vdev,
> +				  struct vm_area_struct *vma)
> +{
> +	struct vfio_pci_core_device *vdev =
> +		container_of(core_vdev, struct vfio_pci_core_device, vdev);
> +	unsigned int index;
> +
> +	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
> +	if (index == VFIO_PCI_BAR2_REGION_INDEX) {
> +		u64 req_len, pgoff, req_start;
> +		resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;
> +
> +		req_len = vma->vm_end - vma->vm_start;
> +		pgoff = vma->vm_pgoff &
> +			((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> +		req_start = pgoff << PAGE_SHIFT;
> +
> +		if (req_start + req_len > end)
> +			return -EINVAL;
> +	}
> +
> +	return vfio_pci_core_mmap(core_vdev, vma);
> +}
> +
> +static ssize_t hisi_acc_vfio_pci_write(struct vfio_device *core_vdev,
> +				       const char __user *buf, size_t count,
> +				       loff_t *ppos)
> +{
> +	size_t new_count = count;
> +	int ret;
> +
> +	ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count);
> +	if (ret)
> +		return ret;
> +
> +	return vfio_pci_core_write(core_vdev, buf, new_count, ppos);
> +}
> +
> +static ssize_t hisi_acc_vfio_pci_read(struct vfio_device *core_vdev,
> +				      char __user *buf, size_t count,
> +				      loff_t *ppos)
> +{
> +	size_t new_count = count;
> +	int ret;
> +
> +	ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count);
> +	if (ret)
> +		return ret;
> +
> +	return vfio_pci_core_read(core_vdev, buf, new_count, ppos);
> +}
> +
> +static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
> +				    unsigned long arg)
> +{
> +	struct vfio_pci_core_device *vdev =
> +		container_of(core_vdev, struct vfio_pci_core_device, vdev);
> +
> +	if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
> +		struct pci_dev *pdev = vdev->pdev;
> +		struct vfio_region_info info;
> +		unsigned long minsz;
> +
> +		minsz = offsetofend(struct vfio_region_info, offset);
> +
> +		if (copy_from_user(&info, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (info.argsz < minsz)
> +			return -EINVAL;
> +
> +		if (info.index == VFIO_PCI_BAR2_REGION_INDEX) {
> +			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
> +
> +			/*
> +			 * ACC VF dev BAR2 region consists of both functional
> +			 * register space and migration control register space.
> +			 * Report only the functional region to Guest.
> +			 */
> +			info.size = pci_resource_len(pdev, info.index) / 2;
> +
> +			info.flags = VFIO_REGION_INFO_FLAG_READ |
> +					VFIO_REGION_INFO_FLAG_WRITE |
> +					VFIO_REGION_INFO_FLAG_MMAP;
> +
> +			return copy_to_user((void __user *)arg, &info, minsz) ?
> +					    -EFAULT : 0;
> +		}
> +	}
> +	return vfio_pci_core_ioctl(core_vdev, cmd, arg);
> +}
> +
>  static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
>  {
>  	struct vfio_pci_core_device *vdev =
> @@ -32,10 +146,10 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
>  	.name = "hisi-acc-vfio-pci",
>  	.open_device = hisi_acc_vfio_pci_open_device,
>  	.close_device = vfio_pci_core_close_device,
> -	.ioctl = vfio_pci_core_ioctl,
> -	.read = vfio_pci_core_read,
> -	.write = vfio_pci_core_write,
> -	.mmap = vfio_pci_core_mmap,
> +	.ioctl = hisi_acc_vfio_pci_ioctl,
> +	.read = hisi_acc_vfio_pci_read,
> +	.write = hisi_acc_vfio_pci_write,
> +	.mmap = hisi_acc_vfio_pci_mmap,
>  	.request = vfio_pci_core_request,
>  	.match = vfio_pci_core_match,
>  };
Shameerali Kolothum Thodi Feb. 10, 2022, 3:01 p.m. UTC | #2
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: 09 February 2022 21:42
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org;
> linux-crypto@vger.kernel.org; jgg@nvidia.com; cohuck@redhat.com;
> mgurtovoy@nvidia.com; yishaih@nvidia.com; Linuxarm
> <linuxarm@huawei.com>; liulongfang <liulongfang@huawei.com>; Zengtao (B)
> <prime.zeng@hisilicon.com>; Jonathan Cameron
> <jonathan.cameron@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com>
> Subject: Re: [RFC v4 5/8] hisi_acc_vfio_pci: Restrict access to VF dev BAR2
> migration region
> 
> On Tue, 8 Feb 2022 13:34:22 +0000
> Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:
> 
> > HiSilicon ACC VF device BAR2 region consists of both functional
> > register space and migration control register space. From a
> > security point of view, it's not advisable to export the migration
> > control region to Guest.
> >
> > Hence, override the ioctl/read/write/mmap methods to hide the
> > migration region and limit the access only to the functional register
> > space.
> >
> > Signed-off-by: Shameer Kolothum
> <shameerali.kolothum.thodi@huawei.com>
> > ---
> >  drivers/vfio/pci/hisi_acc_vfio_pci.c | 122 ++++++++++++++++++++++++++-
> >  1 file changed, 118 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/vfio/pci/hisi_acc_vfio_pci.c
> b/drivers/vfio/pci/hisi_acc_vfio_pci.c
> > index 8b59e628110e..563ed2cc861f 100644
> > --- a/drivers/vfio/pci/hisi_acc_vfio_pci.c
> > +++ b/drivers/vfio/pci/hisi_acc_vfio_pci.c
> > @@ -13,6 +13,120 @@
> >  #include <linux/vfio.h>
> >  #include <linux/vfio_pci_core.h>
> >
> > +static int hisi_acc_pci_rw_access_check(struct vfio_device *core_vdev,
> > +					size_t count, loff_t *ppos,
> > +					size_t *new_count)
> > +{
> > +	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
> > +	struct vfio_pci_core_device *vdev =
> > +		container_of(core_vdev, struct vfio_pci_core_device, vdev);
> > +
> > +	if (index == VFIO_PCI_BAR2_REGION_INDEX) {
> > +		loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> > +		resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;
> 
> Be careful here, there are nested assignment use cases.  This can only
> survive one level of assignment before we've restricted more than we
> intended.  If migration support is dependent on PF access, can we use
> that to determine when to when to expose only half the BAR and when to
> expose the full BAR?

Ok. I will add a check here.

> We should also follow the mlx5 lead to use a vendor sub-directory below
> drivers/vfio/pci/

Sure.

Thanks,
Shameer

> 
> Alex
> 
> > +
> > +		/* Check if access is for migration control region */
> > +		if (pos >= end)
> > +			return -EINVAL;
> > +
> > +		*new_count = min(count, (size_t)(end - pos));
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int hisi_acc_vfio_pci_mmap(struct vfio_device *core_vdev,
> > +				  struct vm_area_struct *vma)
> > +{
> > +	struct vfio_pci_core_device *vdev =
> > +		container_of(core_vdev, struct vfio_pci_core_device, vdev);
> > +	unsigned int index;
> > +
> > +	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
> > +	if (index == VFIO_PCI_BAR2_REGION_INDEX) {
> > +		u64 req_len, pgoff, req_start;
> > +		resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;
> > +
> > +		req_len = vma->vm_end - vma->vm_start;
> > +		pgoff = vma->vm_pgoff &
> > +			((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> > +		req_start = pgoff << PAGE_SHIFT;
> > +
> > +		if (req_start + req_len > end)
> > +			return -EINVAL;
> > +	}
> > +
> > +	return vfio_pci_core_mmap(core_vdev, vma);
> > +}
> > +
> > +static ssize_t hisi_acc_vfio_pci_write(struct vfio_device *core_vdev,
> > +				       const char __user *buf, size_t count,
> > +				       loff_t *ppos)
> > +{
> > +	size_t new_count = count;
> > +	int ret;
> > +
> > +	ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos,
> &new_count);
> > +	if (ret)
> > +		return ret;
> > +
> > +	return vfio_pci_core_write(core_vdev, buf, new_count, ppos);
> > +}
> > +
> > +static ssize_t hisi_acc_vfio_pci_read(struct vfio_device *core_vdev,
> > +				      char __user *buf, size_t count,
> > +				      loff_t *ppos)
> > +{
> > +	size_t new_count = count;
> > +	int ret;
> > +
> > +	ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos,
> &new_count);
> > +	if (ret)
> > +		return ret;
> > +
> > +	return vfio_pci_core_read(core_vdev, buf, new_count, ppos);
> > +}
> > +
> > +static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned
> int cmd,
> > +				    unsigned long arg)
> > +{
> > +	struct vfio_pci_core_device *vdev =
> > +		container_of(core_vdev, struct vfio_pci_core_device, vdev);
> > +
> > +	if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
> > +		struct pci_dev *pdev = vdev->pdev;
> > +		struct vfio_region_info info;
> > +		unsigned long minsz;
> > +
> > +		minsz = offsetofend(struct vfio_region_info, offset);
> > +
> > +		if (copy_from_user(&info, (void __user *)arg, minsz))
> > +			return -EFAULT;
> > +
> > +		if (info.argsz < minsz)
> > +			return -EINVAL;
> > +
> > +		if (info.index == VFIO_PCI_BAR2_REGION_INDEX) {
> > +			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
> > +
> > +			/*
> > +			 * ACC VF dev BAR2 region consists of both functional
> > +			 * register space and migration control register space.
> > +			 * Report only the functional region to Guest.
> > +			 */
> > +			info.size = pci_resource_len(pdev, info.index) / 2;
> > +
> > +			info.flags = VFIO_REGION_INFO_FLAG_READ |
> > +					VFIO_REGION_INFO_FLAG_WRITE |
> > +					VFIO_REGION_INFO_FLAG_MMAP;
> > +
> > +			return copy_to_user((void __user *)arg, &info, minsz) ?
> > +					    -EFAULT : 0;
> > +		}
> > +	}
> > +	return vfio_pci_core_ioctl(core_vdev, cmd, arg);
> > +}
> > +
> >  static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
> >  {
> >  	struct vfio_pci_core_device *vdev =
> > @@ -32,10 +146,10 @@ static const struct vfio_device_ops
> hisi_acc_vfio_pci_ops = {
> >  	.name = "hisi-acc-vfio-pci",
> >  	.open_device = hisi_acc_vfio_pci_open_device,
> >  	.close_device = vfio_pci_core_close_device,
> > -	.ioctl = vfio_pci_core_ioctl,
> > -	.read = vfio_pci_core_read,
> > -	.write = vfio_pci_core_write,
> > -	.mmap = vfio_pci_core_mmap,
> > +	.ioctl = hisi_acc_vfio_pci_ioctl,
> > +	.read = hisi_acc_vfio_pci_read,
> > +	.write = hisi_acc_vfio_pci_write,
> > +	.mmap = hisi_acc_vfio_pci_mmap,
> >  	.request = vfio_pci_core_request,
> >  	.match = vfio_pci_core_match,
> >  };
Jason Gunthorpe Feb. 10, 2022, 3:19 p.m. UTC | #3
On Thu, Feb 10, 2022 at 03:01:50PM +0000, Shameerali Kolothum Thodi wrote:
> > > +	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
> > > +	struct vfio_pci_core_device *vdev =
> > > +		container_of(core_vdev, struct vfio_pci_core_device, vdev);
> > > +
> > > +	if (index == VFIO_PCI_BAR2_REGION_INDEX) {
> > > +		loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> > > +		resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;
> > 
> > Be careful here, there are nested assignment use cases.  This can only
> > survive one level of assignment before we've restricted more than we
> > intended.  If migration support is dependent on PF access, can we use
> > that to determine when to when to expose only half the BAR and when to
> > expose the full BAR?
> 
> Ok. I will add a check here.

You might be better to just install a different ops when migration is
not supported, none of this stuff should be actived in that case.

Jason
diff mbox series

Patch

diff --git a/drivers/vfio/pci/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisi_acc_vfio_pci.c
index 8b59e628110e..563ed2cc861f 100644
--- a/drivers/vfio/pci/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisi_acc_vfio_pci.c
@@ -13,6 +13,120 @@ 
 #include <linux/vfio.h>
 #include <linux/vfio_pci_core.h>
 
+static int hisi_acc_pci_rw_access_check(struct vfio_device *core_vdev,
+					size_t count, loff_t *ppos,
+					size_t *new_count)
+{
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+	if (index == VFIO_PCI_BAR2_REGION_INDEX) {
+		loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+		resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;
+
+		/* Check if access is for migration control region */
+		if (pos >= end)
+			return -EINVAL;
+
+		*new_count = min(count, (size_t)(end - pos));
+	}
+
+	return 0;
+}
+
+static int hisi_acc_vfio_pci_mmap(struct vfio_device *core_vdev,
+				  struct vm_area_struct *vma)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+	unsigned int index;
+
+	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+	if (index == VFIO_PCI_BAR2_REGION_INDEX) {
+		u64 req_len, pgoff, req_start;
+		resource_size_t end = pci_resource_len(vdev->pdev, index) / 2;
+
+		req_len = vma->vm_end - vma->vm_start;
+		pgoff = vma->vm_pgoff &
+			((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+		req_start = pgoff << PAGE_SHIFT;
+
+		if (req_start + req_len > end)
+			return -EINVAL;
+	}
+
+	return vfio_pci_core_mmap(core_vdev, vma);
+}
+
+static ssize_t hisi_acc_vfio_pci_write(struct vfio_device *core_vdev,
+				       const char __user *buf, size_t count,
+				       loff_t *ppos)
+{
+	size_t new_count = count;
+	int ret;
+
+	ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count);
+	if (ret)
+		return ret;
+
+	return vfio_pci_core_write(core_vdev, buf, new_count, ppos);
+}
+
+static ssize_t hisi_acc_vfio_pci_read(struct vfio_device *core_vdev,
+				      char __user *buf, size_t count,
+				      loff_t *ppos)
+{
+	size_t new_count = count;
+	int ret;
+
+	ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count);
+	if (ret)
+		return ret;
+
+	return vfio_pci_core_read(core_vdev, buf, new_count, ppos);
+}
+
+static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
+				    unsigned long arg)
+{
+	struct vfio_pci_core_device *vdev =
+		container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+	if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
+		struct pci_dev *pdev = vdev->pdev;
+		struct vfio_region_info info;
+		unsigned long minsz;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (info.index == VFIO_PCI_BAR2_REGION_INDEX) {
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+
+			/*
+			 * ACC VF dev BAR2 region consists of both functional
+			 * register space and migration control register space.
+			 * Report only the functional region to Guest.
+			 */
+			info.size = pci_resource_len(pdev, info.index) / 2;
+
+			info.flags = VFIO_REGION_INFO_FLAG_READ |
+					VFIO_REGION_INFO_FLAG_WRITE |
+					VFIO_REGION_INFO_FLAG_MMAP;
+
+			return copy_to_user((void __user *)arg, &info, minsz) ?
+					    -EFAULT : 0;
+		}
+	}
+	return vfio_pci_core_ioctl(core_vdev, cmd, arg);
+}
+
 static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
 {
 	struct vfio_pci_core_device *vdev =
@@ -32,10 +146,10 @@  static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
 	.name = "hisi-acc-vfio-pci",
 	.open_device = hisi_acc_vfio_pci_open_device,
 	.close_device = vfio_pci_core_close_device,
-	.ioctl = vfio_pci_core_ioctl,
-	.read = vfio_pci_core_read,
-	.write = vfio_pci_core_write,
-	.mmap = vfio_pci_core_mmap,
+	.ioctl = hisi_acc_vfio_pci_ioctl,
+	.read = hisi_acc_vfio_pci_read,
+	.write = hisi_acc_vfio_pci_write,
+	.mmap = hisi_acc_vfio_pci_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
 };