diff mbox series

[v2,02/13] vfio/mdev: Allow the mdev_parent_ops to specify the device driver to bind

Message ID 2-v2-7667f42c9bad+935-vfio3_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Remove vfio_mdev.c, mdev_parent_ops and more | expand

Commit Message

Jason Gunthorpe April 26, 2021, 8 p.m. UTC
This allows a mdev driver to opt out of using vfio_mdev.c, instead the
driver will provide a 'struct mdev_driver' and register directly with the
driver core.

Much of mdev_parent_ops becomes unused in this mode:
- create()/remove() are done via the mdev_driver probe()/remove()
- mdev_attr_groups becomes mdev_driver driver.dev_groups
- Wrapper function callbacks are replaced with the same ones from
  struct vfio_device_ops

Following patches convert all the drivers.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/mdev/mdev_core.c   | 64 ++++++++++++++++++++++++++++-----
 drivers/vfio/mdev/mdev_driver.c | 17 ++++++++-
 include/linux/mdev.h            |  3 ++
 3 files changed, 75 insertions(+), 9 deletions(-)

Comments

Cornelia Huck April 27, 2021, 12:32 p.m. UTC | #1
On Mon, 26 Apr 2021 17:00:04 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> This allows a mdev driver to opt out of using vfio_mdev.c, instead the
> driver will provide a 'struct mdev_driver' and register directly with the
> driver core.
> 
> Much of mdev_parent_ops becomes unused in this mode:
> - create()/remove() are done via the mdev_driver probe()/remove()
> - mdev_attr_groups becomes mdev_driver driver.dev_groups
> - Wrapper function callbacks are replaced with the same ones from
>   struct vfio_device_ops
> 
> Following patches convert all the drivers.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/mdev/mdev_core.c   | 64 ++++++++++++++++++++++++++++-----
>  drivers/vfio/mdev/mdev_driver.c | 17 ++++++++-
>  include/linux/mdev.h            |  3 ++
>  3 files changed, 75 insertions(+), 9 deletions(-)
> 

(...)

> +/*
> + * mdev drivers can refuse to bind during probe(), in this case we want to fail
> + * the creation of the mdev all the way back to sysfs. This is a weird model
> + * that doesn't fit in the driver core well, nor does it seem to appear any
> + * place else in the kernel, so use a simple hack.
> + */
> +static int mdev_bind_driver(struct mdev_device *mdev)
> +{
> +	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> +	int ret;
> +
> +	if (!drv)
> +		drv = &vfio_mdev_driver;
> +
> +	while (1) {
> +		device_lock(&mdev->dev);
> +		if (mdev->dev.driver == &drv->driver) {
> +			ret = 0;
> +			goto out_unlock;
> +		}
> +		if (mdev->probe_err) {
> +			ret = mdev->probe_err;
> +			goto out_unlock;
> +		}
> +		device_unlock(&mdev->dev);
> +		ret = device_attach(&mdev->dev);
> +		if (ret)
> +			return ret;

device_attach() can return 0 (no driver), 1 (bound), or -ENODEV (device
not registered). I would expect mdev_bind_driver() to return 0 in case
of success and !0 otherwise, and I think the calling code does so as
well?

> +		mdev->probe_err = -EINVAL;
> +	}
> +	return 0;
> +
> +out_unlock:
> +	device_unlock(&mdev->dev);
> +	return ret;
> +}
> +

(...)

Rest of the patch looks good to me.
Jason Gunthorpe April 27, 2021, 11:20 p.m. UTC | #2
On Tue, Apr 27, 2021 at 02:32:27PM +0200, Cornelia Huck wrote:

> > +		device_unlock(&mdev->dev);
> > +		ret = device_attach(&mdev->dev);
> > +		if (ret)
> > +			return ret;
> 
> device_attach() can return 0 (no driver), 1 (bound), or -ENODEV (device
> not registered). I would expect mdev_bind_driver() to return 0 in case
> of success and !0 otherwise, and I think the calling code does so as
> well?

Oops yes it can, I changed it to this, thanks!

@@ -269,7 +269,7 @@ static int mdev_bind_driver(struct mdev_device *mdev)
 		}
 		device_unlock(&mdev->dev);
 		ret = device_attach(&mdev->dev);
-		if (ret)
+		if (ret < 0)
 			return ret;
 		mdev->probe_err = -EINVAL;
 	}

Jason
Christoph Hellwig April 28, 2021, 6:03 a.m. UTC | #3
On Mon, Apr 26, 2021 at 05:00:04PM -0300, Jason Gunthorpe wrote:
> +/*
> + * mdev drivers can refuse to bind during probe(), in this case we want to fail
> + * the creation of the mdev all the way back to sysfs. This is a weird model
> + * that doesn't fit in the driver core well, nor does it seem to appear any
> + * place else in the kernel, so use a simple hack.
> + */
> +static int mdev_bind_driver(struct mdev_device *mdev)
> +{
> +	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> +	int ret;
> +
> +	if (!drv)
> +		drv = &vfio_mdev_driver;
> +
> +	while (1) {
> +		device_lock(&mdev->dev);
> +		if (mdev->dev.driver == &drv->driver) {
> +			ret = 0;
> +			goto out_unlock;
> +		}
> +		if (mdev->probe_err) {
> +			ret = mdev->probe_err;
> +			goto out_unlock;
> +		}
> +		device_unlock(&mdev->dev);
> +		ret = device_attach(&mdev->dev);
> +		if (ret)
> +			return ret;
> +		mdev->probe_err = -EINVAL;
> +	}
> +	return 0;
> +
> +out_unlock:
> +	device_unlock(&mdev->dev);
> +	return ret;
> +}

> +++ b/drivers/vfio/mdev/mdev_driver.c
> @@ -49,7 +49,7 @@ static int mdev_probe(struct device *dev)
>  		return ret;
>  
>  	if (drv->probe) {
> -		ret = drv->probe(mdev);
> +		ret = mdev->probe_err = drv->probe(mdev);
>  		if (ret)
>  			mdev_detach_iommu(mdev);
>  	}

>  	return 0;
>  }
>  
> +static int mdev_match(struct device *dev, struct device_driver *drv)
> +{
> +	struct mdev_device *mdev = to_mdev_device(dev);
> +	struct mdev_driver *target = mdev->type->parent->ops->device_driver;
> +
> +	/*
> +	 * The ops specify the device driver to connect, fall back to the old
> +	 * shim driver if the driver hasn't been converted.
> +	 */
> +	if (!target)
> +		target = &vfio_mdev_driver;
> +	return drv == &target->driver;
> +}

I still think this going the wrong way.  Why can't we enhance the core
driver code with a version of device_bind_driver() that does call into
->probe?  That probably seems like a better model for those existing
direct users of device_bind_driver or device_attach with a pre-set
->drv anyway.
Leon Romanovsky April 28, 2021, 6:44 a.m. UTC | #4
On Mon, Apr 26, 2021 at 05:00:04PM -0300, Jason Gunthorpe wrote:
> This allows a mdev driver to opt out of using vfio_mdev.c, instead the
> driver will provide a 'struct mdev_driver' and register directly with the
> driver core.
> 
> Much of mdev_parent_ops becomes unused in this mode:
> - create()/remove() are done via the mdev_driver probe()/remove()
> - mdev_attr_groups becomes mdev_driver driver.dev_groups
> - Wrapper function callbacks are replaced with the same ones from
>   struct vfio_device_ops
> 
> Following patches convert all the drivers.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/mdev/mdev_core.c   | 64 ++++++++++++++++++++++++++++-----
>  drivers/vfio/mdev/mdev_driver.c | 17 ++++++++-
>  include/linux/mdev.h            |  3 ++
>  3 files changed, 75 insertions(+), 9 deletions(-)

<...>

> +/*
> + * mdev drivers can refuse to bind during probe(), in this case we want to fail
> + * the creation of the mdev all the way back to sysfs. This is a weird model
> + * that doesn't fit in the driver core well, nor does it seem to appear any
> + * place else in the kernel, so use a simple hack.
> + */
> +static int mdev_bind_driver(struct mdev_device *mdev)
> +{
> +	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> +	int ret;
> +
> +	if (!drv)
> +		drv = &vfio_mdev_driver;
> +
> +	while (1) {
> +		device_lock(&mdev->dev);
> +		if (mdev->dev.driver == &drv->driver) {
> +			ret = 0;
> +			goto out_unlock;
> +		}
> +		if (mdev->probe_err) {
> +			ret = mdev->probe_err;
> +			goto out_unlock;
> +		}
> +		device_unlock(&mdev->dev);
> +		ret = device_attach(&mdev->dev);

The sequence above looks sketchy:
1. lock
2. check for driver
3. unlock
4. device_attach - it takes internally same lock as in step 1.

Why don't you rely on internal to device_attach() driver check?

Thanks
Dan Williams April 28, 2021, 7:56 a.m. UTC | #5
On Tue, Apr 27, 2021 at 11:04 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Mon, Apr 26, 2021 at 05:00:04PM -0300, Jason Gunthorpe wrote:
> > +/*
> > + * mdev drivers can refuse to bind during probe(), in this case we want to fail
> > + * the creation of the mdev all the way back to sysfs. This is a weird model
> > + * that doesn't fit in the driver core well, nor does it seem to appear any
> > + * place else in the kernel, so use a simple hack.
> > + */
> > +static int mdev_bind_driver(struct mdev_device *mdev)
> > +{
> > +     struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> > +     int ret;
> > +
> > +     if (!drv)
> > +             drv = &vfio_mdev_driver;
> > +
> > +     while (1) {
> > +             device_lock(&mdev->dev);
> > +             if (mdev->dev.driver == &drv->driver) {
> > +                     ret = 0;
> > +                     goto out_unlock;
> > +             }
> > +             if (mdev->probe_err) {
> > +                     ret = mdev->probe_err;
> > +                     goto out_unlock;
> > +             }
> > +             device_unlock(&mdev->dev);
> > +             ret = device_attach(&mdev->dev);
> > +             if (ret)
> > +                     return ret;
> > +             mdev->probe_err = -EINVAL;
> > +     }
> > +     return 0;
> > +
> > +out_unlock:
> > +     device_unlock(&mdev->dev);
> > +     return ret;
> > +}
>
> > +++ b/drivers/vfio/mdev/mdev_driver.c
> > @@ -49,7 +49,7 @@ static int mdev_probe(struct device *dev)
> >               return ret;
> >
> >       if (drv->probe) {
> > -             ret = drv->probe(mdev);
> > +             ret = mdev->probe_err = drv->probe(mdev);
> >               if (ret)
> >                       mdev_detach_iommu(mdev);
> >       }
>
> >       return 0;
> >  }
> >
> > +static int mdev_match(struct device *dev, struct device_driver *drv)
> > +{
> > +     struct mdev_device *mdev = to_mdev_device(dev);
> > +     struct mdev_driver *target = mdev->type->parent->ops->device_driver;
> > +
> > +     /*
> > +      * The ops specify the device driver to connect, fall back to the old
> > +      * shim driver if the driver hasn't been converted.
> > +      */
> > +     if (!target)
> > +             target = &vfio_mdev_driver;
> > +     return drv == &target->driver;
> > +}
>
> I still think this going the wrong way.  Why can't we enhance the core
> driver code with a version of device_bind_driver() that does call into
> ->probe?  That probably seems like a better model for those existing
> direct users of device_bind_driver or device_attach with a pre-set
> ->drv anyway.

Wouldn't that just be "export device_driver_attach()" so that drivers
can implement their own custom bind implementation?
Christoph Hellwig April 28, 2021, 12:41 p.m. UTC | #6
On Wed, Apr 28, 2021 at 12:56:21AM -0700, Dan Williams wrote:
> > I still think this going the wrong way.  Why can't we enhance the core
> > driver code with a version of device_bind_driver() that does call into
> > ->probe?  That probably seems like a better model for those existing
> > direct users of device_bind_driver or device_attach with a pre-set
> > ->drv anyway.
> 
> Wouldn't that just be "export device_driver_attach()" so that drivers
> can implement their own custom bind implementation?

That looks like it might be all that is needed.
Jason Gunthorpe April 28, 2021, 2 p.m. UTC | #7
On Wed, Apr 28, 2021 at 02:41:53PM +0200, Christoph Hellwig wrote:
> On Wed, Apr 28, 2021 at 12:56:21AM -0700, Dan Williams wrote:
> > > I still think this going the wrong way.  Why can't we enhance the core
> > > driver code with a version of device_bind_driver() that does call into
> > > ->probe?  That probably seems like a better model for those existing
> > > direct users of device_bind_driver or device_attach with a pre-set
> > > ->drv anyway.
> > 
> > Wouldn't that just be "export device_driver_attach()" so that drivers
> > can implement their own custom bind implementation?
> 
> That looks like it might be all that is needed.

I thought about doing it like that, it is generally a good idea,
however, if I add new API surface to the driver core I really want to
get rid of device_bind_driver(), or at least most of its users.

I'm pretty sure Greg will ask for it too.

So, I need a way to sequence that which doesn't mean I have to shelf
the mdev stuff for ages while I try to get acks from lots of places.

Leave this alone and fix it after? Export device_driver_attach() and
say to try and fix the rest after?

I think this will still need the ugly errno capture though..

Jason
Jason Gunthorpe April 28, 2021, 2:14 p.m. UTC | #8
On Wed, Apr 28, 2021 at 09:44:07AM +0300, Leon Romanovsky wrote:
> On Mon, Apr 26, 2021 at 05:00:04PM -0300, Jason Gunthorpe wrote:
> > This allows a mdev driver to opt out of using vfio_mdev.c, instead the
> > driver will provide a 'struct mdev_driver' and register directly with the
> > driver core.
> > 
> > Much of mdev_parent_ops becomes unused in this mode:
> > - create()/remove() are done via the mdev_driver probe()/remove()
> > - mdev_attr_groups becomes mdev_driver driver.dev_groups
> > - Wrapper function callbacks are replaced with the same ones from
> >   struct vfio_device_ops
> > 
> > Following patches convert all the drivers.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> >  drivers/vfio/mdev/mdev_core.c   | 64 ++++++++++++++++++++++++++++-----
> >  drivers/vfio/mdev/mdev_driver.c | 17 ++++++++-
> >  include/linux/mdev.h            |  3 ++
> >  3 files changed, 75 insertions(+), 9 deletions(-)
> 
> <...>
> 
> > +/*
> > + * mdev drivers can refuse to bind during probe(), in this case we want to fail
> > + * the creation of the mdev all the way back to sysfs. This is a weird model
> > + * that doesn't fit in the driver core well, nor does it seem to appear any
> > + * place else in the kernel, so use a simple hack.
> > + */
> > +static int mdev_bind_driver(struct mdev_device *mdev)
> > +{
> > +	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> > +	int ret;
> > +
> > +	if (!drv)
> > +		drv = &vfio_mdev_driver;
> > +
> > +	while (1) {
> > +		device_lock(&mdev->dev);
> > +		if (mdev->dev.driver == &drv->driver) {
> > +			ret = 0;
> > +			goto out_unlock;
> > +		}
> > +		if (mdev->probe_err) {
> > +			ret = mdev->probe_err;
> > +			goto out_unlock;
> > +		}
> > +		device_unlock(&mdev->dev);
> > +		ret = device_attach(&mdev->dev);
> 
> The sequence above looks sketchy:
> 1. lock
> 2. check for driver
> 3. unlock
> 4. device_attach - it takes internally same lock as in step 1.
> 
> Why don't you rely on internal to device_attach() driver check?

This is locking both probe_err and the check that the right driver is
bound. device_attach() doesn't tell you the same information

Jason
Leon Romanovsky April 28, 2021, 2:24 p.m. UTC | #9
On Wed, Apr 28, 2021 at 11:14:46AM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 28, 2021 at 09:44:07AM +0300, Leon Romanovsky wrote:
> > On Mon, Apr 26, 2021 at 05:00:04PM -0300, Jason Gunthorpe wrote:
> > > This allows a mdev driver to opt out of using vfio_mdev.c, instead the
> > > driver will provide a 'struct mdev_driver' and register directly with the
> > > driver core.
> > > 
> > > Much of mdev_parent_ops becomes unused in this mode:
> > > - create()/remove() are done via the mdev_driver probe()/remove()
> > > - mdev_attr_groups becomes mdev_driver driver.dev_groups
> > > - Wrapper function callbacks are replaced with the same ones from
> > >   struct vfio_device_ops
> > > 
> > > Following patches convert all the drivers.
> > > 
> > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > >  drivers/vfio/mdev/mdev_core.c   | 64 ++++++++++++++++++++++++++++-----
> > >  drivers/vfio/mdev/mdev_driver.c | 17 ++++++++-
> > >  include/linux/mdev.h            |  3 ++
> > >  3 files changed, 75 insertions(+), 9 deletions(-)
> > 
> > <...>
> > 
> > > +/*
> > > + * mdev drivers can refuse to bind during probe(), in this case we want to fail
> > > + * the creation of the mdev all the way back to sysfs. This is a weird model
> > > + * that doesn't fit in the driver core well, nor does it seem to appear any
> > > + * place else in the kernel, so use a simple hack.
> > > + */
> > > +static int mdev_bind_driver(struct mdev_device *mdev)
> > > +{
> > > +	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
> > > +	int ret;
> > > +
> > > +	if (!drv)
> > > +		drv = &vfio_mdev_driver;
> > > +
> > > +	while (1) {
> > > +		device_lock(&mdev->dev);
> > > +		if (mdev->dev.driver == &drv->driver) {
> > > +			ret = 0;
> > > +			goto out_unlock;
> > > +		}
> > > +		if (mdev->probe_err) {
> > > +			ret = mdev->probe_err;
> > > +			goto out_unlock;
> > > +		}
> > > +		device_unlock(&mdev->dev);
> > > +		ret = device_attach(&mdev->dev);
> > 
> > The sequence above looks sketchy:
> > 1. lock
> > 2. check for driver
> > 3. unlock
> > 4. device_attach - it takes internally same lock as in step 1.
> > 
> > Why don't you rely on internal to device_attach() driver check?
> 
> This is locking both probe_err and the check that the right driver is
> bound. device_attach() doesn't tell you the same information

device_attach() returns you the information that driver is already
bound, which is the same as you are doing here, because you don't
unbind "the wrong driver".

Thanks

> 
> Jason
Dan Williams April 28, 2021, 7:58 p.m. UTC | #10
On Wed, Apr 28, 2021 at 7:00 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Wed, Apr 28, 2021 at 02:41:53PM +0200, Christoph Hellwig wrote:
> > On Wed, Apr 28, 2021 at 12:56:21AM -0700, Dan Williams wrote:
> > > > I still think this going the wrong way.  Why can't we enhance the core
> > > > driver code with a version of device_bind_driver() that does call into
> > > > ->probe?  That probably seems like a better model for those existing
> > > > direct users of device_bind_driver or device_attach with a pre-set
> > > > ->drv anyway.
> > >
> > > Wouldn't that just be "export device_driver_attach()" so that drivers
> > > can implement their own custom bind implementation?
> >
> > That looks like it might be all that is needed.
>
> I thought about doing it like that, it is generally a good idea,
> however, if I add new API surface to the driver core I really want to
> get rid of device_bind_driver(), or at least most of its users.

I might be missing where you are going with this comment, but
device_driver_attach() isn't a drop-in replacement for
device_bind_driver(). So while I agree with you that it's a
significant escalation of the driver core API surface, I don't see why
it would be necessarily predicated on removing device_bind_driver()?

If this export prevented a new device_bind_driver() user, I think
that's a net positive, because device_bind_driver() seems an odd way
to implement bus code to me.

> I'm pretty sure Greg will ask for it too.

I think it's worth asking.

I have an ulterior motive / additional use case in mind here which is
the work-in-progress cleanup of the DSA driver. It uses the driver
model to assign an engine to different use cases via driver binding.
However, it currently has a custom bind implementation that does not
operate like a typical /sys/bus/$bus/drivers interface. If
device_driver_attach() was exported then some DSA compat code could
model the current way while also allowing a transition path to the
right way. As is I was telling Dave that the compat code would need to
be built-in because I don't think fixing a DSA device-model problem is
enough justification on its own to ask for a device_driver_attach()
export.

> So, I need a way to sequence that which doesn't mean I have to shelf
> the mdev stuff for ages while I try to get acks from lots of places.

Lets see if it can stand alone.
Jason Gunthorpe April 28, 2021, 11:38 p.m. UTC | #11
On Wed, Apr 28, 2021 at 12:58:29PM -0700, Dan Williams wrote:
> On Wed, Apr 28, 2021 at 7:00 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Wed, Apr 28, 2021 at 02:41:53PM +0200, Christoph Hellwig wrote:
> > > On Wed, Apr 28, 2021 at 12:56:21AM -0700, Dan Williams wrote:
> > > > > I still think this going the wrong way.  Why can't we enhance the core
> > > > > driver code with a version of device_bind_driver() that does call into
> > > > > ->probe?  That probably seems like a better model for those existing
> > > > > direct users of device_bind_driver or device_attach with a pre-set
> > > > > ->drv anyway.
> > > >
> > > > Wouldn't that just be "export device_driver_attach()" so that drivers
> > > > can implement their own custom bind implementation?
> > >
> > > That looks like it might be all that is needed.
> >
> > I thought about doing it like that, it is generally a good idea,
> > however, if I add new API surface to the driver core I really want to
> > get rid of device_bind_driver(), or at least most of its users.
> 
> I might be missing where you are going with this comment, but
> device_driver_attach() isn't a drop-in replacement for
> device_bind_driver().

Many of the places calling device_bind_driver() are wonky things
like this:

        dev->dev.driver = &drv->link.driver;
        if (pnp_bus_type.probe(&dev->dev))
                goto err_out;
        if (device_bind_driver(&dev->dev))
                goto err_out;

So device_driver_attach() does replace that - with some differences.

Notable is that bind_driver requires the driver_lock but driver_attach
gets it internally. However, as far as I can tell, none of the
bind_driver callers do get it, so huh.

Aside from the driver_lock there are lots of small subtle differences
that are probably not important unless they are for some very complex
reason. :\

Of the callers:
  drivers/input/serio/serio.c
    This definitely doesn't have the device_lock
    It uses connect instead of probe and for some reason uses its own
    mutex instead of the device_lock. Murky.

  drivers/input/gameport/gameport.c
    This looks alot like serio, same comments

  drivers/net/phy/phy_device.c
    device_driver_attach() is better, looks unlikely that
    device_lock is properly held here. Little unclear on what
    the bus is and if bus->probe will be OK

  drivers/net/wireless/mac80211_hwsim.c
    Definitely does not hold the driver lock, the class and the driver
    have NULL probes so this could be changed

  drivers/pnp/card.c
    device_driver_attach() is better, very unlikely that a random
    device pulled from a linked list has the driver_lock held

  drivers/usb/core/driver.c
    This comment says the caller must have the device lock, but it
    doesn't call probe, and when I look at cdc_ether.c I wonder
    where the device_lock is hidden? Murky.

Basically, there is some mess here, and eliminating
device_bind_driver() for device_driver_attach() is quite a reasonable
cleanup. But hard, complex enough it needs testing each patch.

The other driver self bind scenario is to directly assign driver
before device_add, but I have a hard time finding those cases in the
tree with grep.

> If this export prevented a new device_bind_driver() user, I think
> that's a net positive, because device_bind_driver() seems an odd way
> to implement bus code to me.

Yes, I looked into why it is like this and concluded it is just very
very old.
 
> I have an ulterior motive / additional use case in mind here which is
> the work-in-progress cleanup of the DSA driver. It uses the driver
> model to assign an engine to different use cases via driver binding.
> However, it currently has a custom bind implementation that does not
> operate like a typical /sys/bus/$bus/drivers interface. If
> device_driver_attach() was exported then some DSA compat code could
> model the current way while also allowing a transition path to the
> right way. As is I was telling Dave that the compat code would need to
> be built-in because I don't think fixing a DSA device-model problem is
> enough justification on its own to ask for a device_driver_attach()
> export.

Can you make and test a DSA patch? If we have two concrete things and
I can sketch two more out of the above that should meet Greg's "need 4
things" general thinking for driver core API changes.

But I still would like to keep this going while we wait for acks, you
know how long that can take...

Jason
Dave Jiang April 29, 2021, midnight UTC | #12
On 4/28/2021 4:38 PM, Jason Gunthorpe wrote:
> On Wed, Apr 28, 2021 at 12:58:29PM -0700, Dan Williams wrote:
>> On Wed, Apr 28, 2021 at 7:00 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>>> On Wed, Apr 28, 2021 at 02:41:53PM +0200, Christoph Hellwig wrote:
>>>> On Wed, Apr 28, 2021 at 12:56:21AM -0700, Dan Williams wrote:
>>>>>> I still think this going the wrong way.  Why can't we enhance the core
>>>>>> driver code with a version of device_bind_driver() that does call into
>>>>>> ->probe?  That probably seems like a better model for those existing
>>>>>> direct users of device_bind_driver or device_attach with a pre-set
>>>>>> ->drv anyway.
>>>>> Wouldn't that just be "export device_driver_attach()" so that drivers
>>>>> can implement their own custom bind implementation?
>>>> That looks like it might be all that is needed.
>>> I thought about doing it like that, it is generally a good idea,
>>> however, if I add new API surface to the driver core I really want to
>>> get rid of device_bind_driver(), or at least most of its users.
>> I might be missing where you are going with this comment, but
>> device_driver_attach() isn't a drop-in replacement for
>> device_bind_driver().
> Many of the places calling device_bind_driver() are wonky things
> like this:
>
>          dev->dev.driver = &drv->link.driver;
>          if (pnp_bus_type.probe(&dev->dev))
>                  goto err_out;
>          if (device_bind_driver(&dev->dev))
>                  goto err_out;
>
> So device_driver_attach() does replace that - with some differences.
>
> Notable is that bind_driver requires the driver_lock but driver_attach
> gets it internally. However, as far as I can tell, none of the
> bind_driver callers do get it, so huh.
>
> Aside from the driver_lock there are lots of small subtle differences
> that are probably not important unless they are for some very complex
> reason. :\
>
> Of the callers:
>    drivers/input/serio/serio.c
>      This definitely doesn't have the device_lock
>      It uses connect instead of probe and for some reason uses its own
>      mutex instead of the device_lock. Murky.
>
>    drivers/input/gameport/gameport.c
>      This looks alot like serio, same comments
>
>    drivers/net/phy/phy_device.c
>      device_driver_attach() is better, looks unlikely that
>      device_lock is properly held here. Little unclear on what
>      the bus is and if bus->probe will be OK
>
>    drivers/net/wireless/mac80211_hwsim.c
>      Definitely does not hold the driver lock, the class and the driver
>      have NULL probes so this could be changed
>
>    drivers/pnp/card.c
>      device_driver_attach() is better, very unlikely that a random
>      device pulled from a linked list has the driver_lock held
>
>    drivers/usb/core/driver.c
>      This comment says the caller must have the device lock, but it
>      doesn't call probe, and when I look at cdc_ether.c I wonder
>      where the device_lock is hidden? Murky.
>
> Basically, there is some mess here, and eliminating
> device_bind_driver() for device_driver_attach() is quite a reasonable
> cleanup. But hard, complex enough it needs testing each patch.
>
> The other driver self bind scenario is to directly assign driver
> before device_add, but I have a hard time finding those cases in the
> tree with grep.
>
>> If this export prevented a new device_bind_driver() user, I think
>> that's a net positive, because device_bind_driver() seems an odd way
>> to implement bus code to me.
> Yes, I looked into why it is like this and concluded it is just very
> very old.
>   
>> I have an ulterior motive / additional use case in mind here which is
>> the work-in-progress cleanup of the DSA driver. It uses the driver
>> model to assign an engine to different use cases via driver binding.
>> However, it currently has a custom bind implementation that does not
>> operate like a typical /sys/bus/$bus/drivers interface. If
>> device_driver_attach() was exported then some DSA compat code could
>> model the current way while also allowing a transition path to the
>> right way. As is I was telling Dave that the compat code would need to
>> be built-in because I don't think fixing a DSA device-model problem is
>> enough justification on its own to ask for a device_driver_attach()
>> export.
> Can you make and test a DSA patch? If we have two concrete things and
> I can sketch two more out of the above that should meet Greg's "need 4
> things" general thinking for driver core API changes.

Working on it. Having device_driver_attach() exported will definitely 
make things easier on my side. Thanks for doing the heavy lifting.


>
> But I still would like to keep this going while we wait for acks, you
> know how long that can take...
>
> Jason
Christoph Hellwig April 29, 2021, 6:51 a.m. UTC | #13
On Wed, Apr 28, 2021 at 11:00:05AM -0300, Jason Gunthorpe wrote:
> I thought about doing it like that, it is generally a good idea,
> however, if I add new API surface to the driver core I really want to
> get rid of device_bind_driver(), or at least most of its users.
> 
> I'm pretty sure Greg will ask for it too.

Well, let's ask them.  I though I had Cced him on my original mail,
but must have missed that.
Christoph Hellwig May 4, 2021, 9:36 a.m. UTC | #14
On Wed, Apr 28, 2021 at 11:00:05AM -0300, Jason Gunthorpe wrote:
> I thought about doing it like that, it is generally a good idea,
> however, if I add new API surface to the driver core I really want to
> get rid of device_bind_driver(), or at least most of its users.
> 
> I'm pretty sure Greg will ask for it too.
> 
> So, I need a way to sequence that which doesn't mean I have to shelf
> the mdev stuff for ages while I try to get acks from lots of places.
> 
> Leave this alone and fix it after? Export device_driver_attach() and
> say to try and fix the rest after?

Maybe.  Or convert one or two samples.

> I think this will still need the ugly errno capture though..

Why?
Jason Gunthorpe May 4, 2021, 11:30 a.m. UTC | #15
On Tue, May 04, 2021 at 11:36:36AM +0200, Christoph Hellwig wrote:
> On Wed, Apr 28, 2021 at 11:00:05AM -0300, Jason Gunthorpe wrote:
> > I thought about doing it like that, it is generally a good idea,
> > however, if I add new API surface to the driver core I really want to
> > get rid of device_bind_driver(), or at least most of its users.
> > 
> > I'm pretty sure Greg will ask for it too.
> > 
> > So, I need a way to sequence that which doesn't mean I have to shelf
> > the mdev stuff for ages while I try to get acks from lots of places.
> > 
> > Leave this alone and fix it after? Export device_driver_attach() and
> > say to try and fix the rest after?
> 
> Maybe.  Or convert one or two samples.

The conversions are easy I just can't test them or completely tell if
they are correct..
 
> > I think this will still need the ugly errno capture though..
> 
> Why?

Several of the mdev drivers are checking some predicate during their
new probe function, like total # of devices. So if userspace exceeds
that then the old behavior was to fail the sysfs create operation. eg:


static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
{
	if (private->state == VFIO_CCW_STATE_NOT_OPER)
		return -ENODEV;

	if (atomic_dec_if_positive(&private->avail) < 0)
		return -EPERM;

Without the errno capture this doesn't work anymore and things end
succeeding to create a device and but failing to attach a driver.

It could be changed to loose the errno and just return with some
generic -EINVAL if no driver bound, but that seems pretty ugly too.

Returning the probe error from some device_driver_attach() also make
some sense, but revising the code to do that is a big touch and this
is so strange I don't know if it is worth it.

Jason
Jason Gunthorpe May 26, 2021, 12:42 a.m. UTC | #16
On Wed, Apr 28, 2021 at 12:58:29PM -0700, Dan Williams wrote:

> I have an ulterior motive / additional use case in mind here which is
> the work-in-progress cleanup of the DSA driver. 

Well, I worked on it for a while, please take a look at this:

https://github.com/jgunthorpe/linux/commits/device_driver_attach

It makes device_driver_attach() into what this mdev stuff needs, and I
think improves the sysfs bind file as a side effect.

Is it what you need for DSA?

Thanks,
Jason
Dan Williams May 26, 2021, 1:42 a.m. UTC | #17
On Tue, May 25, 2021 at 5:42 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Wed, Apr 28, 2021 at 12:58:29PM -0700, Dan Williams wrote:
>
> > I have an ulterior motive / additional use case in mind here which is
> > the work-in-progress cleanup of the DSA driver.
>
> Well, I worked on it for a while, please take a look at this:
>
> https://github.com/jgunthorpe/linux/commits/device_driver_attach
>
> It makes device_driver_attach() into what this mdev stuff needs, and I
> think improves the sysfs bind file as a side effect.

Nice, yes, it looks like it does.

> Is it what you need for DSA?

Yes.
Christoph Hellwig May 27, 2021, 11:44 a.m. UTC | #18
On Tue, May 25, 2021 at 09:42:30PM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 28, 2021 at 12:58:29PM -0700, Dan Williams wrote:
> 
> > I have an ulterior motive / additional use case in mind here which is
> > the work-in-progress cleanup of the DSA driver. 
> 
> Well, I worked on it for a while, please take a look at this:
> 
> https://github.com/jgunthorpe/linux/commits/device_driver_attach
> 
> It makes device_driver_attach() into what this mdev stuff needs, and I
> think improves the sysfs bind file as a side effect.

This looks great.  Please get it out to Greg ASAP!
Jason Gunthorpe May 27, 2021, 2:53 p.m. UTC | #19
On Thu, May 27, 2021 at 01:44:52PM +0200, Christoph Hellwig wrote:
> On Tue, May 25, 2021 at 09:42:30PM -0300, Jason Gunthorpe wrote:
> > On Wed, Apr 28, 2021 at 12:58:29PM -0700, Dan Williams wrote:
> > 
> > > I have an ulterior motive / additional use case in mind here which is
> > > the work-in-progress cleanup of the DSA driver. 
> > 
> > Well, I worked on it for a while, please take a look at this:
> > 
> > https://github.com/jgunthorpe/linux/commits/device_driver_attach
> > 
> > It makes device_driver_attach() into what this mdev stuff needs, and I
> > think improves the sysfs bind file as a side effect.
> 
> This looks great.  Please get it out to Greg ASAP!

Thanks, I need to test it all carefully it is pretty tricky. You are
OK with the funky in/out flag? That was the most ick part

Jason
Christoph Hellwig May 27, 2021, 3:13 p.m. UTC | #20
On Thu, May 27, 2021 at 11:53:42AM -0300, Jason Gunthorpe wrote:
> > This looks great.  Please get it out to Greg ASAP!
> 
> Thanks, I need to test it all carefully it is pretty tricky. You are
> OK with the funky in/out flag? That was the most ick part

It is a little ugly, but what are the alternatives?  Would an input
flag to never return EPROBE_DEFER work instead?
diff mbox series

Patch

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index ff8c1a84516698..51b8a9fcf866ad 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -94,9 +94,11 @@  static void mdev_device_remove_common(struct mdev_device *mdev)
 	mdev_remove_sysfs_files(mdev);
 	device_del(&mdev->dev);
 	lockdep_assert_held(&parent->unreg_sem);
-	ret = parent->ops->remove(mdev);
-	if (ret)
-		dev_err(&mdev->dev, "Remove failed: err=%d\n", ret);
+	if (parent->ops->remove) {
+		ret = parent->ops->remove(mdev);
+		if (ret)
+			dev_err(&mdev->dev, "Remove failed: err=%d\n", ret);
+	}
 
 	/* Balances with device_initialize() */
 	put_device(&mdev->dev);
@@ -127,7 +129,9 @@  int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
 	char *envp[] = { env_string, NULL };
 
 	/* check for mandatory ops */
-	if (!ops || !ops->create || !ops->remove || !ops->supported_type_groups)
+	if (!ops || !ops->supported_type_groups)
+		return -EINVAL;
+	if (!ops->device_driver && (!ops->create || !ops->remove))
 		return -EINVAL;
 
 	dev = get_device(dev);
@@ -251,6 +255,43 @@  static void mdev_device_release(struct device *dev)
 	kfree(mdev);
 }
 
+/*
+ * mdev drivers can refuse to bind during probe(), in this case we want to fail
+ * the creation of the mdev all the way back to sysfs. This is a weird model
+ * that doesn't fit in the driver core well, nor does it seem to appear any
+ * place else in the kernel, so use a simple hack.
+ */
+static int mdev_bind_driver(struct mdev_device *mdev)
+{
+	struct mdev_driver *drv = mdev->type->parent->ops->device_driver;
+	int ret;
+
+	if (!drv)
+		drv = &vfio_mdev_driver;
+
+	while (1) {
+		device_lock(&mdev->dev);
+		if (mdev->dev.driver == &drv->driver) {
+			ret = 0;
+			goto out_unlock;
+		}
+		if (mdev->probe_err) {
+			ret = mdev->probe_err;
+			goto out_unlock;
+		}
+		device_unlock(&mdev->dev);
+		ret = device_attach(&mdev->dev);
+		if (ret)
+			return ret;
+		mdev->probe_err = -EINVAL;
+	}
+	return 0;
+
+out_unlock:
+	device_unlock(&mdev->dev);
+	return ret;
+}
+
 int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
 {
 	int ret;
@@ -296,14 +337,20 @@  int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
 		goto out_put_device;
 	}
 
-	ret = parent->ops->create(mdev);
-	if (ret)
-		goto out_unlock;
+	if (parent->ops->create) {
+		ret = parent->ops->create(mdev);
+		if (ret)
+			goto out_unlock;
+	}
 
 	ret = device_add(&mdev->dev);
 	if (ret)
 		goto out_remove;
 
+	ret = mdev_bind_driver(mdev);
+	if (ret)
+		goto out_del;
+
 	ret = mdev_create_sysfs_files(mdev);
 	if (ret)
 		goto out_del;
@@ -317,7 +364,8 @@  int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
 out_del:
 	device_del(&mdev->dev);
 out_remove:
-	parent->ops->remove(mdev);
+	if (parent->ops->remove)
+		parent->ops->remove(mdev);
 out_unlock:
 	up_read(&parent->unreg_sem);
 out_put_device:
diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
index 041699571b7e55..6e96c023d7823d 100644
--- a/drivers/vfio/mdev/mdev_driver.c
+++ b/drivers/vfio/mdev/mdev_driver.c
@@ -49,7 +49,7 @@  static int mdev_probe(struct device *dev)
 		return ret;
 
 	if (drv->probe) {
-		ret = drv->probe(mdev);
+		ret = mdev->probe_err = drv->probe(mdev);
 		if (ret)
 			mdev_detach_iommu(mdev);
 	}
@@ -71,10 +71,25 @@  static int mdev_remove(struct device *dev)
 	return 0;
 }
 
+static int mdev_match(struct device *dev, struct device_driver *drv)
+{
+	struct mdev_device *mdev = to_mdev_device(dev);
+	struct mdev_driver *target = mdev->type->parent->ops->device_driver;
+
+	/*
+	 * The ops specify the device driver to connect, fall back to the old
+	 * shim driver if the driver hasn't been converted.
+	 */
+	if (!target)
+		target = &vfio_mdev_driver;
+	return drv == &target->driver;
+}
+
 struct bus_type mdev_bus_type = {
 	.name		= "mdev",
 	.probe		= mdev_probe,
 	.remove		= mdev_remove,
+	.match		= mdev_match,
 };
 EXPORT_SYMBOL_GPL(mdev_bus_type);
 
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 1fb34ea394ad46..49cc4f65120d57 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -19,6 +19,7 @@  struct mdev_device {
 	struct list_head next;
 	struct mdev_type *type;
 	struct device *iommu_device;
+	int probe_err;
 	bool active;
 };
 
@@ -55,6 +56,7 @@  struct device *mtype_get_parent_dev(struct mdev_type *mtype);
  * register the device to mdev module.
  *
  * @owner:		The module owner.
+ * @device_driver:	Which device driver to probe() on newly created devices
  * @dev_attr_groups:	Attributes of the parent device.
  * @mdev_attr_groups:	Attributes of the mediated device.
  * @supported_type_groups: Attributes to define supported types. It is mandatory
@@ -103,6 +105,7 @@  struct device *mtype_get_parent_dev(struct mdev_type *mtype);
  **/
 struct mdev_parent_ops {
 	struct module   *owner;
+	struct mdev_driver *device_driver;
 	const struct attribute_group **dev_attr_groups;
 	const struct attribute_group **mdev_attr_groups;
 	struct attribute_group **supported_type_groups;