Message ID | 20190514145422.16923-3-pagupta@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtio pmem driver | expand |
On 5/14/19 7:54 AM, Pankaj Gupta wrote: > diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig > index 35897649c24f..94bad084ebab 100644 > --- a/drivers/virtio/Kconfig > +++ b/drivers/virtio/Kconfig > @@ -42,6 +42,17 @@ config VIRTIO_PCI_LEGACY > > If unsure, say Y. > > +config VIRTIO_PMEM > + tristate "Support for virtio pmem driver" > + depends on VIRTIO > + depends on LIBNVDIMM > + help > + This driver provides access to virtio-pmem devices, storage devices > + that are mapped into the physical address space - similar to NVDIMMs > + - with a virtio-based flushing interface. > + > + If unsure, say M. <beep> from Documentation/process/coding-style.rst: "Lines under a ``config`` definition are indented with one tab, while help text is indented an additional two spaces." > + > config VIRTIO_BALLOON > tristate "Virtio balloon driver" > depends on VIRTIO thanks.
> On 5/14/19 7:54 AM, Pankaj Gupta wrote: > > diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig > > index 35897649c24f..94bad084ebab 100644 > > --- a/drivers/virtio/Kconfig > > +++ b/drivers/virtio/Kconfig > > @@ -42,6 +42,17 @@ config VIRTIO_PCI_LEGACY > > > > If unsure, say Y. > > > > +config VIRTIO_PMEM > > + tristate "Support for virtio pmem driver" > > + depends on VIRTIO > > + depends on LIBNVDIMM > > + help > > + This driver provides access to virtio-pmem devices, storage devices > > + that are mapped into the physical address space - similar to NVDIMMs > > + - with a virtio-based flushing interface. > > + > > + If unsure, say M. > > <beep> > from Documentation/process/coding-style.rst: > "Lines under a ``config`` definition > are indented with one tab, while help text is indented an additional two > spaces." Ah... I changed the help text and 'checkpatch' did not say anything :( . I will wait for Dan to see if it's possible to add the two spaces to the help text while applying the series. Thanks, Pankaj > > > + > > config VIRTIO_BALLOON > > tristate "Virtio balloon driver" > > depends on VIRTIO > > thanks. > -- > ~Randy > >
> + vpmem->vdev = vdev; > + vdev->priv = vpmem; > + err = init_vq(vpmem); > + if (err) { > + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); > + goto out_err; > + } > + > + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > + start, &vpmem->start); > + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > + size, &vpmem->size); > + > + res.start = vpmem->start; > + res.end = vpmem->start + vpmem->size-1; nit: " - 1;" > + vpmem->nd_desc.provider_name = "virtio-pmem"; > + vpmem->nd_desc.module = THIS_MODULE; > + > + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, > + &vpmem->nd_desc); > + if (!vpmem->nvdimm_bus) { > + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); > + err = -ENXIO; > + goto out_vq; > + } > + > + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); > + > + ndr_desc.res = &res; > + ndr_desc.numa_node = nid; > + ndr_desc.flush = async_pmem_flush; > + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); > + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); > + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); > + if (!nd_region) { > + dev_err(&vdev->dev, "failed to create nvdimm region\n"); > + err = -ENXIO; > + goto out_nd; > + } > + nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent); > + return 0; > +out_nd: > + nvdimm_bus_unregister(vpmem->nvdimm_bus); > +out_vq: > + vdev->config->del_vqs(vdev); > +out_err: > + return err; > +} > + > +static void virtio_pmem_remove(struct virtio_device *vdev) > +{ > + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); > + > + nvdimm_bus_unregister(nvdimm_bus); > + vdev->config->del_vqs(vdev); > + vdev->config->reset(vdev); > +} > + > +static struct virtio_driver virtio_pmem_driver = { > + .driver.name = KBUILD_MODNAME, > + .driver.owner = THIS_MODULE, > + .id_table = id_table, > + .probe = virtio_pmem_probe, > + .remove = virtio_pmem_remove, > +}; > + > +module_virtio_driver(virtio_pmem_driver); > +MODULE_DEVICE_TABLE(virtio, id_table); > 
+MODULE_DESCRIPTION("Virtio pmem driver"); > +MODULE_LICENSE("GPL"); > diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h > new file mode 100644 > index 000000000000..ab1da877575d > --- /dev/null > +++ b/drivers/nvdimm/virtio_pmem.h > @@ -0,0 +1,60 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * virtio_pmem.h: virtio pmem Driver > + * > + * Discovers persistent memory range information > + * from host and provides a virtio based flushing > + * interface. > + **/ > + > +#ifndef _LINUX_VIRTIO_PMEM_H > +#define _LINUX_VIRTIO_PMEM_H > + > +#include <linux/virtio_ids.h> > +#include <linux/module.h> > +#include <linux/virtio_config.h> > +#include <uapi/linux/virtio_pmem.h> > +#include <linux/libnvdimm.h> > +#include <linux/spinlock.h> > + > +struct virtio_pmem_request { > + /* Host return status corresponding to flush request */ > + int ret; > + > + /* command name*/ > + char name[16]; So ... why are we sending string commands and expecting native-endianness integers, instead of defining a proper request/response structure + request types in include/uapi/linux/virtio_pmem.h, like struct virtio_pmem_resp { __virtio32 ret; } #define VIRTIO_PMEM_REQ_TYPE_FLUSH 1 struct virtio_pmem_req { __virtio16 type; } ... and this way we also define a proper endianness format for exchange and keep it extensible @MST, what's your take on this?
On Tue, May 14, 2019 at 8:25 AM Pankaj Gupta <pagupta@redhat.com> wrote: > > > > On 5/14/19 7:54 AM, Pankaj Gupta wrote: > > > diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig > > > index 35897649c24f..94bad084ebab 100644 > > > --- a/drivers/virtio/Kconfig > > > +++ b/drivers/virtio/Kconfig > > > @@ -42,6 +42,17 @@ config VIRTIO_PCI_LEGACY > > > > > > If unsure, say Y. > > > > > > +config VIRTIO_PMEM > > > + tristate "Support for virtio pmem driver" > > > + depends on VIRTIO > > > + depends on LIBNVDIMM > > > + help > > > + This driver provides access to virtio-pmem devices, storage devices > > > + that are mapped into the physical address space - similar to NVDIMMs > > > + - with a virtio-based flushing interface. > > > + > > > + If unsure, say M. > > > > <beep> > > from Documentation/process/coding-style.rst: > > "Lines under a ``config`` definition > > are indented with one tab, while help text is indented an additional two > > spaces." > > ah... I changed help text and 'checkpatch' did not say anything :( . > > Will wait for Dan, If its possible to add two spaces to help text while applying > the series. I'm inclined to handle this with a fixup appended to the end of the series just so the patchwork-bot does not get confused by the content changing from what was sent to the list.
On 15.05.19 22:46, David Hildenbrand wrote: >> + vpmem->vdev = vdev; >> + vdev->priv = vpmem; >> + err = init_vq(vpmem); >> + if (err) { >> + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); >> + goto out_err; >> + } >> + >> + virtio_cread(vpmem->vdev, struct virtio_pmem_config, >> + start, &vpmem->start); >> + virtio_cread(vpmem->vdev, struct virtio_pmem_config, >> + size, &vpmem->size); >> + >> + res.start = vpmem->start; >> + res.end = vpmem->start + vpmem->size-1; > > nit: " - 1;" > >> + vpmem->nd_desc.provider_name = "virtio-pmem"; >> + vpmem->nd_desc.module = THIS_MODULE; >> + >> + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, >> + &vpmem->nd_desc); >> + if (!vpmem->nvdimm_bus) { >> + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); >> + err = -ENXIO; >> + goto out_vq; >> + } >> + >> + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); >> + >> + ndr_desc.res = &res; >> + ndr_desc.numa_node = nid; >> + ndr_desc.flush = async_pmem_flush; >> + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); >> + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); >> + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); >> + if (!nd_region) { >> + dev_err(&vdev->dev, "failed to create nvdimm region\n"); >> + err = -ENXIO; >> + goto out_nd; >> + } >> + nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent); >> + return 0; >> +out_nd: >> + nvdimm_bus_unregister(vpmem->nvdimm_bus); >> +out_vq: >> + vdev->config->del_vqs(vdev); >> +out_err: >> + return err; >> +} >> + >> +static void virtio_pmem_remove(struct virtio_device *vdev) >> +{ >> + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); >> + >> + nvdimm_bus_unregister(nvdimm_bus); >> + vdev->config->del_vqs(vdev); >> + vdev->config->reset(vdev); >> +} >> + >> +static struct virtio_driver virtio_pmem_driver = { >> + .driver.name = KBUILD_MODNAME, >> + .driver.owner = THIS_MODULE, >> + .id_table = id_table, >> + .probe = virtio_pmem_probe, >> + .remove = 
virtio_pmem_remove, >> +}; >> + >> +module_virtio_driver(virtio_pmem_driver); >> +MODULE_DEVICE_TABLE(virtio, id_table); >> +MODULE_DESCRIPTION("Virtio pmem driver"); >> +MODULE_LICENSE("GPL"); >> diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h >> new file mode 100644 >> index 000000000000..ab1da877575d >> --- /dev/null >> +++ b/drivers/nvdimm/virtio_pmem.h >> @@ -0,0 +1,60 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* >> + * virtio_pmem.h: virtio pmem Driver >> + * >> + * Discovers persistent memory range information >> + * from host and provides a virtio based flushing >> + * interface. >> + **/ >> + >> +#ifndef _LINUX_VIRTIO_PMEM_H >> +#define _LINUX_VIRTIO_PMEM_H >> + >> +#include <linux/virtio_ids.h> >> +#include <linux/module.h> >> +#include <linux/virtio_config.h> >> +#include <uapi/linux/virtio_pmem.h> >> +#include <linux/libnvdimm.h> >> +#include <linux/spinlock.h> >> + >> +struct virtio_pmem_request { >> + /* Host return status corresponding to flush request */ >> + int ret; >> + >> + /* command name*/ >> + char name[16]; > > So ... why are we sending string commands and expect native-endianess > integers and don't define a proper request/response structure + request > types in include/uapi/linux/virtio_pmem.h like > > struct virtio_pmem_resp { > __virtio32 ret; > } FWIW, I wonder if we should even properly translate return values and define types like VIRTIO_PMEM_RESP_TYPE_OK 0 VIRTIO_PMEM_RESP_TYPE_EIO 1 .. > > #define VIRTIO_PMEM_REQ_TYPE_FLUSH 1 > struct virtio_pmem_req { > __virtio16 type; > } > > ... and this way we also define a proper endianess format for exchange > and keep it extensible > > @MST, what's your take on this? > >
> > > + vpmem->vdev = vdev; > > + vdev->priv = vpmem; > > + err = init_vq(vpmem); > > + if (err) { > > + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); > > + goto out_err; > > + } > > + > > + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > > + start, &vpmem->start); > > + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > > + size, &vpmem->size); > > + > > + res.start = vpmem->start; > > + res.end = vpmem->start + vpmem->size-1; > > nit: " - 1;" Sure. > > > + vpmem->nd_desc.provider_name = "virtio-pmem"; > > + vpmem->nd_desc.module = THIS_MODULE; > > + > > + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, > > + &vpmem->nd_desc); > > + if (!vpmem->nvdimm_bus) { > > + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); > > + err = -ENXIO; > > + goto out_vq; > > + } > > + > > + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); > > + > > + ndr_desc.res = &res; > > + ndr_desc.numa_node = nid; > > + ndr_desc.flush = async_pmem_flush; > > + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); > > + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); > > + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); > > + if (!nd_region) { > > + dev_err(&vdev->dev, "failed to create nvdimm region\n"); > > + err = -ENXIO; > > + goto out_nd; > > + } > > + nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent); > > + return 0; > > +out_nd: > > + nvdimm_bus_unregister(vpmem->nvdimm_bus); > > +out_vq: > > + vdev->config->del_vqs(vdev); > > +out_err: > > + return err; > > +} > > + > > +static void virtio_pmem_remove(struct virtio_device *vdev) > > +{ > > + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); > > + > > + nvdimm_bus_unregister(nvdimm_bus); > > + vdev->config->del_vqs(vdev); > > + vdev->config->reset(vdev); > > +} > > + > > +static struct virtio_driver virtio_pmem_driver = { > > + .driver.name = KBUILD_MODNAME, > > + .driver.owner = THIS_MODULE, > > + .id_table = id_table, > > + .probe = 
virtio_pmem_probe, > > + .remove = virtio_pmem_remove, > > +}; > > + > > +module_virtio_driver(virtio_pmem_driver); > > +MODULE_DEVICE_TABLE(virtio, id_table); > > +MODULE_DESCRIPTION("Virtio pmem driver"); > > +MODULE_LICENSE("GPL"); > > diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h > > new file mode 100644 > > index 000000000000..ab1da877575d > > --- /dev/null > > +++ b/drivers/nvdimm/virtio_pmem.h > > @@ -0,0 +1,60 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +/* > > + * virtio_pmem.h: virtio pmem Driver > > + * > > + * Discovers persistent memory range information > > + * from host and provides a virtio based flushing > > + * interface. > > + **/ > > + > > +#ifndef _LINUX_VIRTIO_PMEM_H > > +#define _LINUX_VIRTIO_PMEM_H > > + > > +#include <linux/virtio_ids.h> > > +#include <linux/module.h> > > +#include <linux/virtio_config.h> > > +#include <uapi/linux/virtio_pmem.h> > > +#include <linux/libnvdimm.h> > > +#include <linux/spinlock.h> > > + > > +struct virtio_pmem_request { > > + /* Host return status corresponding to flush request */ > > + int ret; > > + > > + /* command name*/ > > + char name[16]; > > So ... why are we sending string commands and expect native-endianess > integers and don't define a proper request/response structure + request > types in include/uapi/linux/virtio_pmem.h like > > struct virtio_pmem_resp { > __virtio32 ret; > } > > #define VIRTIO_PMEM_REQ_TYPE_FLUSH 1 > struct virtio_pmem_req { > __virtio16 type; > } > > ... and this way we also define a proper endianess format for exchange > and keep it extensible Done the suggested change. Thank you, Pankaj > > @MST, what's your take on this? > > > -- > > Thanks, > > David / dhildenb >
> >> + vpmem->vdev = vdev; > >> + vdev->priv = vpmem; > >> + err = init_vq(vpmem); > >> + if (err) { > >> + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); > >> + goto out_err; > >> + } > >> + > >> + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > >> + start, &vpmem->start); > >> + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > >> + size, &vpmem->size); > >> + > >> + res.start = vpmem->start; > >> + res.end = vpmem->start + vpmem->size-1; > > > > nit: " - 1;" > > > >> + vpmem->nd_desc.provider_name = "virtio-pmem"; > >> + vpmem->nd_desc.module = THIS_MODULE; > >> + > >> + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, > >> + &vpmem->nd_desc); > >> + if (!vpmem->nvdimm_bus) { > >> + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); > >> + err = -ENXIO; > >> + goto out_vq; > >> + } > >> + > >> + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); > >> + > >> + ndr_desc.res = &res; > >> + ndr_desc.numa_node = nid; > >> + ndr_desc.flush = async_pmem_flush; > >> + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); > >> + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); > >> + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); > >> + if (!nd_region) { > >> + dev_err(&vdev->dev, "failed to create nvdimm region\n"); > >> + err = -ENXIO; > >> + goto out_nd; > >> + } > >> + nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent); > >> + return 0; > >> +out_nd: > >> + nvdimm_bus_unregister(vpmem->nvdimm_bus); > >> +out_vq: > >> + vdev->config->del_vqs(vdev); > >> +out_err: > >> + return err; > >> +} > >> + > >> +static void virtio_pmem_remove(struct virtio_device *vdev) > >> +{ > >> + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); > >> + > >> + nvdimm_bus_unregister(nvdimm_bus); > >> + vdev->config->del_vqs(vdev); > >> + vdev->config->reset(vdev); > >> +} > >> + > >> +static struct virtio_driver virtio_pmem_driver = { > >> + .driver.name = KBUILD_MODNAME, > >> + .driver.owner = THIS_MODULE, 
> >> + .id_table = id_table, > >> + .probe = virtio_pmem_probe, > >> + .remove = virtio_pmem_remove, > >> +}; > >> + > >> +module_virtio_driver(virtio_pmem_driver); > >> +MODULE_DEVICE_TABLE(virtio, id_table); > >> +MODULE_DESCRIPTION("Virtio pmem driver"); > >> +MODULE_LICENSE("GPL"); > >> diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h > >> new file mode 100644 > >> index 000000000000..ab1da877575d > >> --- /dev/null > >> +++ b/drivers/nvdimm/virtio_pmem.h > >> @@ -0,0 +1,60 @@ > >> +/* SPDX-License-Identifier: GPL-2.0 */ > >> +/* > >> + * virtio_pmem.h: virtio pmem Driver > >> + * > >> + * Discovers persistent memory range information > >> + * from host and provides a virtio based flushing > >> + * interface. > >> + **/ > >> + > >> +#ifndef _LINUX_VIRTIO_PMEM_H > >> +#define _LINUX_VIRTIO_PMEM_H > >> + > >> +#include <linux/virtio_ids.h> > >> +#include <linux/module.h> > >> +#include <linux/virtio_config.h> > >> +#include <uapi/linux/virtio_pmem.h> > >> +#include <linux/libnvdimm.h> > >> +#include <linux/spinlock.h> > >> + > >> +struct virtio_pmem_request { > >> + /* Host return status corresponding to flush request */ > >> + int ret; > >> + > >> + /* command name*/ > >> + char name[16]; > > > > So ... why are we sending string commands and expect native-endianess > > integers and don't define a proper request/response structure + request > > types in include/uapi/linux/virtio_pmem.h like > > > > struct virtio_pmem_resp { > > __virtio32 ret; > > } > > FWIW, I wonder if we should even properly translate return values and > define types like > > VIRTIO_PMEM_RESP_TYPE_OK 0 > VIRTIO_PMEM_RESP_TYPE_EIO 1 Don't think these are required as only failure and success return types easy to understand. Thanks, Pankaj > > .. > > > > > #define VIRTIO_PMEM_REQ_TYPE_FLUSH 1 > > struct virtio_pmem_req { > > __virtio16 type; > > } > > > > ... 
and this way we also define a proper endianess format for exchange > > and keep it extensible > > > > @MST, what's your take on this? > > > > > > > -- > > Thanks, > > David / dhildenb >
On Wed, May 15, 2019 at 10:46:00PM +0200, David Hildenbrand wrote: > > + vpmem->vdev = vdev; > > + vdev->priv = vpmem; > > + err = init_vq(vpmem); > > + if (err) { > > + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); > > + goto out_err; > > + } > > + > > + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > > + start, &vpmem->start); > > + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > > + size, &vpmem->size); > > + > > + res.start = vpmem->start; > > + res.end = vpmem->start + vpmem->size-1; > > nit: " - 1;" > > > + vpmem->nd_desc.provider_name = "virtio-pmem"; > > + vpmem->nd_desc.module = THIS_MODULE; > > + > > + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, > > + &vpmem->nd_desc); > > + if (!vpmem->nvdimm_bus) { > > + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); > > + err = -ENXIO; > > + goto out_vq; > > + } > > + > > + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); > > + > > + ndr_desc.res = &res; > > + ndr_desc.numa_node = nid; > > + ndr_desc.flush = async_pmem_flush; > > + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); > > + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); > > + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); > > + if (!nd_region) { > > + dev_err(&vdev->dev, "failed to create nvdimm region\n"); > > + err = -ENXIO; > > + goto out_nd; > > + } > > + nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent); > > + return 0; > > +out_nd: > > + nvdimm_bus_unregister(vpmem->nvdimm_bus); > > +out_vq: > > + vdev->config->del_vqs(vdev); > > +out_err: > > + return err; > > +} > > + > > +static void virtio_pmem_remove(struct virtio_device *vdev) > > +{ > > + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); > > + > > + nvdimm_bus_unregister(nvdimm_bus); > > + vdev->config->del_vqs(vdev); > > + vdev->config->reset(vdev); > > +} > > + > > +static struct virtio_driver virtio_pmem_driver = { > > + .driver.name = KBUILD_MODNAME, > > + .driver.owner = THIS_MODULE, 
> > + .id_table = id_table, > > + .probe = virtio_pmem_probe, > > + .remove = virtio_pmem_remove, > > +}; > > + > > +module_virtio_driver(virtio_pmem_driver); > > +MODULE_DEVICE_TABLE(virtio, id_table); > > +MODULE_DESCRIPTION("Virtio pmem driver"); > > +MODULE_LICENSE("GPL"); > > diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h > > new file mode 100644 > > index 000000000000..ab1da877575d > > --- /dev/null > > +++ b/drivers/nvdimm/virtio_pmem.h > > @@ -0,0 +1,60 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +/* > > + * virtio_pmem.h: virtio pmem Driver > > + * > > + * Discovers persistent memory range information > > + * from host and provides a virtio based flushing > > + * interface. > > + **/ > > + > > +#ifndef _LINUX_VIRTIO_PMEM_H > > +#define _LINUX_VIRTIO_PMEM_H > > + > > +#include <linux/virtio_ids.h> > > +#include <linux/module.h> > > +#include <linux/virtio_config.h> > > +#include <uapi/linux/virtio_pmem.h> > > +#include <linux/libnvdimm.h> > > +#include <linux/spinlock.h> > > + > > +struct virtio_pmem_request { > > + /* Host return status corresponding to flush request */ > > + int ret; > > + > > + /* command name*/ > > + char name[16]; > > So ... why are we sending string commands and expect native-endianess > integers and don't define a proper request/response structure + request > types in include/uapi/linux/virtio_pmem.h like passing names could be ok. I missed the fact we return a native endian int. Pls fix that. > > struct virtio_pmem_resp { > __virtio32 ret; > } > > #define VIRTIO_PMEM_REQ_TYPE_FLUSH 1 > struct virtio_pmem_req { > __virtio16 type; > } > > ... and this way we also define a proper endianess format for exchange > and keep it extensible > > @MST, what's your take on this? Extensions can always use feature bits so I don't think it's a problem. > > -- > > Thanks, > > David / dhildenb
On 5/14/19 7:54 AM, Pankaj Gupta wrote: > + if (!list_empty(&vpmem->req_list)) { > + req_buf = list_first_entry(&vpmem->req_list, > + struct virtio_pmem_request, list); > + req_buf->wq_buf_avail = true; > + wake_up(&req_buf->wq_buf); > + list_del(&req_buf->list); Yes, this change is the right one, thank you! > + /* > + * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual > + * queue does not have free descriptor. We add the request > + * to req_list and wait for host_ack to wake us up when free > + * slots are available. > + */ > + while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, > + GFP_ATOMIC)) == -ENOSPC) { > + > + dev_err(&vdev->dev, "failed to send command to virtio pmem" \ > + "device, no free slots in the virtqueue\n"); > + req->wq_buf_avail = false; > + list_add_tail(&req->list, &vpmem->req_list); > + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); > + > + /* A host response results in "host_ack" getting called */ > + wait_event(req->wq_buf, req->wq_buf_avail); > + spin_lock_irqsave(&vpmem->pmem_lock, flags); > + } > + err1 = virtqueue_kick(vpmem->req_vq); > + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); > + > + /* > + * virtqueue_add_sgs failed with error different than -ENOSPC, we can't > + * do anything about that. > + */ > + if (err || !err1) { > + dev_info(&vdev->dev, "failed to send command to virtio pmem device\n"); > + err = -EIO; > + } else { > + /* A host repsonse results in "host_ack" getting called */ > + wait_event(req->host_acked, req->done); > + err = req->ret; > + I confirm that the failures I was facing with the `-ENOSPC` error path are not present in v9. Best, Jakub Staron
> > On Wed, May 15, 2019 at 10:46:00PM +0200, David Hildenbrand wrote: > > > + vpmem->vdev = vdev; > > > + vdev->priv = vpmem; > > > + err = init_vq(vpmem); > > > + if (err) { > > > + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); > > > + goto out_err; > > > + } > > > + > > > + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > > > + start, &vpmem->start); > > > + virtio_cread(vpmem->vdev, struct virtio_pmem_config, > > > + size, &vpmem->size); > > > + > > > + res.start = vpmem->start; > > > + res.end = vpmem->start + vpmem->size-1; > > > > nit: " - 1;" > > > > > + vpmem->nd_desc.provider_name = "virtio-pmem"; > > > + vpmem->nd_desc.module = THIS_MODULE; > > > + > > > + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, > > > + &vpmem->nd_desc); > > > + if (!vpmem->nvdimm_bus) { > > > + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); > > > + err = -ENXIO; > > > + goto out_vq; > > > + } > > > + > > > + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); > > > + > > > + ndr_desc.res = &res; > > > + ndr_desc.numa_node = nid; > > > + ndr_desc.flush = async_pmem_flush; > > > + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); > > > + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); > > > + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); > > > + if (!nd_region) { > > > + dev_err(&vdev->dev, "failed to create nvdimm region\n"); > > > + err = -ENXIO; > > > + goto out_nd; > > > + } > > > + nd_region->provider_data = > > > dev_to_virtio(nd_region->dev.parent->parent); > > > + return 0; > > > +out_nd: > > > + nvdimm_bus_unregister(vpmem->nvdimm_bus); > > > +out_vq: > > > + vdev->config->del_vqs(vdev); > > > +out_err: > > > + return err; > > > +} > > > + > > > +static void virtio_pmem_remove(struct virtio_device *vdev) > > > +{ > > > + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); > > > + > > > + nvdimm_bus_unregister(nvdimm_bus); > > > + vdev->config->del_vqs(vdev); > > > + vdev->config->reset(vdev); > > > +} > > 
> + > > > +static struct virtio_driver virtio_pmem_driver = { > > > + .driver.name = KBUILD_MODNAME, > > > + .driver.owner = THIS_MODULE, > > > + .id_table = id_table, > > > + .probe = virtio_pmem_probe, > > > + .remove = virtio_pmem_remove, > > > +}; > > > + > > > +module_virtio_driver(virtio_pmem_driver); > > > +MODULE_DEVICE_TABLE(virtio, id_table); > > > +MODULE_DESCRIPTION("Virtio pmem driver"); > > > +MODULE_LICENSE("GPL"); > > > diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h > > > new file mode 100644 > > > index 000000000000..ab1da877575d > > > --- /dev/null > > > +++ b/drivers/nvdimm/virtio_pmem.h > > > @@ -0,0 +1,60 @@ > > > +/* SPDX-License-Identifier: GPL-2.0 */ > > > +/* > > > + * virtio_pmem.h: virtio pmem Driver > > > + * > > > + * Discovers persistent memory range information > > > + * from host and provides a virtio based flushing > > > + * interface. > > > + **/ > > > + > > > +#ifndef _LINUX_VIRTIO_PMEM_H > > > +#define _LINUX_VIRTIO_PMEM_H > > > + > > > +#include <linux/virtio_ids.h> > > > +#include <linux/module.h> > > > +#include <linux/virtio_config.h> > > > +#include <uapi/linux/virtio_pmem.h> > > > +#include <linux/libnvdimm.h> > > > +#include <linux/spinlock.h> > > > + > > > +struct virtio_pmem_request { > > > + /* Host return status corresponding to flush request */ > > > + int ret; > > > + > > > + /* command name*/ > > > + char name[16]; > > > > So ... why are we sending string commands and expect native-endianess > > integers and don't define a proper request/response structure + request > > types in include/uapi/linux/virtio_pmem.h like > > passing names could be ok. > I missed the fact we return a native endian int. > Pls fix that. Sure. will fix this. > > > > > > struct virtio_pmem_resp { > > __virtio32 ret; > > } > > > > #define VIRTIO_PMEM_REQ_TYPE_FLUSH 1 > > struct virtio_pmem_req { > > __virtio16 type; > > } > > > > ... 
and this way we also define a proper endianess format for exchange > > and keep it extensible > > > > @MST, what's your take on this? > > Extensions can always use feature bits so I don't think > it's a problem. That was exactly my thought when I implemented this. Though I am fine with separate structures for request/response and I made the change. Thank you for all the comments. Best regards, Pankaj > > > > -- > > > > Thanks, > > > > David / dhildenb > >
Hi Jakub, > > On 5/14/19 7:54 AM, Pankaj Gupta wrote: > > + if (!list_empty(&vpmem->req_list)) { > > + req_buf = list_first_entry(&vpmem->req_list, > > + struct virtio_pmem_request, list); > > + req_buf->wq_buf_avail = true; > > + wake_up(&req_buf->wq_buf); > > + list_del(&req_buf->list); > Yes, this change is the right one, thank you! Thank you for the confirmation. > > > + /* > > + * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual > > + * queue does not have free descriptor. We add the request > > + * to req_list and wait for host_ack to wake us up when free > > + * slots are available. > > + */ > > + while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, > > + GFP_ATOMIC)) == -ENOSPC) { > > + > > + dev_err(&vdev->dev, "failed to send command to virtio pmem" \ > > + "device, no free slots in the virtqueue\n"); > > + req->wq_buf_avail = false; > > + list_add_tail(&req->list, &vpmem->req_list); > > + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); > > + > > + /* A host response results in "host_ack" getting called */ > > + wait_event(req->wq_buf, req->wq_buf_avail); > > + spin_lock_irqsave(&vpmem->pmem_lock, flags); > > + } > > + err1 = virtqueue_kick(vpmem->req_vq); > > + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); > > + > > + /* > > + * virtqueue_add_sgs failed with error different than -ENOSPC, we can't > > + * do anything about that. > > + */ > > + if (err || !err1) { > > + dev_info(&vdev->dev, "failed to send command to virtio pmem device\n"); > > + err = -EIO; > > + } else { > > + /* A host repsonse results in "host_ack" getting called */ > > + wait_event(req->host_acked, req->done); > > + err = req->ret; > > +I confirm that the failures I was facing with the `-ENOSPC` error path are > > not present in v9. Can I take that as your Reviewed-by/Acked-by or Tested-by tag for the virtio patch? :) Thank you, Pankaj > > Best, > Jakub Staron > >
On 5/16/19 10:35 PM, Pankaj Gupta wrote: > Can I take it your reviewed/acked-by? or tested-by tag? for the virtio patch :) I don't feel that I have enough expertise to give the reviewed-by tag, but you can take my acked-by + tested-by. Acked-by: Jakub Staron <jstaron@google.com> Tested-by: Jakub Staron <jstaron@google.com> No kernel panics/stalls encountered during testing these patches (v9) with QEMU + xfstests. Some CPU stalls encountered while testing with crosvm instead of QEMU with xfstests (test generic/464) but no repro for QEMU, so the fault may be on the side of crosvm. The dump for the crosvm/xfstests stall: [ 2504.175276] rcu: INFO: rcu_sched detected stalls on CPUs/tasks: [ 2504.176681] rcu: 0-...!: (1 GPs behind) idle=9b2/1/0x4000000000000000 softirq=1089198/1089202 fqs=0 [ 2504.178270] rcu: 2-...!: (1 ticks this GP) idle=cfe/1/0x4000000000000002 softirq=1055108/1055110 fqs=0 [ 2504.179802] rcu: 3-...!: (1 GPs behind) idle=1d6/1/0x4000000000000002 softirq=1046798/1046802 fqs=0 [ 2504.181215] rcu: 4-...!: (2 ticks this GP) idle=522/1/0x4000000000000002 softirq=1249063/1249064 fqs=0 [ 2504.182625] rcu: 5-...!: (1 GPs behind) idle=6da/1/0x4000000000000000 softirq=1131036/1131047 fqs=0 [ 2504.183955] (detected by 3, t=0 jiffies, g=1232529, q=1370) [ 2504.184762] Sending NMI from CPU 3 to CPUs 0: [ 2504.186400] NMI backtrace for cpu 0 [ 2504.186401] CPU: 0 PID: 6670 Comm: 464 Not tainted 5.1.0+ #1 [ 2504.186401] Hardware name: ChromiumOS crosvm, BIOS 0 [ 2504.186402] RIP: 0010:queued_spin_lock_slowpath+0x1c/0x1e0 [ 2504.186402] Code: e7 89 c8 f0 44 0f b1 07 39 c1 75 dc f3 c3 0f 1f 44 00 00 ba 01 00 00 00 8b 07 85 c0 75 0a f0 0f b1 17 85 c0 75 f2 f3 c3 f3 90 <eb> ec 81 fe 00 01 00 00 0f 84 ab 00 00 00 81 e6 00 ff ff ff 75 44 [ 2504.186403] RSP: 0018:ffffc90000003ee8 EFLAGS: 00000002 [ 2504.186404] RAX: 0000000000000001 RBX: 0000000000000246 RCX: 0000000000404044 [ 2504.186404] RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffffffff8244a280 [ 2504.186405] 
RBP: ffffffff8244a280 R08: 00000000000f4200 R09: 0000024709ed6c32 [ 2504.186405] R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff8244a280 [ 2504.186405] R13: 0000000000000009 R14: 0000000000000009 R15: 0000000000000000 [ 2504.186406] FS: 0000000000000000(0000) GS:ffff8880cc600000(0000) knlGS:0000000000000000 [ 2504.186406] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2504.186406] CR2: 00007efd6b0f15d8 CR3: 000000000260a006 CR4: 0000000000360ef0 [ 2504.186407] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2504.186407] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 2504.186407] Call Trace: [ 2504.186408] <IRQ> [ 2504.186408] _raw_spin_lock_irqsave+0x1d/0x30 [ 2504.186408] rcu_core+0x3b6/0x740 [ 2504.186408] ? __hrtimer_run_queues+0x133/0x280 [ 2504.186409] ? recalibrate_cpu_khz+0x10/0x10 [ 2504.186409] __do_softirq+0xd8/0x2e4 [ 2504.186409] irq_exit+0xa3/0xb0 [ 2504.186410] smp_apic_timer_interrupt+0x67/0x120 [ 2504.186410] apic_timer_interrupt+0xf/0x20 [ 2504.186410] </IRQ> [ 2504.186410] RIP: 0010:unmap_page_range+0x47a/0x9b0 [ 2504.186411] Code: 0f 46 46 10 49 39 6e 18 49 89 46 10 48 89 e8 49 0f 43 46 18 41 80 4e 20 08 4d 85 c9 49 89 46 18 0f 84 68 ff ff ff 49 8b 51 08 <48> 8d 42 ff 83 e2 01 49 0f 44 c1 f6 40 18 01 75 38 48 ba ff 0f 00 [ 2504.186411] RSP: 0018:ffffc900036cbcc8 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13 [ 2504.186412] RAX: ffffffffffffffff RBX: 800000003751d045 RCX: 0000000000000001 [ 2504.186413] RDX: ffffea0002e09288 RSI: 000000000269b000 RDI: ffff8880b6525e40 [ 2504.186413] RBP: 000000000269c000 R08: 0000000000000000 R09: ffffea0000dd4740 [ 2504.186413] R10: ffffea0001755700 R11: ffff8880cc62d120 R12: 0000000002794000 [ 2504.186414] R13: 000000000269b000 R14: ffffc900036cbdf0 R15: ffff8880572434d8 [ 2504.186414] ? unmap_page_range+0x420/0x9b0 [ 2504.186414] ? 
release_pages+0x175/0x390 [ 2504.186414] unmap_vmas+0x7c/0xe0 [ 2504.186415] exit_mmap+0xa4/0x190 [ 2504.186415] mmput+0x3b/0x100 [ 2504.186415] do_exit+0x276/0xc10 [ 2504.186415] do_group_exit+0x35/0xa0 [ 2504.186415] __x64_sys_exit_group+0xf/0x10 [ 2504.186416] do_syscall_64+0x43/0x120 [ 2504.186416] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2504.186416] RIP: 0033:0x7efd6ae10618 [ 2504.186416] Code: Bad RIP value. [ 2504.186417] RSP: 002b:00007ffcac9bde38 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 [ 2504.186417] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007efd6ae10618 [ 2504.186418] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000 [ 2504.186418] RBP: 00007efd6b0ed8e0 R08: 00000000000000e7 R09: ffffffffffffff98 [ 2504.186418] R10: 00007ffcac9bddb8 R11: 0000000000000246 R12: 00007efd6b0ed8e0 [ 2504.186419] R13: 00007efd6b0f2c20 R14: 0000000000000060 R15: 000000000070e705 [ 2504.186421] NMI backtrace for cpu 3 [ 2504.226980] CPU: 3 PID: 6596 Comm: xfs_io Not tainted 5.1.0+ #1 [ 2504.227661] Hardware name: ChromiumOS crosvm, BIOS 0 [ 2504.228261] Call Trace: [ 2504.228552] <IRQ> [ 2504.228795] dump_stack+0x46/0x5b [ 2504.229180] nmi_cpu_backtrace+0x89/0x90 [ 2504.229649] ? lapic_can_unplug_cpu+0x90/0x90 [ 2504.230157] nmi_trigger_cpumask_backtrace+0x82/0xc0 [ 2504.230751] rcu_dump_cpu_stacks+0x8b/0xb7 [ 2504.231222] rcu_sched_clock_irq+0x6f6/0x720 [ 2504.231726] ? 
tick_sched_do_timer+0x50/0x50 [ 2504.232214] update_process_times+0x23/0x50 [ 2504.232693] tick_sched_handle+0x2f/0x40 [ 2504.233144] tick_sched_timer+0x32/0x70 [ 2504.233594] __hrtimer_run_queues+0x103/0x280 [ 2504.234092] hrtimer_interrupt+0xe0/0x240 [ 2504.234580] smp_apic_timer_interrupt+0x5d/0x120 [ 2504.235152] apic_timer_interrupt+0xf/0x20 [ 2504.235627] </IRQ> [ 2504.235879] RIP: 0010:__memcpy_flushcache+0x4b/0x180 [ 2504.236452] Code: 8d 5d e0 4c 8d 62 20 48 89 f7 48 29 d7 48 89 d9 48 83 e1 e0 4c 01 e1 48 8d 04 17 4c 8b 02 4c 8b 4a 08 4c 8b 52 10 4c 8b 5a 18 <4c> 0f c3 00 4c 0f c3 48 08 4c 0f c3 50 10 4c 0f c3 58 18 48 83 c2 [ 2504.238592] RSP: 0018:ffffc90003ae38e8 EFLAGS: 00010286 ORIG_RAX: ffffffffffffff13 [ 2504.239467] RAX: ffff888341800000 RBX: 0000000000000fe0 RCX: ffff88801bd22000 [ 2504.240277] RDX: ffff88801bd21000 RSI: ffff888341800000 RDI: 0000000325adf000 [ 2504.241092] RBP: 0000000000001000 R08: cdcdcdcdcdcdcdcd R09: cdcdcdcdcdcdcdcd [ 2504.241908] R10: cdcdcdcdcdcdcdcd R11: cdcdcdcdcdcdcdcd R12: ffff88801bd21020 [ 2504.242751] R13: ffff8880b916b600 R14: ffff888341800000 R15: ffffea00006f4840 [ 2504.243602] write_pmem+0x61/0x90 [ 2504.244002] pmem_do_bvec+0x178/0x2c0 [ 2504.244469] ? chksum_update+0xe/0x20 [ 2504.244908] pmem_make_request+0xf7/0x270 [ 2504.245509] generic_make_request+0x199/0x3f0 [ 2504.246179] ? submit_bio+0x67/0x130 [ 2504.246710] submit_bio+0x67/0x130 [ 2504.247117] ext4_io_submit+0x44/0x50 [ 2504.247556] ext4_writepages+0x621/0xe80 [ 2504.248028] ? 0xffffffff81000000 [ 2504.248418] ? do_writepages+0x46/0xd0 [ 2504.248880] ? ext4_mark_inode_dirty+0x1d0/0x1d0 [ 2504.249417] do_writepages+0x46/0xd0 [ 2504.249833] ? release_pages+0x175/0x390 [ 2504.250290] ? __filemap_fdatawrite_range+0x7c/0xb0 [ 2504.250879] __filemap_fdatawrite_range+0x7c/0xb0 [ 2504.251427] ext4_release_file+0x67/0xa0 [ 2504.251897] __fput+0xb1/0x220 [ 2504.252260] task_work_run+0x79/0xa0 [ 2504.252676] do_exit+0x2ca/0xc10 [ 2504.253063] ? 
__switch_to_asm+0x40/0x70 [ 2504.253530] ? __switch_to_asm+0x34/0x70 [ 2504.253995] ? __switch_to_asm+0x40/0x70 [ 2504.254446] do_group_exit+0x35/0xa0 [ 2504.254865] get_signal+0x14e/0x7a0 [ 2504.255281] ? __switch_to_asm+0x34/0x70 [ 2504.255749] ? __switch_to_asm+0x40/0x70 [ 2504.256224] do_signal+0x2b/0x5e0 [ 2504.256619] ? __switch_to_asm+0x40/0x70 [ 2504.257086] ? __switch_to_asm+0x34/0x70 [ 2504.257552] ? __switch_to_asm+0x40/0x70 [ 2504.258022] ? __switch_to_asm+0x34/0x70 [ 2504.258488] ? __schedule+0x253/0x530 [ 2504.258943] ? __switch_to_asm+0x34/0x70 [ 2504.259398] exit_to_usermode_loop+0x87/0xa0 [ 2504.259900] do_syscall_64+0xf7/0x120 [ 2504.260326] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 2504.260923] RIP: 0033:0x7faf347e28bd [ 2504.261348] Code: Bad RIP value. [ 2504.261727] RSP: 002b:00007faf33fc5f40 EFLAGS: 00000293 ORIG_RAX: 0000000000000022 [ 2504.262594] RAX: fffffffffffffdfe RBX: 0000000000000000 RCX: 00007faf347e28bd [ 2504.263416] RDX: 8b9da1f4246cdb38 RSI: 0000000000000000 RDI: 0000000000000000 [ 2504.264215] RBP: 0000000000000000 R08: 00007faf33fc6700 R09: 00007faf33fc6700 [ 2504.265061] R10: 000000000000012d R11: 0000000000000293 R12: 00007ffdf142327e [ 2504.266082] R13: 00007ffdf142327f R14: 00007faf337c6000 R15: 0000000000000003 Arch: x86_64 Kernel: stable top with virtio-pmem v9 patches applied Distro: Debian Stretch But as I said, it may be just a problem with crosvm. Thank you, Jakub Staron
> On 5/16/19 10:35 PM, Pankaj Gupta wrote: > > Can I take it your reviewed/acked-by? or tested-by tag? for the virtio > > patch :)I don't feel that I have enough expertise to give the reviewed-by > > tag, but you can > take my acked-by + tested-by. > > Acked-by: Jakub Staron <jstaron@google.com> > Tested-by: Jakub Staron <jstaron@google.com> > > No kernel panics/stalls encountered during testing this patches (v9) with > QEMU + xfstests. Thank you for testing and confirming the results. I will add your tested & acked-by in v10. > Some CPU stalls encountered while testing with crosvm instead of QEMU with > xfstests > (test generic/464) but no repro for QEMU, so the fault may be on the side of > crosvm. yes, looks like crosvm related as we did not see any of this in my and your testing with Qemu. > > > The dump for the crosvm/xfstests stall: > [ 2504.175276] rcu: INFO: rcu_sched detected stalls on CPUs/tasks: > [ 2504.176681] rcu: 0-...!: (1 GPs behind) idle=9b2/1/0x4000000000000000 > softirq=1089198/1089202 fqs=0 > [ 2504.178270] rcu: 2-...!: (1 ticks this GP) > idle=cfe/1/0x4000000000000002 softirq=1055108/1055110 fqs=0 > [ 2504.179802] rcu: 3-...!: (1 GPs behind) idle=1d6/1/0x4000000000000002 > softirq=1046798/1046802 fqs=0 > [ 2504.181215] rcu: 4-...!: (2 ticks this GP) > idle=522/1/0x4000000000000002 softirq=1249063/1249064 fqs=0 > [ 2504.182625] rcu: 5-...!: (1 GPs behind) idle=6da/1/0x4000000000000000 > softirq=1131036/1131047 fqs=0 > [ 2504.183955] (detected by 3, t=0 jiffies, g=1232529, q=1370) > [ 2504.184762] Sending NMI from CPU 3 to CPUs 0: > [ 2504.186400] NMI backtrace for cpu 0 > [ 2504.186401] CPU: 0 PID: 6670 Comm: 464 Not tainted 5.1.0+ #1 > [ 2504.186401] Hardware name: ChromiumOS crosvm, BIOS 0 > [ 2504.186402] RIP: 0010:queued_spin_lock_slowpath+0x1c/0x1e0 > [ 2504.186402] Code: e7 89 c8 f0 44 0f b1 07 39 c1 75 dc f3 c3 0f 1f 44 00 00 > ba 01 00 00 00 8b 07 85 c0 75 0a f0 0f b1 17 85 c0 75 f2 f3 c3 f3 90 <eb> ec > 81 fe 00 01 00 00 0f 84 ab 00 00 
00 81 e6 00 ff ff ff 75 44 > [ 2504.186403] RSP: 0018:ffffc90000003ee8 EFLAGS: 00000002 > [ 2504.186404] RAX: 0000000000000001 RBX: 0000000000000246 RCX: > 0000000000404044 > [ 2504.186404] RDX: 0000000000000001 RSI: 0000000000000001 RDI: > ffffffff8244a280 > [ 2504.186405] RBP: ffffffff8244a280 R08: 00000000000f4200 R09: > 0000024709ed6c32 > [ 2504.186405] R10: 0000000000000000 R11: 0000000000000001 R12: > ffffffff8244a280 > [ 2504.186405] R13: 0000000000000009 R14: 0000000000000009 R15: > 0000000000000000 > [ 2504.186406] FS: 0000000000000000(0000) GS:ffff8880cc600000(0000) > knlGS:0000000000000000 > [ 2504.186406] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 2504.186406] CR2: 00007efd6b0f15d8 CR3: 000000000260a006 CR4: > 0000000000360ef0 > [ 2504.186407] DR0: 0000000000000000 DR1: 0000000000000000 DR2: > 0000000000000000 > [ 2504.186407] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: > 0000000000000400 > [ 2504.186407] Call Trace: > [ 2504.186408] <IRQ> > [ 2504.186408] _raw_spin_lock_irqsave+0x1d/0x30 > [ 2504.186408] rcu_core+0x3b6/0x740 > [ 2504.186408] ? __hrtimer_run_queues+0x133/0x280 > [ 2504.186409] ? 
recalibrate_cpu_khz+0x10/0x10 > [ 2504.186409] __do_softirq+0xd8/0x2e4 > [ 2504.186409] irq_exit+0xa3/0xb0 > [ 2504.186410] smp_apic_timer_interrupt+0x67/0x120 > [ 2504.186410] apic_timer_interrupt+0xf/0x20 > [ 2504.186410] </IRQ> > [ 2504.186410] RIP: 0010:unmap_page_range+0x47a/0x9b0 > [ 2504.186411] Code: 0f 46 46 10 49 39 6e 18 49 89 46 10 48 89 e8 49 0f 43 46 > 18 41 80 4e 20 08 4d 85 c9 49 89 46 18 0f 84 68 ff ff ff 49 8b 51 08 <48> 8d > 42 ff 83 e2 01 49 0f 44 c1 f6 40 18 01 75 38 48 ba ff 0f 00 > [ 2504.186411] RSP: 0018:ffffc900036cbcc8 EFLAGS: 00000282 ORIG_RAX: > ffffffffffffff13 > [ 2504.186412] RAX: ffffffffffffffff RBX: 800000003751d045 RCX: > 0000000000000001 > [ 2504.186413] RDX: ffffea0002e09288 RSI: 000000000269b000 RDI: > ffff8880b6525e40 > [ 2504.186413] RBP: 000000000269c000 R08: 0000000000000000 R09: > ffffea0000dd4740 > [ 2504.186413] R10: ffffea0001755700 R11: ffff8880cc62d120 R12: > 0000000002794000 > [ 2504.186414] R13: 000000000269b000 R14: ffffc900036cbdf0 R15: > ffff8880572434d8 > [ 2504.186414] ? unmap_page_range+0x420/0x9b0 > [ 2504.186414] ? release_pages+0x175/0x390 > [ 2504.186414] unmap_vmas+0x7c/0xe0 > [ 2504.186415] exit_mmap+0xa4/0x190 > [ 2504.186415] mmput+0x3b/0x100 > [ 2504.186415] do_exit+0x276/0xc10 > [ 2504.186415] do_group_exit+0x35/0xa0 > [ 2504.186415] __x64_sys_exit_group+0xf/0x10 > [ 2504.186416] do_syscall_64+0x43/0x120 > [ 2504.186416] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > [ 2504.186416] RIP: 0033:0x7efd6ae10618 > [ 2504.186416] Code: Bad RIP value. 
> [ 2504.186417] RSP: 002b:00007ffcac9bde38 EFLAGS: 00000246 ORIG_RAX: > 00000000000000e7 > [ 2504.186417] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: > 00007efd6ae10618 > [ 2504.186418] RDX: 0000000000000000 RSI: 000000000000003c RDI: > 0000000000000000 > [ 2504.186418] RBP: 00007efd6b0ed8e0 R08: 00000000000000e7 R09: > ffffffffffffff98 > [ 2504.186418] R10: 00007ffcac9bddb8 R11: 0000000000000246 R12: > 00007efd6b0ed8e0 > [ 2504.186419] R13: 00007efd6b0f2c20 R14: 0000000000000060 R15: > 000000000070e705 > [ 2504.186421] NMI backtrace for cpu 3 > [ 2504.226980] CPU: 3 PID: 6596 Comm: xfs_io Not tainted 5.1.0+ #1 > [ 2504.227661] Hardware name: ChromiumOS crosvm, BIOS 0 > [ 2504.228261] Call Trace: > [ 2504.228552] <IRQ> > [ 2504.228795] dump_stack+0x46/0x5b > [ 2504.229180] nmi_cpu_backtrace+0x89/0x90 > [ 2504.229649] ? lapic_can_unplug_cpu+0x90/0x90 > [ 2504.230157] nmi_trigger_cpumask_backtrace+0x82/0xc0 > [ 2504.230751] rcu_dump_cpu_stacks+0x8b/0xb7 > [ 2504.231222] rcu_sched_clock_irq+0x6f6/0x720 > [ 2504.231726] ? 
tick_sched_do_timer+0x50/0x50 > [ 2504.232214] update_process_times+0x23/0x50 > [ 2504.232693] tick_sched_handle+0x2f/0x40 > [ 2504.233144] tick_sched_timer+0x32/0x70 > [ 2504.233594] __hrtimer_run_queues+0x103/0x280 > [ 2504.234092] hrtimer_interrupt+0xe0/0x240 > [ 2504.234580] smp_apic_timer_interrupt+0x5d/0x120 > [ 2504.235152] apic_timer_interrupt+0xf/0x20 > [ 2504.235627] </IRQ> > [ 2504.235879] RIP: 0010:__memcpy_flushcache+0x4b/0x180 > [ 2504.236452] Code: 8d 5d e0 4c 8d 62 20 48 89 f7 48 29 d7 48 89 d9 48 83 e1 > e0 4c 01 e1 48 8d 04 17 4c 8b 02 4c 8b 4a 08 4c 8b 52 10 4c 8b 5a 18 <4c> 0f > c3 00 4c 0f c3 48 08 4c 0f c3 50 10 4c 0f c3 58 18 48 83 c2 > [ 2504.238592] RSP: 0018:ffffc90003ae38e8 EFLAGS: 00010286 ORIG_RAX: > ffffffffffffff13 > [ 2504.239467] RAX: ffff888341800000 RBX: 0000000000000fe0 RCX: > ffff88801bd22000 > [ 2504.240277] RDX: ffff88801bd21000 RSI: ffff888341800000 RDI: > 0000000325adf000 > [ 2504.241092] RBP: 0000000000001000 R08: cdcdcdcdcdcdcdcd R09: > cdcdcdcdcdcdcdcd > [ 2504.241908] R10: cdcdcdcdcdcdcdcd R11: cdcdcdcdcdcdcdcd R12: > ffff88801bd21020 > [ 2504.242751] R13: ffff8880b916b600 R14: ffff888341800000 R15: > ffffea00006f4840 > [ 2504.243602] write_pmem+0x61/0x90 > [ 2504.244002] pmem_do_bvec+0x178/0x2c0 > [ 2504.244469] ? chksum_update+0xe/0x20 > [ 2504.244908] pmem_make_request+0xf7/0x270 > [ 2504.245509] generic_make_request+0x199/0x3f0 > [ 2504.246179] ? submit_bio+0x67/0x130 > [ 2504.246710] submit_bio+0x67/0x130 > [ 2504.247117] ext4_io_submit+0x44/0x50 > [ 2504.247556] ext4_writepages+0x621/0xe80 > [ 2504.248028] ? 0xffffffff81000000 > [ 2504.248418] ? do_writepages+0x46/0xd0 > [ 2504.248880] ? ext4_mark_inode_dirty+0x1d0/0x1d0 > [ 2504.249417] do_writepages+0x46/0xd0 > [ 2504.249833] ? release_pages+0x175/0x390 > [ 2504.250290] ? 
__filemap_fdatawrite_range+0x7c/0xb0 > [ 2504.250879] __filemap_fdatawrite_range+0x7c/0xb0 > [ 2504.251427] ext4_release_file+0x67/0xa0 > [ 2504.251897] __fput+0xb1/0x220 > [ 2504.252260] task_work_run+0x79/0xa0 > [ 2504.252676] do_exit+0x2ca/0xc10 > [ 2504.253063] ? __switch_to_asm+0x40/0x70 > [ 2504.253530] ? __switch_to_asm+0x34/0x70 > [ 2504.253995] ? __switch_to_asm+0x40/0x70 > [ 2504.254446] do_group_exit+0x35/0xa0 > [ 2504.254865] get_signal+0x14e/0x7a0 > [ 2504.255281] ? __switch_to_asm+0x34/0x70 > [ 2504.255749] ? __switch_to_asm+0x40/0x70 > [ 2504.256224] do_signal+0x2b/0x5e0 > [ 2504.256619] ? __switch_to_asm+0x40/0x70 > [ 2504.257086] ? __switch_to_asm+0x34/0x70 > [ 2504.257552] ? __switch_to_asm+0x40/0x70 > [ 2504.258022] ? __switch_to_asm+0x34/0x70 > [ 2504.258488] ? __schedule+0x253/0x530 > [ 2504.258943] ? __switch_to_asm+0x34/0x70 > [ 2504.259398] exit_to_usermode_loop+0x87/0xa0 > [ 2504.259900] do_syscall_64+0xf7/0x120 > [ 2504.260326] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > [ 2504.260923] RIP: 0033:0x7faf347e28bd > [ 2504.261348] Code: Bad RIP value. > [ 2504.261727] RSP: 002b:00007faf33fc5f40 EFLAGS: 00000293 ORIG_RAX: > 0000000000000022 > [ 2504.262594] RAX: fffffffffffffdfe RBX: 0000000000000000 RCX: > 00007faf347e28bd > [ 2504.263416] RDX: 8b9da1f4246cdb38 RSI: 0000000000000000 RDI: > 0000000000000000 > [ 2504.264215] RBP: 0000000000000000 R08: 00007faf33fc6700 R09: > 00007faf33fc6700 > [ 2504.265061] R10: 000000000000012d R11: 0000000000000293 R12: > 00007ffdf142327e > [ 2504.266082] R13: 00007ffdf142327f R14: 00007faf337c6000 R15: > 0000000000000003 > > Arch: x86_64 > Kernel: stable top with virtio-pmem v9 patches applied > Distro: Debian Stretch > > But as I said, it may be just a problem with crosvm. Right. Thanks, Pankaj > > > Thank you, > Jakub Staron > >
> > > > On 5/16/19 10:35 PM, Pankaj Gupta wrote: > > > Can I take it your reviewed/acked-by? or tested-by tag? for the virtio > > > patch :)I don't feel that I have enough expertise to give the reviewed-by > > > tag, but you can > > take my acked-by + tested-by. > > > > Acked-by: Jakub Staron <jstaron@google.com> > > Tested-by: Jakub Staron <jstaron@google.com> > > > > No kernel panics/stalls encountered during testing this patches (v9) with > > QEMU + xfstests. > > Thank you for testing and confirming the results. I will add your tested & > acked-by in v10. > > > Some CPU stalls encountered while testing with crosvm instead of QEMU with > > xfstests > > (test generic/464) but no repro for QEMU, so the fault may be on the side > > of > > crosvm. > > yes, looks like crosvm related as we did not see any of this in my and your > testing with Qemu. Also, they don't seem to be related to virtio-pmem. Thanks, Pankaj
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 6f2a088afad6..cefe233e0b52 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_OF_PMEM) += of_pmem.o +obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o nd_pmem-y := pmem.o diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c new file mode 100644 index 000000000000..dd95cfecfe4c --- /dev/null +++ b/drivers/nvdimm/nd_virtio.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * virtio_pmem.c: Virtio pmem Driver + * + * Discovers persistent memory range information + * from host and provides a virtio based flushing + * interface. + */ +#include "virtio_pmem.h" +#include "nd.h" + + /* The interrupt handler */ +void host_ack(struct virtqueue *vq) +{ + struct virtio_pmem *vpmem = vq->vdev->priv; + struct virtio_pmem_request *req, *req_buf; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&vpmem->pmem_lock, flags); + while ((req = virtqueue_get_buf(vq, &len)) != NULL) { + req->done = true; + wake_up(&req->host_acked); + + if (!list_empty(&vpmem->req_list)) { + req_buf = list_first_entry(&vpmem->req_list, + struct virtio_pmem_request, list); + req_buf->wq_buf_avail = true; + wake_up(&req_buf->wq_buf); + list_del(&req_buf->list); + } + } + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); +} +EXPORT_SYMBOL_GPL(host_ack); + + /* The request submission function */ +int virtio_pmem_flush(struct nd_region *nd_region) +{ + struct virtio_device *vdev = nd_region->provider_data; + struct virtio_pmem *vpmem = vdev->priv; + struct scatterlist *sgs[2], sg, ret; + struct virtio_pmem_request *req; + unsigned long flags; + int err, err1; + + might_sleep(); + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->done = false; + strcpy(req->name, "FLUSH"); + init_waitqueue_head(&req->host_acked); + 
init_waitqueue_head(&req->wq_buf); + INIT_LIST_HEAD(&req->list); + sg_init_one(&sg, req->name, strlen(req->name)); + sgs[0] = &sg; + sg_init_one(&ret, &req->ret, sizeof(req->ret)); + sgs[1] = &ret; + + spin_lock_irqsave(&vpmem->pmem_lock, flags); + /* + * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual + * queue does not have free descriptor. We add the request + * to req_list and wait for host_ack to wake us up when free + * slots are available. + */ + while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, + GFP_ATOMIC)) == -ENOSPC) { + + dev_err(&vdev->dev, "failed to send command to virtio pmem" \ + "device, no free slots in the virtqueue\n"); + req->wq_buf_avail = false; + list_add_tail(&req->list, &vpmem->req_list); + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); + + /* A host response results in "host_ack" getting called */ + wait_event(req->wq_buf, req->wq_buf_avail); + spin_lock_irqsave(&vpmem->pmem_lock, flags); + } + err1 = virtqueue_kick(vpmem->req_vq); + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); + + /* + * virtqueue_add_sgs failed with error different than -ENOSPC, we can't + * do anything about that. + */ + if (err || !err1) { + dev_info(&vdev->dev, "failed to send command to virtio pmem device\n"); + err = -EIO; + } else { + /* A host repsonse results in "host_ack" getting called */ + wait_event(req->host_acked, req->done); + err = req->ret; + } + + kfree(req); + return err; +}; + +/* The asynchronous flush callback function */ +int async_pmem_flush(struct nd_region *nd_region, struct bio *bio) +{ + /* Create child bio for asynchronous flush and chain with + * parent bio. Otherwise directly call nd_region flush. 
+ */ + if (bio && bio->bi_iter.bi_sector != -1) { + struct bio *child = bio_alloc(GFP_ATOMIC, 0); + + if (!child) + return -ENOMEM; + bio_copy_dev(child, bio); + child->bi_opf = REQ_PREFLUSH; + child->bi_iter.bi_sector = -1; + bio_chain(child, bio); + submit_bio(child); + return 0; + } + if (virtio_pmem_flush(nd_region)) + return -EIO; + + return 0; +}; +EXPORT_SYMBOL_GPL(async_pmem_flush); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c new file mode 100644 index 000000000000..73a1cd085eae --- /dev/null +++ b/drivers/nvdimm/virtio_pmem.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * virtio_pmem.c: Virtio pmem Driver + * + * Discovers persistent memory range information + * from host and registers the virtual pmem device + * with libnvdimm core. + */ +#include "virtio_pmem.h" +#include "nd.h" + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_PMEM, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + + /* Initialize virt queue */ +static int init_vq(struct virtio_pmem *vpmem) +{ + /* single vq */ + vpmem->req_vq = virtio_find_single_vq(vpmem->vdev, + host_ack, "flush_queue"); + if (IS_ERR(vpmem->req_vq)) + return PTR_ERR(vpmem->req_vq); + + spin_lock_init(&vpmem->pmem_lock); + INIT_LIST_HEAD(&vpmem->req_list); + + return 0; +}; + +static int virtio_pmem_probe(struct virtio_device *vdev) +{ + struct nd_region_desc ndr_desc = {}; + int nid = dev_to_node(&vdev->dev); + struct nd_region *nd_region; + struct virtio_pmem *vpmem; + struct resource res; + int err = 0; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + vpmem = devm_kzalloc(&vdev->dev, sizeof(*vpmem), GFP_KERNEL); + if (!vpmem) { + err = -ENOMEM; + goto out_err; + } + + vpmem->vdev = vdev; + vdev->priv = vpmem; + err = init_vq(vpmem); + if (err) { + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); + goto out_err; + } + + virtio_cread(vpmem->vdev, struct 
virtio_pmem_config, + start, &vpmem->start); + virtio_cread(vpmem->vdev, struct virtio_pmem_config, + size, &vpmem->size); + + res.start = vpmem->start; + res.end = vpmem->start + vpmem->size-1; + vpmem->nd_desc.provider_name = "virtio-pmem"; + vpmem->nd_desc.module = THIS_MODULE; + + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, + &vpmem->nd_desc); + if (!vpmem->nvdimm_bus) { + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); + err = -ENXIO; + goto out_vq; + } + + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); + + ndr_desc.res = &res; + ndr_desc.numa_node = nid; + ndr_desc.flush = async_pmem_flush; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); + if (!nd_region) { + dev_err(&vdev->dev, "failed to create nvdimm region\n"); + err = -ENXIO; + goto out_nd; + } + nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent); + return 0; +out_nd: + nvdimm_bus_unregister(vpmem->nvdimm_bus); +out_vq: + vdev->config->del_vqs(vdev); +out_err: + return err; +} + +static void virtio_pmem_remove(struct virtio_device *vdev) +{ + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); + + nvdimm_bus_unregister(nvdimm_bus); + vdev->config->del_vqs(vdev); + vdev->config->reset(vdev); +} + +static struct virtio_driver virtio_pmem_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtio_pmem_probe, + .remove = virtio_pmem_remove, +}; + +module_virtio_driver(virtio_pmem_driver); +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("Virtio pmem driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h new file mode 100644 index 000000000000..ab1da877575d --- /dev/null +++ b/drivers/nvdimm/virtio_pmem.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * virtio_pmem.h: virtio pmem Driver + * + * Discovers 
persistent memory range information + * from host and provides a virtio based flushing + * interface. + **/ + +#ifndef _LINUX_VIRTIO_PMEM_H +#define _LINUX_VIRTIO_PMEM_H + +#include <linux/virtio_ids.h> +#include <linux/module.h> +#include <linux/virtio_config.h> +#include <uapi/linux/virtio_pmem.h> +#include <linux/libnvdimm.h> +#include <linux/spinlock.h> + +struct virtio_pmem_request { + /* Host return status corresponding to flush request */ + int ret; + + /* command name*/ + char name[16]; + + /* Wait queue to process deferred work after ack from host */ + wait_queue_head_t host_acked; + bool done; + + /* Wait queue to process deferred work after virt queue buffer avail */ + wait_queue_head_t wq_buf; + bool wq_buf_avail; + struct list_head list; +}; + +struct virtio_pmem { + struct virtio_device *vdev; + + /* Virtio pmem request queue */ + struct virtqueue *req_vq; + + /* nvdimm bus registers virtio pmem device */ + struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor nd_desc; + + /* List to store deferred work if virtqueue is full */ + struct list_head req_list; + + /* Synchronize virtqueue data */ + spinlock_t pmem_lock; + + /* Memory region information */ + uint64_t start; + uint64_t size; +}; + +void host_ack(struct virtqueue *vq); +int async_pmem_flush(struct nd_region *nd_region, struct bio *bio); +#endif diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 35897649c24f..94bad084ebab 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -42,6 +42,17 @@ config VIRTIO_PCI_LEGACY If unsure, say Y. +config VIRTIO_PMEM + tristate "Support for virtio pmem driver" + depends on VIRTIO + depends on LIBNVDIMM + help + This driver provides access to virtio-pmem devices, storage devices + that are mapped into the physical address space - similar to NVDIMMs + - with a virtio-based flushing interface. + + If unsure, say M. 
+ config VIRTIO_BALLOON tristate "Virtio balloon driver" depends on VIRTIO diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 6d5c3b2d4f4d..32b2f94d1f58 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -43,5 +43,6 @@ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h new file mode 100644 index 000000000000..fa3f7d52717a --- /dev/null +++ b/include/uapi/linux/virtio_pmem.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _UAPI_LINUX_VIRTIO_PMEM_H +#define _UAPI_LINUX_VIRTIO_PMEM_H + +struct virtio_pmem_config { + __le64 start; + __le64 size; +}; +#endif