@@ -178,11 +178,13 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
 				     u16 msix_vec)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
+	int node = dev_to_node(&vdev->dev);
+	struct virtio_pci_vq_info *info;
 	struct virtqueue *vq;
 	unsigned long flags;
 
 	/* fill out our structure that represents an active queue */
+	info = kmalloc_node(sizeof *info, GFP_KERNEL, node);
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
@@ -283,10 +285,12 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 				struct irq_affinity *desc)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int node = dev_to_node(&vdev->dev);
 	u16 msix_vec;
 	int i, err, nvectors, allocated_vectors, queue_idx = 0;
 
-	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
+	vp_dev->vqs = kcalloc_node(nvqs, sizeof(*vp_dev->vqs),
+				   GFP_KERNEL, node);
 	if (!vp_dev->vqs)
 		return -ENOMEM;
 
@@ -355,9 +359,11 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
 		const char * const names[], const bool *ctx)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int node = dev_to_node(&vdev->dev);
 	int i, err, queue_idx = 0;
 
-	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
+	vp_dev->vqs = kcalloc_node(nvqs, sizeof(*vp_dev->vqs),
+				   GFP_KERNEL, node);
 	if (!vp_dev->vqs)
 		return -ENOMEM;
 
@@ -513,10 +519,12 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
 			    const struct pci_device_id *id)
 {
 	struct virtio_pci_device *vp_dev, *reg_dev = NULL;
+	int node = dev_to_node(&pci_dev->dev);
 	int rc;
 
 	/* allocate our structure and fill it out */
-	vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
+	vp_dev = kzalloc_node(sizeof(struct virtio_pci_device),
+			      GFP_KERNEL, node);
 	if (!vp_dev)
 		return -ENOMEM;
 
Allocate frequently-accessed data structures from the NUMA node associated
with this virtio-pci device. This avoids slow cross-NUMA-node memory
accesses.

Only allocations that meet both of the following criteria are made
NUMA-aware:

1. Called during probe. If called in the data path then hopefully we're
   executing on a CPU in the same NUMA node as the device. If the CPU is
   not in the right NUMA node then it's unclear whether forcing memory
   allocations to use the device's NUMA node will increase or decrease
   performance.

2. Memory will be frequently accessed from the data path. There is no need
   to worry about data that is not accessed from performance-critical code
   paths.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)
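The pattern is the same at every call site: look up the device's NUMA node
once with dev_to_node() and pass it to the *_node allocator variants. As a
minimal standalone sketch of that pattern (my_dev_state and my_alloc_state
are hypothetical names, not part of this patch):

#include <linux/device.h>
#include <linux/slab.h>

/* Hypothetical per-device state, for illustration only. */
struct my_dev_state {
	void *ring;
};

static struct my_dev_state *my_alloc_state(struct device *dev)
{
	/*
	 * dev_to_node() returns NUMA_NO_NODE when the device's node is
	 * unknown (or on !CONFIG_NUMA kernels); in that case the *_node
	 * allocators behave exactly like kmalloc()/kzalloc().
	 */
	int node = dev_to_node(dev);

	return kzalloc_node(sizeof(struct my_dev_state), GFP_KERNEL, node);
}

Because of the NUMA_NO_NODE fallback, no special-casing is needed for
non-NUMA systems, which is why the allocator calls can be switched
unconditionally.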