| Message ID | 20210530150628.2063957-11-ltykernel@gmail.com (mailing list archive) |
|---|---|
| State | Not Applicable |
| Series | x86/Hyper-V: Add Hyper-V Isolation VM support |
On Sun, May 30, 2021 at 11:06:27AM -0400, Tianyu Lan wrote:
> +	if (hv_isolation_type_snp()) {
> +		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
> +			       GFP_KERNEL);
> +		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
> +			pfns[i] = virt_to_hvpfn(net_device->recv_buf + i * HV_HYP_PAGE_SIZE) +
> +				(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
> +
> +		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
> +		kfree(pfns);
> +		if (!vaddr)
> +			goto cleanup;
> +		net_device->recv_original_buf = net_device->recv_buf;
> +		net_device->recv_buf = vaddr;
> +	}

This probably wants a helper to make the thing more readable. But who
came up with this fucked up communication protocol where the host needs
to map random pfns into a contiguous range? Sometimes I really have to
wonder what crack the hyper-v people take when comparing this to the
relatively sane approach others take.

> +	for (i = 0; i < page_count; i++)
> +		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
> +				 packet->dma_range[i].mapping_size,
> +				 DMA_TO_DEVICE);
> +
> +	kfree(packet->dma_range);

Any reason this isn't simply using a struct scatterlist?

> +	for (i = 0; i < page_count; i++) {
> +		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
> +					 + pb[i].offset);
> +		u32 len = pb[i].len;
> +
> +		dma = dma_map_single(&hv_dev->device, src, len,
> +				     DMA_TO_DEVICE);

dma_map_single can only be used on page backed memory, and if this is
using page backed memory you wouldn't need to do these phys_to_virt
tricks. Can someone explain the mess here in more detail?

> 	struct rndis_device *dev = nvdev->extension;
> 	struct rndis_request *request = NULL;
> +	struct hv_device *hv_dev = ((struct net_device_context *)
> +			netdev_priv(ndev))->device_ctx;

Why not use a net_device_context local variable instead of this cast
galore?
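For illustration, the remap sequence Christoph is pointing at could be factored into a helper along the following lines. This is only a sketch derived from the quoted hunk; the helper name netvsc_remap_buf() and its placement are assumptions, not part of the posted patch.

/*
 * Sketch of a possible helper (hypothetical name): remap a vmalloc'ed
 * buffer above shared_gpa_boundary so the hypervisor-visible alias is
 * used.  Returns the remapped virtual address or NULL on failure.
 */
static void *netvsc_remap_buf(void *buf, unsigned long size)
{
	unsigned long *pfns;
	void *vaddr;
	int i;

	pfns = kcalloc(size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
		       GFP_KERNEL);
	if (!pfns)
		return NULL;

	/* Shift each pfn above the shared GPA boundary (vTOM). */
	for (i = 0; i < size / HV_HYP_PAGE_SIZE; i++)
		pfns[i] = virt_to_hvpfn(buf + i * HV_HYP_PAGE_SIZE) +
			(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);

	vaddr = vmap_pfn(pfns, size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
	kfree(pfns);

	return vaddr;
}

With such a helper, both the recv_buf and send_buf paths in netvsc_init_buf() would shrink to one call plus the original/remapped pointer bookkeeping.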
On 6/7/2021 2:50 PM, Christoph Hellwig wrote:
> On Sun, May 30, 2021 at 11:06:27AM -0400, Tianyu Lan wrote:
>> +	if (hv_isolation_type_snp()) {
>> +		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
>> +			       GFP_KERNEL);
>> +		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
>> +			pfns[i] = virt_to_hvpfn(net_device->recv_buf + i * HV_HYP_PAGE_SIZE) +
>> +				(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
>> +
>> +		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
>> +		kfree(pfns);
>> +		if (!vaddr)
>> +			goto cleanup;
>> +		net_device->recv_original_buf = net_device->recv_buf;
>> +		net_device->recv_buf = vaddr;
>> +	}
>
> This probably wants a helper to make the thing more readable. But who
> came up with this fucked up communication protocol where the host needs
> to map random pfns into a contiguous range? Sometimes I really have to
> wonder what crack the hyper-v people take when comparing this to the
> relatively sane approach others take.

Agree. Will add a helper function.

>
>> +	for (i = 0; i < page_count; i++)
>> +		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
>> +				 packet->dma_range[i].mapping_size,
>> +				 DMA_TO_DEVICE);
>> +
>> +	kfree(packet->dma_range);
>
> Any reason this isn't simply using a struct scatterlist?

I will have a look. Thanks for the reminder about struct scatterlist.

>
>> +	for (i = 0; i < page_count; i++) {
>> +		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
>> +					 + pb[i].offset);
>> +		u32 len = pb[i].len;
>> +
>> +		dma = dma_map_single(&hv_dev->device, src, len,
>> +				     DMA_TO_DEVICE);
>
> dma_map_single can only be used on page backed memory, and if this is
> using page backed memory you wouldn't need to do these phys_to_virt
> tricks. Can someone explain the mess here in more detail?

Sorry. Could you elaborate on the issue? These pages in the pb array
are not allocated by the DMA API, and dma_map_single() is used here to
map these pages' addresses to bounce buffer physical addresses.

>
>> 	struct rndis_device *dev = nvdev->extension;
>> 	struct rndis_request *request = NULL;
>> +	struct hv_device *hv_dev = ((struct net_device_context *)
>> +			netdev_priv(ndev))->device_ctx;
>
> Why not use a net_device_context local variable instead of this cast
> galore?
>

OK. I will update. Thanks.
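For reference, a scatterlist-based variant of the map path might look roughly like the sketch below. Only the field and helper names taken from the patch (hv_page_buffer, pb[].pfn/offset/len, offset_in_hvpage) are from the source; the function name, the packet->sgl member, and the assumption that HV_HYP_PAGE_SIZE equals PAGE_SIZE are illustrative, not something agreed on in the thread.

/* Illustrative only: map the page buffer array via a scatterlist. */
static int netvsc_dma_map_sg(struct hv_device *hv_dev,
			     struct hv_netvsc_packet *packet,
			     struct hv_page_buffer *pb, u32 page_count)
{
	struct scatterlist *sgl, *sg;
	int i, nents;

	sgl = kcalloc(page_count, sizeof(*sgl), GFP_KERNEL);
	if (!sgl)
		return -ENOMEM;

	sg_init_table(sgl, page_count);
	for (i = 0; i < page_count; i++)
		sg_set_page(&sgl[i], pfn_to_page(pb[i].pfn),
			    pb[i].len, pb[i].offset);

	nents = dma_map_sg(&hv_dev->device, sgl, page_count, DMA_TO_DEVICE);
	if (!nents) {
		kfree(sgl);
		return -ENOMEM;
	}

	/*
	 * Rewrite the page buffer entries with the DMA (bounce buffer)
	 * addresses.  Note: this assumes dma_map_sg() did not coalesce
	 * entries; a real conversion would have to handle nents differing
	 * from page_count.
	 */
	for_each_sg(sgl, sg, nents, i) {
		pb[i].pfn = sg_dma_address(sg) >> HV_HYP_PAGE_SHIFT;
		pb[i].offset = offset_in_hvpage(sg_dma_address(sg));
		pb[i].len = sg_dma_len(sg);
	}

	packet->sgl = sgl;	/* hypothetical field, kept for the later unmap */
	return 0;
}

The unmap side would then be a single dma_unmap_sg() plus kfree() instead of the per-entry dma_unmap_single() loop.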
Tianyu Lan <ltykernel@gmail.com> writes:

> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>
> In Isolation VM, all shared memory with host needs to mark visible
> to host via hvcall. vmbus_establish_gpadl() has already done it for
> netvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
> pagebuffer() still need to handle. Use DMA API to map/umap these
> memory during sending/receiving packet and Hyper-V DMA ops callback
> will use swiotlb function to allocate bounce buffer and copy data
> from/to bounce buffer.
>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>  drivers/net/hyperv/hyperv_net.h   |   6 ++
>  drivers/net/hyperv/netvsc.c       | 125 ++++++++++++++++++++++++++++--
>  drivers/net/hyperv/rndis_filter.c |   3 +
>  include/linux/hyperv.h            |   5 ++
>  4 files changed, 133 insertions(+), 6 deletions(-)

[...]

> @@ -987,14 +1092,22 @@ static inline int netvsc_send_pkt(
>
>  	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
>
> +	packet->dma_range = NULL;
>  	if (packet->page_buf_cnt) {
>  		if (packet->cp_partial)
>  			pb += packet->rmsg_pgcnt;
>
> +		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
> +		if (ret)
> +			return ret;
> +
>  		ret = vmbus_sendpacket_pagebuffer(out_channel,
> -						  pb, packet->page_buf_cnt,
> -						  &nvmsg, sizeof(nvmsg),
> -						  req_id);
> +					pb, packet->page_buf_cnt,
> +					&nvmsg, sizeof(nvmsg),
> +					req_id);

Nitpick: stray change?

> +
> +		if (ret)
> +			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
>  	} else {
>  		ret = vmbus_sendpacket(out_channel,
>  				       &nvmsg, sizeof(nvmsg),

[...]
On Mon, Jun 07, 2021 at 11:21:20PM +0800, Tianyu Lan wrote:
>> dma_map_single can only be used on page backed memory, and if this is
>> using page backed memory you wouldn't need to do these phys_to_virt
>> tricks. Can someone explain the mess here in more detail?
>
> Sorry. Could you elaborate on the issue? These pages in the pb array
> are not allocated by the DMA API, and dma_map_single() is used here to
> map these pages' addresses to bounce buffer physical addresses.

dma_map_single just calls dma_map_page using virt_to_page. So this
can't work on addresses not in the kernel linear mapping.
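The point Christoph is making can be seen from the definition of dma_map_single_attrs() in include/linux/dma-mapping.h; the snippet below is a simplified paraphrase (from roughly a v5.13-era kernel, debug hooks omitted), not code from this patch:

/* Simplified from include/linux/dma-mapping.h */
static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	/* DMA must never operate on areas that might be remapped. */
	if (dev_WARN_ONCE(dev, is_vmalloc_addr(ptr),
			  "rejecting DMA map of vmalloc memory\n"))
		return DMA_MAPPING_ERROR;

	/* virt_to_page() is only valid for linear-map addresses. */
	return dma_map_page_attrs(dev, virt_to_page(ptr), offset_in_page(ptr),
				  size, dir, attrs);
}

Because the address is immediately converted with virt_to_page(), handing dma_map_single() a pointer produced by phys_to_virt() only works if that physical address is actually covered by the kernel linear mapping.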
On 6/14/2021 3:09 PM, Christoph Hellwig wrote:
> On Mon, Jun 07, 2021 at 11:21:20PM +0800, Tianyu Lan wrote:
>>> dma_map_single can only be used on page backed memory, and if this is
>>> using page backed memory you wouldn't need to do these phys_to_virt
>>> tricks. Can someone explain the mess here in more detail?
>>
>> Sorry. Could you elaborate on the issue? These pages in the pb array
>> are not allocated by the DMA API, and dma_map_single() is used here to
>> map these pages' addresses to bounce buffer physical addresses.
>
> dma_map_single just calls dma_map_page using virt_to_page. So this
> can't work on addresses not in the kernel linear mapping.
>

The pages in the hv_page_buffer array here are in the kernel linear
mapping. The packet sent to the host contains an array which holds the
transaction data. In an Isolation VM, the data in these pages needs to
be copied to the bounce buffer, so dma_map_single() is called here to
map these data pages to the bounce buffer. The vmbus has a ring buffer
where the send/receive packets are copied to/from. The ring buffer has
already been remapped to the extra space above the shared gpa
boundary/vTom when the netvsc driver is probed, so the dma map function
is not called for the vmbus ring buffer.
On Mon, Jun 14, 2021 at 10:04:06PM +0800, Tianyu Lan wrote:
> The pages in the hv_page_buffer array here are in the kernel linear
> mapping. The packet sent to the host contains an array which holds the
> transaction data. In an Isolation VM, the data in these pages needs to
> be copied to the bounce buffer, so dma_map_single() is called here to
> map these data pages to the bounce buffer. The vmbus has a ring buffer
> where the send/receive packets are copied to/from. The ring buffer has
> already been remapped to the extra space above the shared gpa
> boundary/vTom when the netvsc driver is probed, so the dma map function
> is not called for the vmbus ring buffer.

So why do we have all that PFN magic instead of using struct page or
the usual kernel I/O buffers that contain a page pointer?
On 6/14/2021 11:33 PM, Christoph Hellwig wrote:
> On Mon, Jun 14, 2021 at 10:04:06PM +0800, Tianyu Lan wrote:
>> The pages in the hv_page_buffer array here are in the kernel linear
>> mapping. The packet sent to the host contains an array which holds the
>> transaction data. In an Isolation VM, the data in these pages needs to
>> be copied to the bounce buffer, so dma_map_single() is called here to
>> map these data pages to the bounce buffer. The vmbus has a ring buffer
>> where the send/receive packets are copied to/from. The ring buffer has
>> already been remapped to the extra space above the shared gpa
>> boundary/vTom when the netvsc driver is probed, so the dma map function
>> is not called for the vmbus ring buffer.
>
> So why do we have all that PFN magic instead of using struct page or
> the usual kernel I/O buffers that contain a page pointer?
>

These PFNs are originally part of the Hyper-V protocol data and are
sent to the host. The host accepts these GFNs and copies data from/to
guest memory. The translation from va to pa is done by the caller that
populates the hv_page_buffer array. I will try calling the dma map
function before populating struct hv_page_buffer, which avoids the
redundant translation between PA and VA.
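As a rough illustration of that direction, the mapping could happen at the point where the page buffer array is filled, so pb[] is populated directly from the returned dma_addr_t instead of translating PA to VA and back. The helper name and its calling context below are hypothetical; this is a sketch of the idea, not the eventual implementation from a later revision of the series.

/*
 * Sketch (hypothetical helper): map one fragment and fill the
 * corresponding hv_page_buffer entry straight from the DMA address.
 */
static int netvsc_fill_pb_mapped(struct device *dev, struct page *page,
				 u32 offset, u32 len,
				 struct hv_page_buffer *pb)
{
	dma_addr_t dma;

	dma = dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	pb->pfn = dma >> HV_HYP_PAGE_SHIFT;
	pb->offset = offset_in_hvpage(dma);
	pb->len = len;

	return 0;
}

Called from the code that builds the page buffer array for a send (e.g. while walking the skb fragments), this would remove the phys_to_virt()/dma_map_single() round trip that was questioned above.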
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index b11aa68b44ec..c2fbb9d4df2c 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -164,6 +164,7 @@ struct hv_netvsc_packet {
 	u32 total_bytes;
 	u32 send_buf_index;
 	u32 total_data_buflen;
+	struct hv_dma_range *dma_range;
 };
 
 #define NETVSC_HASH_KEYLEN 40
@@ -1074,6 +1075,7 @@ struct netvsc_device {
 
 	/* Receive buffer allocated by us but manages by NetVSP */
 	void *recv_buf;
+	void *recv_original_buf;
 	u32 recv_buf_size; /* allocated bytes */
 	u32 recv_buf_gpadl_handle;
 	u32 recv_section_cnt;
@@ -1082,6 +1084,8 @@ struct netvsc_device {
 
 	/* Send buffer allocated by us */
 	void *send_buf;
+	void *send_original_buf;
+	u32 send_buf_size;
 	u32 send_buf_gpadl_handle;
 	u32 send_section_cnt;
 	u32 send_section_size;
@@ -1729,4 +1733,6 @@ struct rndis_message {
 #define RETRY_US_HI	10000
 #define RETRY_MAX	2000	/* >10 sec */
 
+void netvsc_dma_unmap(struct hv_device *hv_dev,
+		      struct hv_netvsc_packet *packet);
 #endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 7bd935412853..a01740c6c6b8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -153,8 +153,21 @@ static void free_netvsc_device(struct rcu_head *head)
 	int i;
 
 	kfree(nvdev->extension);
-	vfree(nvdev->recv_buf);
-	vfree(nvdev->send_buf);
+
+	if (nvdev->recv_original_buf) {
+		vunmap(nvdev->recv_buf);
+		vfree(nvdev->recv_original_buf);
+	} else {
+		vfree(nvdev->recv_buf);
+	}
+
+	if (nvdev->send_original_buf) {
+		vunmap(nvdev->send_buf);
+		vfree(nvdev->send_original_buf);
+	} else {
+		vfree(nvdev->send_buf);
+	}
+
 	kfree(nvdev->send_section_map);
 
 	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
@@ -338,8 +351,10 @@ static int netvsc_init_buf(struct hv_device *device,
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct nvsp_message *init_packet;
 	unsigned int buf_size;
+	unsigned long *pfns;
 	size_t map_words;
 	int i, ret = 0;
+	void *vaddr;
 
 	/* Get receive buffer area. */
 	buf_size = device_info->recv_sections * device_info->recv_section_size;
@@ -375,6 +390,21 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}
 
+	if (hv_isolation_type_snp()) {
+		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
+			       GFP_KERNEL);
+		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
+			pfns[i] = virt_to_hvpfn(net_device->recv_buf + i * HV_HYP_PAGE_SIZE) +
+				(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
+
+		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
+		kfree(pfns);
+		if (!vaddr)
+			goto cleanup;
+		net_device->recv_original_buf = net_device->recv_buf;
+		net_device->recv_buf = vaddr;
+	}
+
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 	memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -477,6 +507,23 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}
 
+	if (hv_isolation_type_snp()) {
+		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
+			       GFP_KERNEL);
+
+		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
+			pfns[i] = virt_to_hvpfn(net_device->send_buf + i * HV_HYP_PAGE_SIZE)
+				+ (ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
+
+		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
+		kfree(pfns);
+		if (!vaddr)
+			goto cleanup;
+
+		net_device->send_original_buf = net_device->send_buf;
+		net_device->send_buf = vaddr;
+	}
+
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 	memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -767,7 +814,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
 
 	/* Notify the layer above us */
 	if (likely(skb)) {
-		const struct hv_netvsc_packet *packet
+		struct hv_netvsc_packet *packet
 			= (struct hv_netvsc_packet *)skb->cb;
 		u32 send_index = packet->send_buf_index;
 		struct netvsc_stats *tx_stats;
@@ -783,6 +830,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
 		tx_stats->bytes += packet->total_bytes;
 		u64_stats_update_end(&tx_stats->syncp);
 
+		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
 		napi_consume_skb(skb, budget);
 	}
 
@@ -947,6 +995,63 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 		memset(dest, 0, padding);
 }
 
+void netvsc_dma_unmap(struct hv_device *hv_dev,
+		      struct hv_netvsc_packet *packet)
+{
+	u32 page_count = packet->cp_partial ?
+		packet->page_buf_cnt - packet->rmsg_pgcnt :
+		packet->page_buf_cnt;
+	int i;
+
+	if (!packet->dma_range)
+		return;
+
+	for (i = 0; i < page_count; i++)
+		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
+				 packet->dma_range[i].mapping_size,
+				 DMA_TO_DEVICE);
+
+	kfree(packet->dma_range);
+}
+
+int netvsc_dma_map(struct hv_device *hv_dev,
+		   struct hv_netvsc_packet *packet,
+		   struct hv_page_buffer *pb)
+{
+	u32 page_count = packet->cp_partial ?
+		packet->page_buf_cnt - packet->rmsg_pgcnt :
+		packet->page_buf_cnt;
+	dma_addr_t dma;
+	int i;
+
+	packet->dma_range = kcalloc(page_count,
+				    sizeof(*packet->dma_range),
+				    GFP_KERNEL);
+	if (!packet->dma_range)
+		return -ENOMEM;
+
+	for (i = 0; i < page_count; i++) {
+		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
+					 + pb[i].offset);
+		u32 len = pb[i].len;
+
+		dma = dma_map_single(&hv_dev->device, src, len,
+				     DMA_TO_DEVICE);
+		if (dma_mapping_error(&hv_dev->device, dma)) {
+			kfree(packet->dma_range);
+			return -ENOMEM;
+		}
+
+		packet->dma_range[i].dma = dma;
+		packet->dma_range[i].mapping_size = len;
+		pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
+		pb[i].offset = offset_in_hvpage(dma);
+		pb[i].len = len;
+	}
+
+	return 0;
+}
+
 static inline int netvsc_send_pkt(
 	struct hv_device *device,
 	struct hv_netvsc_packet *packet,
@@ -987,14 +1092,22 @@ static inline int netvsc_send_pkt(
 
 	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
 
+	packet->dma_range = NULL;
 	if (packet->page_buf_cnt) {
 		if (packet->cp_partial)
 			pb += packet->rmsg_pgcnt;
 
+		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
+		if (ret)
+			return ret;
+
 		ret = vmbus_sendpacket_pagebuffer(out_channel,
-						  pb, packet->page_buf_cnt,
-						  &nvmsg, sizeof(nvmsg),
-						  req_id);
+					pb, packet->page_buf_cnt,
+					&nvmsg, sizeof(nvmsg),
+					req_id);
+
+		if (ret)
+			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
 	} else {
 		ret = vmbus_sendpacket(out_channel,
 				       &nvmsg, sizeof(nvmsg),
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 983bf362466a..448c1ee39246 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -293,6 +293,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
 	u32 *req_id = &resp->msg.init_complete.req_id;
 	struct rndis_device *dev = nvdev->extension;
 	struct rndis_request *request = NULL;
+	struct hv_device *hv_dev = ((struct net_device_context *)
+			netdev_priv(ndev))->device_ctx;
 	bool found = false;
 	unsigned long flags;
 
@@ -361,6 +363,7 @@ static void rndis_filter_receive_response(struct net_device *ndev,
 			}
 		}
 
+		netvsc_dma_unmap(hv_dev, &request->pkt);
 		complete(&request->wait_event);
 	} else {
 		netdev_err(ndev,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index babbe19f57e2..90abff664495 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1616,6 +1616,11 @@ struct hyperv_service_callback {
 	void (*callback)(void *context);
 };
 
+struct hv_dma_range {
+	dma_addr_t dma;
+	u32 mapping_size;
+};
+
 #define MAX_SRV_VER	0x7ffffff
 extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
 				const int *fw_version, int fw_vercnt,