Message ID | 20210414144945.3460554-8-ltykernel@gmail.com (mailing list archive) |
---|---|
State | RFC |
Headers | show |
Series | x86/Hyper-V: Add Hyper-V Isolation VM support | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Guessing tree name failed - patch did not apply |
On Wed, Apr 14, 2021 at 10:49:40AM -0400, Tianyu Lan wrote: > From: Tianyu Lan <Tianyu.Lan@microsoft.com> > > VMbus ring buffer are shared with host and it's need to > be accessed via extra address space of Isolation VM with > SNP support. This patch is to map the ring buffer > address in extra address space via ioremap(). HV host Why do you need to use ioremap()? Why not just use vmap? > visibility hvcall smears data in the ring buffer and > so reset the ring buffer memory to zero after calling > visibility hvcall. So you are exposing these two: EXPORT_SYMBOL_GPL(get_vm_area); EXPORT_SYMBOL_GPL(ioremap_page_range); But if you used vmap wouldn't you get the same thing for free? > > Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com> > --- > drivers/hv/channel.c | 10 +++++ > drivers/hv/hyperv_vmbus.h | 2 + > drivers/hv/ring_buffer.c | 83 +++++++++++++++++++++++++++++---------- > mm/ioremap.c | 1 + > mm/vmalloc.c | 1 + > 5 files changed, 76 insertions(+), 21 deletions(-) > > diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c > index 407b74d72f3f..4a9fb7ad4c72 100644 > --- a/drivers/hv/channel.c > +++ b/drivers/hv/channel.c > @@ -634,6 +634,16 @@ static int __vmbus_open(struct vmbus_channel *newchannel, > if (err) > goto error_clean_ring; > > + err = hv_ringbuffer_post_init(&newchannel->outbound, > + page, send_pages); > + if (err) > + goto error_free_gpadl; > + > + err = hv_ringbuffer_post_init(&newchannel->inbound, > + &page[send_pages], recv_pages); > + if (err) > + goto error_free_gpadl; > + > /* Create and init the channel open message */ > open_info = kzalloc(sizeof(*open_info) + > sizeof(struct vmbus_channel_open_channel), > diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h > index 0778add21a9c..d78a04ad5490 100644 > --- a/drivers/hv/hyperv_vmbus.h > +++ b/drivers/hv/hyperv_vmbus.h > @@ -172,6 +172,8 @@ extern int hv_synic_cleanup(unsigned int cpu); > /* Interface */ > > void hv_ringbuffer_pre_init(struct vmbus_channel *channel); > +int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info, > + struct page *pages, u32 page_cnt); > > int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, > struct page *pages, u32 pagecnt); > diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c > index 35833d4d1a1d..c8b0f7b45158 100644 > --- a/drivers/hv/ring_buffer.c > +++ b/drivers/hv/ring_buffer.c > @@ -17,6 +17,8 @@ > #include <linux/vmalloc.h> > #include <linux/slab.h> > #include <linux/prefetch.h> > +#include <linux/io.h> > +#include <asm/mshyperv.h> > > #include "hyperv_vmbus.h" > > @@ -188,6 +190,44 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel) > mutex_init(&channel->outbound.ring_buffer_mutex); > } > > +int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info, > + struct page *pages, u32 page_cnt) > +{ > + struct vm_struct *area; > + u64 physic_addr = page_to_pfn(pages) << PAGE_SHIFT; > + unsigned long vaddr; > + int err = 0; > + > + if (!hv_isolation_type_snp()) > + return 0; > + > + physic_addr += ms_hyperv.shared_gpa_boundary; > + area = get_vm_area((2 * page_cnt - 1) * PAGE_SIZE, VM_IOREMAP); > + if (!area || !area->addr) > + return -EFAULT; > + > + vaddr = (unsigned long)area->addr; > + err = ioremap_page_range(vaddr, vaddr + page_cnt * PAGE_SIZE, > + physic_addr, PAGE_KERNEL_IO); > + err |= ioremap_page_range(vaddr + page_cnt * PAGE_SIZE, > + vaddr + (2 * page_cnt - 1) * PAGE_SIZE, > + physic_addr + PAGE_SIZE, PAGE_KERNEL_IO); > + if (err) { > + vunmap((void *)vaddr); > + return -EFAULT; > + } > + > + /* Clean memory after setting host visibility. */ > + memset((void *)vaddr, 0x00, page_cnt * PAGE_SIZE); > + > + ring_info->ring_buffer = (struct hv_ring_buffer *)vaddr; > + ring_info->ring_buffer->read_index = 0; > + ring_info->ring_buffer->write_index = 0; > + ring_info->ring_buffer->feature_bits.value = 1; > + > + return 0; > +} > + > /* Initialize the ring buffer. */ > int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, > struct page *pages, u32 page_cnt) > @@ -197,33 +237,34 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, > > BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE)); > > - /* > - * First page holds struct hv_ring_buffer, do wraparound mapping for > - * the rest. > - */ > - pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *), > - GFP_KERNEL); > - if (!pages_wraparound) > - return -ENOMEM; > - > - pages_wraparound[0] = pages; > - for (i = 0; i < 2 * (page_cnt - 1); i++) > - pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1]; > + if (!hv_isolation_type_snp()) { > + /* > + * First page holds struct hv_ring_buffer, do wraparound mapping for > + * the rest. > + */ > + pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *), > + GFP_KERNEL); > + if (!pages_wraparound) > + return -ENOMEM; > > - ring_info->ring_buffer = (struct hv_ring_buffer *) > - vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL); > + pages_wraparound[0] = pages; > + for (i = 0; i < 2 * (page_cnt - 1); i++) > + pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1]; > > - kfree(pages_wraparound); > + ring_info->ring_buffer = (struct hv_ring_buffer *) > + vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL); > > + kfree(pages_wraparound); > > - if (!ring_info->ring_buffer) > - return -ENOMEM; > + if (!ring_info->ring_buffer) > + return -ENOMEM; > > - ring_info->ring_buffer->read_index = > - ring_info->ring_buffer->write_index = 0; > + ring_info->ring_buffer->read_index = > + ring_info->ring_buffer->write_index = 0; > > - /* Set the feature bit for enabling flow control. */ > - ring_info->ring_buffer->feature_bits.value = 1; > + /* Set the feature bit for enabling flow control. */ > + ring_info->ring_buffer->feature_bits.value = 1; > + } > > ring_info->ring_size = page_cnt << PAGE_SHIFT; > ring_info->ring_size_div10_reciprocal = > diff --git a/mm/ioremap.c b/mm/ioremap.c > index 5fa1ab41d152..d63c4ba067f9 100644 > --- a/mm/ioremap.c > +++ b/mm/ioremap.c > @@ -248,6 +248,7 @@ int ioremap_page_range(unsigned long addr, > > return err; > } > +EXPORT_SYMBOL_GPL(ioremap_page_range); > > #ifdef CONFIG_GENERIC_IOREMAP > void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) > diff --git a/mm/vmalloc.c b/mm/vmalloc.c > index e6f352bf0498..19724a8ebcb7 100644 > --- a/mm/vmalloc.c > +++ b/mm/vmalloc.c > @@ -2131,6 +2131,7 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) > NUMA_NO_NODE, GFP_KERNEL, > __builtin_return_address(0)); > } > +EXPORT_SYMBOL_GPL(get_vm_area); > > struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, > const void *caller) > -- > 2.25.1 >
On Thu, Apr 15, 2021 at 04:24:15PM -0400, Konrad Rzeszutek Wilk wrote: > So you are exposing these two: > EXPORT_SYMBOL_GPL(get_vm_area); > EXPORT_SYMBOL_GPL(ioremap_page_range); > > But if you used vmap wouldn't you get the same thing for free? Yes, this needs to go into some vmap version, preferably reusing the existing code in kernel/dma/remap.c. Exporting get_vm_area is a complete dealbreaker and not going to happen. We worked hard on not exposing it to modules.
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 407b74d72f3f..4a9fb7ad4c72 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -634,6 +634,16 @@ static int __vmbus_open(struct vmbus_channel *newchannel, if (err) goto error_clean_ring; + err = hv_ringbuffer_post_init(&newchannel->outbound, + page, send_pages); + if (err) + goto error_free_gpadl; + + err = hv_ringbuffer_post_init(&newchannel->inbound, + &page[send_pages], recv_pages); + if (err) + goto error_free_gpadl; + /* Create and init the channel open message */ open_info = kzalloc(sizeof(*open_info) + sizeof(struct vmbus_channel_open_channel), diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 0778add21a9c..d78a04ad5490 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -172,6 +172,8 @@ extern int hv_synic_cleanup(unsigned int cpu); /* Interface */ void hv_ringbuffer_pre_init(struct vmbus_channel *channel); +int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info, + struct page *pages, u32 page_cnt); int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, struct page *pages, u32 pagecnt); diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 35833d4d1a1d..c8b0f7b45158 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -17,6 +17,8 @@ #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/prefetch.h> +#include <linux/io.h> +#include <asm/mshyperv.h> #include "hyperv_vmbus.h" @@ -188,6 +190,44 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel) mutex_init(&channel->outbound.ring_buffer_mutex); } +int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info, + struct page *pages, u32 page_cnt) +{ + struct vm_struct *area; + u64 physic_addr = page_to_pfn(pages) << PAGE_SHIFT; + unsigned long vaddr; + int err = 0; + + if (!hv_isolation_type_snp()) + return 0; + + physic_addr += ms_hyperv.shared_gpa_boundary; + area = get_vm_area((2 * page_cnt - 1) * PAGE_SIZE, VM_IOREMAP); + if (!area || !area->addr) + return -EFAULT; + + vaddr = (unsigned long)area->addr; + err = ioremap_page_range(vaddr, vaddr + page_cnt * PAGE_SIZE, + physic_addr, PAGE_KERNEL_IO); + err |= ioremap_page_range(vaddr + page_cnt * PAGE_SIZE, + vaddr + (2 * page_cnt - 1) * PAGE_SIZE, + physic_addr + PAGE_SIZE, PAGE_KERNEL_IO); + if (err) { + vunmap((void *)vaddr); + return -EFAULT; + } + + /* Clean memory after setting host visibility. */ + memset((void *)vaddr, 0x00, page_cnt * PAGE_SIZE); + + ring_info->ring_buffer = (struct hv_ring_buffer *)vaddr; + ring_info->ring_buffer->read_index = 0; + ring_info->ring_buffer->write_index = 0; + ring_info->ring_buffer->feature_bits.value = 1; + + return 0; +} + /* Initialize the ring buffer. */ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, struct page *pages, u32 page_cnt) @@ -197,33 +237,34 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE)); - /* - * First page holds struct hv_ring_buffer, do wraparound mapping for - * the rest. - */ - pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *), - GFP_KERNEL); - if (!pages_wraparound) - return -ENOMEM; - - pages_wraparound[0] = pages; - for (i = 0; i < 2 * (page_cnt - 1); i++) - pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1]; + if (!hv_isolation_type_snp()) { + /* + * First page holds struct hv_ring_buffer, do wraparound mapping for + * the rest. + */ + pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *), + GFP_KERNEL); + if (!pages_wraparound) + return -ENOMEM; - ring_info->ring_buffer = (struct hv_ring_buffer *) - vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL); + pages_wraparound[0] = pages; + for (i = 0; i < 2 * (page_cnt - 1); i++) + pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1]; - kfree(pages_wraparound); + ring_info->ring_buffer = (struct hv_ring_buffer *) + vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL); + kfree(pages_wraparound); - if (!ring_info->ring_buffer) - return -ENOMEM; + if (!ring_info->ring_buffer) + return -ENOMEM; - ring_info->ring_buffer->read_index = - ring_info->ring_buffer->write_index = 0; + ring_info->ring_buffer->read_index = + ring_info->ring_buffer->write_index = 0; - /* Set the feature bit for enabling flow control. */ - ring_info->ring_buffer->feature_bits.value = 1; + /* Set the feature bit for enabling flow control. */ + ring_info->ring_buffer->feature_bits.value = 1; + } ring_info->ring_size = page_cnt << PAGE_SHIFT; ring_info->ring_size_div10_reciprocal = diff --git a/mm/ioremap.c b/mm/ioremap.c index 5fa1ab41d152..d63c4ba067f9 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -248,6 +248,7 @@ int ioremap_page_range(unsigned long addr, return err; } +EXPORT_SYMBOL_GPL(ioremap_page_range); #ifdef CONFIG_GENERIC_IOREMAP void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e6f352bf0498..19724a8ebcb7 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2131,6 +2131,7 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0)); } +EXPORT_SYMBOL_GPL(get_vm_area); struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller)