Message ID | 20210827172114.414281-9-ltykernel@gmail.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | x86/Hyper-V: Add Hyper-V Isolation VM support | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
From: Tianyu Lan <ltykernel@gmail.com> Sent: Friday, August 27, 2021 10:21 AM > Subject tag should be "Drivers: hv: vmbus: " > VMbus ring buffer are shared with host and it's need to > be accessed via extra address space of Isolation VM with > AMD SNP support. This patch is to map the ring buffer > address in extra address space via vmap_pfn(). Hyperv set > memory host visibility hvcall smears data in the ring buffer > and so reset the ring buffer memory to zero after mapping. > > Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com> > --- > Change since v3: > * Remove hv_ringbuffer_post_init(), merge map > operation for Isolation VM into hv_ringbuffer_init() > * Call hv_ringbuffer_init() after __vmbus_establish_gpadl(). > --- > drivers/hv/Kconfig | 1 + > drivers/hv/channel.c | 19 +++++++------- > drivers/hv/ring_buffer.c | 56 ++++++++++++++++++++++++++++++---------- > 3 files changed, 54 insertions(+), 22 deletions(-) > > diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig > index d1123ceb38f3..dd12af20e467 100644 > --- a/drivers/hv/Kconfig > +++ b/drivers/hv/Kconfig > @@ -8,6 +8,7 @@ config HYPERV > || (ARM64 && !CPU_BIG_ENDIAN)) > select PARAVIRT > select X86_HV_CALLBACK_VECTOR if X86 > + select VMAP_PFN > help > Select this option to run Linux as a Hyper-V client operating > system. 
> diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c > index 82650beb3af0..81f8629e4491 100644 > --- a/drivers/hv/channel.c > +++ b/drivers/hv/channel.c > @@ -679,15 +679,6 @@ static int __vmbus_open(struct vmbus_channel *newchannel, > if (!newchannel->max_pkt_size) > newchannel->max_pkt_size = VMBUS_DEFAULT_MAX_PKT_SIZE; > > - err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages, 0); > - if (err) > - goto error_clean_ring; > - > - err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages], > - recv_pages, newchannel->max_pkt_size); > - if (err) > - goto error_clean_ring; > - > /* Establish the gpadl for the ring buffer */ > newchannel->ringbuffer_gpadlhandle = 0; > > @@ -699,6 +690,16 @@ static int __vmbus_open(struct vmbus_channel *newchannel, > if (err) > goto error_clean_ring; > > + err = hv_ringbuffer_init(&newchannel->outbound, > + page, send_pages, 0); > + if (err) > + goto error_free_gpadl; > + > + err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages], > + recv_pages, newchannel->max_pkt_size); > + if (err) > + goto error_free_gpadl; > + > /* Create and init the channel open message */ > open_info = kzalloc(sizeof(*open_info) + > sizeof(struct vmbus_channel_open_channel), > diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c > index 2aee356840a2..24d64d18eb65 100644 > --- a/drivers/hv/ring_buffer.c > +++ b/drivers/hv/ring_buffer.c > @@ -17,6 +17,8 @@ > #include <linux/vmalloc.h> > #include <linux/slab.h> > #include <linux/prefetch.h> > +#include <linux/io.h> > +#include <asm/mshyperv.h> > > #include "hyperv_vmbus.h" > > @@ -183,8 +185,10 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel) > int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, > struct page *pages, u32 page_cnt, u32 max_pkt_size) > { > - int i; > struct page **pages_wraparound; > + unsigned long *pfns_wraparound; > + u64 pfn; > + int i; > > BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE)); > > @@ -192,23 +196,49 @@ 
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, > * First page holds struct hv_ring_buffer, do wraparound mapping for > * the rest. > */ > - pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *), > - GFP_KERNEL); > - if (!pages_wraparound) > - return -ENOMEM; > + if (hv_isolation_type_snp()) { > + pfn = page_to_pfn(pages) + > + HVPFN_DOWN(ms_hyperv.shared_gpa_boundary); Use PFN_DOWN, not HVPFN_DOWN. This is all done in units of guest page size, not Hyper-V page size. > > - pages_wraparound[0] = pages; > - for (i = 0; i < 2 * (page_cnt - 1); i++) > - pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1]; > + pfns_wraparound = kcalloc(page_cnt * 2 - 1, > + sizeof(unsigned long), GFP_KERNEL); > + if (!pfns_wraparound) > + return -ENOMEM; > > - ring_info->ring_buffer = (struct hv_ring_buffer *) > - vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL); > + pfns_wraparound[0] = pfn; > + for (i = 0; i < 2 * (page_cnt - 1); i++) > + pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1; > > - kfree(pages_wraparound); > + ring_info->ring_buffer = (struct hv_ring_buffer *) > + vmap_pfn(pfns_wraparound, page_cnt * 2 - 1, > + PAGE_KERNEL); > + kfree(pfns_wraparound); > > + if (!ring_info->ring_buffer) > + return -ENOMEM; > + > + /* Zero ring buffer after setting memory host visibility. */ > + memset(ring_info->ring_buffer, 0x00, > + HV_HYP_PAGE_SIZE * page_cnt); The page_cnt parameter is in units of the guest page size. So this should use PAGE_SIZE, not HV_HYP_PAGE_SIZE. 
> + } else { > + pages_wraparound = kcalloc(page_cnt * 2 - 1, > + sizeof(struct page *), > + GFP_KERNEL); > + > + pages_wraparound[0] = pages; > + for (i = 0; i < 2 * (page_cnt - 1); i++) > + pages_wraparound[i + 1] = > + &pages[i % (page_cnt - 1) + 1]; > + > + ring_info->ring_buffer = (struct hv_ring_buffer *) > + vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, > + PAGE_KERNEL); > + > + kfree(pages_wraparound); > + if (!ring_info->ring_buffer) > + return -ENOMEM; > + } With this patch, the code is a big "if" statement with two halves -- one when SNP isolation is in effect, and the other when not. The SNP isolation case does the work using PFNs with the shared_gpa_boundary added, while the other case does the same work but using struct page. Perhaps I'm missing something, but can both halves be combined and always do the work using PFNs? The only difference is whether to add the shared_gpa_boundary, and whether to zero the memory when done. So get the starting PFN, then have an "if" statement for whether to add the shared_gpa_boundary. Then everything else is the same. At the end, use an "if" statement to decide whether to zero the memory. It would really be better to have the logic in this algorithm coded only once. > > - if (!ring_info->ring_buffer) > - return -ENOMEM; > > ring_info->ring_buffer->read_index = > ring_info->ring_buffer->write_index = 0; > -- > 2.25.1
On 9/2/2021 8:23 AM, Michael Kelley wrote: >> + } else { >> + pages_wraparound = kcalloc(page_cnt * 2 - 1, >> + sizeof(struct page *), >> + GFP_KERNEL); >> + >> + pages_wraparound[0] = pages; >> + for (i = 0; i < 2 * (page_cnt - 1); i++) >> + pages_wraparound[i + 1] = >> + &pages[i % (page_cnt - 1) + 1]; >> + >> + ring_info->ring_buffer = (struct hv_ring_buffer *) >> + vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, >> + PAGE_KERNEL); >> + >> + kfree(pages_wraparound); >> + if (!ring_info->ring_buffer) >> + return -ENOMEM; >> + } > With this patch, the code is a big "if" statement with two halves -- one > when SNP isolation is in effect, and the other when not. The SNP isolation > case does the work using PFNs with the shared_gpa_boundary added, > while the other case does the same work but using struct page. Perhaps > I'm missing something, but can both halves be combined and always > do the work using PFNs? The only difference is whether to add the > shared_gpa_boundary, and whether to zero the memory when done. > So get the starting PFN, then have an "if" statement for whether to > add the shared_gpa_boundary. Then everything else is the same. > At the end, use an "if" statement to decide whether to zero the > memory. It would really be better to have the logic in this algorithm > coded only once. > Hi Michael: I have tried this before, but vmap_pfn() only works for PFNs that lie outside of normal memory. Please see vmap_pfn_apply() for details: it returns an error when the PFN is valid.
From: Tianyu Lan <ltykernel@gmail.com> Sent: Thursday, September 2, 2021 6:36 AM > > On 9/2/2021 8:23 AM, Michael Kelley wrote: > >> + } else { > >> + pages_wraparound = kcalloc(page_cnt * 2 - 1, > >> + sizeof(struct page *), > >> + GFP_KERNEL); > >> + > >> + pages_wraparound[0] = pages; > >> + for (i = 0; i < 2 * (page_cnt - 1); i++) > >> + pages_wraparound[i + 1] = > >> + &pages[i % (page_cnt - 1) + 1]; > >> + > >> + ring_info->ring_buffer = (struct hv_ring_buffer *) > >> + vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, > >> + PAGE_KERNEL); > >> + > >> + kfree(pages_wraparound); > >> + if (!ring_info->ring_buffer) > >> + return -ENOMEM; > >> + } > > With this patch, the code is a big "if" statement with two halves -- one > > when SNP isolation is in effect, and the other when not. The SNP isolation > > case does the work using PFNs with the shared_gpa_boundary added, > > while the other case does the same work but using struct page. Perhaps > > I'm missing something, but can both halves be combined and always > > do the work using PFNs? The only difference is whether to add the > > shared_gpa_boundary, and whether to zero the memory when done. > > So get the starting PFN, then have an "if" statement for whether to > > add the shared_gpa_boundary. Then everything else is the same. > > At the end, use an "if" statement to decide whether to zero the > > memory. It would really be better to have the logic in this algorithm > > coded only once. > > > > Hi Michael: > I have tried this before. But vmap_pfn() only works for those pfns out > of normal memory. Please see vmap_pfn_apply() for detail and > return error when the PFN is valid. > Indeed. This ties into the discussion with Christoph about coming up with generalized helper functions to assist in handling the shared_gpa_boundary. Having a single implementation here in hv_ringbuffer_init() would be a good goal as well. Michael
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index d1123ceb38f3..dd12af20e467 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -8,6 +8,7 @@ config HYPERV || (ARM64 && !CPU_BIG_ENDIAN)) select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 + select VMAP_PFN help Select this option to run Linux as a Hyper-V client operating system. diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 82650beb3af0..81f8629e4491 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -679,15 +679,6 @@ static int __vmbus_open(struct vmbus_channel *newchannel, if (!newchannel->max_pkt_size) newchannel->max_pkt_size = VMBUS_DEFAULT_MAX_PKT_SIZE; - err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages, 0); - if (err) - goto error_clean_ring; - - err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages], - recv_pages, newchannel->max_pkt_size); - if (err) - goto error_clean_ring; - /* Establish the gpadl for the ring buffer */ newchannel->ringbuffer_gpadlhandle = 0; @@ -699,6 +690,16 @@ static int __vmbus_open(struct vmbus_channel *newchannel, if (err) goto error_clean_ring; + err = hv_ringbuffer_init(&newchannel->outbound, + page, send_pages, 0); + if (err) + goto error_free_gpadl; + + err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages], + recv_pages, newchannel->max_pkt_size); + if (err) + goto error_free_gpadl; + /* Create and init the channel open message */ open_info = kzalloc(sizeof(*open_info) + sizeof(struct vmbus_channel_open_channel), diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 2aee356840a2..24d64d18eb65 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -17,6 +17,8 @@ #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/prefetch.h> +#include <linux/io.h> +#include <asm/mshyperv.h> #include "hyperv_vmbus.h" @@ -183,8 +185,10 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel) int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, struct 
page *pages, u32 page_cnt, u32 max_pkt_size) { - int i; struct page **pages_wraparound; + unsigned long *pfns_wraparound; + u64 pfn; + int i; BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE)); @@ -192,23 +196,49 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, * First page holds struct hv_ring_buffer, do wraparound mapping for * the rest. */ - pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *), - GFP_KERNEL); - if (!pages_wraparound) - return -ENOMEM; + if (hv_isolation_type_snp()) { + pfn = page_to_pfn(pages) + + HVPFN_DOWN(ms_hyperv.shared_gpa_boundary); - pages_wraparound[0] = pages; - for (i = 0; i < 2 * (page_cnt - 1); i++) - pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1]; + pfns_wraparound = kcalloc(page_cnt * 2 - 1, + sizeof(unsigned long), GFP_KERNEL); + if (!pfns_wraparound) + return -ENOMEM; - ring_info->ring_buffer = (struct hv_ring_buffer *) - vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL); + pfns_wraparound[0] = pfn; + for (i = 0; i < 2 * (page_cnt - 1); i++) + pfns_wraparound[i + 1] = pfn + i % (page_cnt - 1) + 1; - kfree(pages_wraparound); + ring_info->ring_buffer = (struct hv_ring_buffer *) + vmap_pfn(pfns_wraparound, page_cnt * 2 - 1, + PAGE_KERNEL); + kfree(pfns_wraparound); + if (!ring_info->ring_buffer) + return -ENOMEM; + + /* Zero ring buffer after setting memory host visibility. 
*/ + memset(ring_info->ring_buffer, 0x00, + HV_HYP_PAGE_SIZE * page_cnt); + } else { + pages_wraparound = kcalloc(page_cnt * 2 - 1, + sizeof(struct page *), + GFP_KERNEL); + + pages_wraparound[0] = pages; + for (i = 0; i < 2 * (page_cnt - 1); i++) + pages_wraparound[i + 1] = + &pages[i % (page_cnt - 1) + 1]; + + ring_info->ring_buffer = (struct hv_ring_buffer *) + vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, + PAGE_KERNEL); + + kfree(pages_wraparound); + if (!ring_info->ring_buffer) + return -ENOMEM; + } - if (!ring_info->ring_buffer) - return -ENOMEM; ring_info->ring_buffer->read_index = ring_info->ring_buffer->write_index = 0;