Message ID | ef0f91fd4c49c623dda09a1774392d2f2a99ae35.1587142844.git.tamas.lengyel@intel.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | VM forking | expand |
On Fri, Apr 17, 2020 at 10:06:31AM -0700, Tamas K Lengyel wrote: > When a forked VM is being reset while having vm_events active, re-copying the > vCPU info page can lead to events being lost. This seems to only affect a > subset of events (interrupts), while not others (cpuid, MTF, EPT) thus it was I'm slightly lost by the sentence, is the guest or the hypervisor the one losing events? Ie: interrupts are events from a guest PoV, but cpuid or EPT is not something that triggers events that are injected to the guest. I think the commit message needs clarification. > not discovered beforehand. Only copying vCPU info page contents during initial > fork fixes the problem. Hm, I'm not sure I understand why this is causing issues. When you reset a fork you should reset the vcpu info page, or else event masks would be in a wrong state? > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> > --- > xen/arch/x86/mm/mem_sharing.c | 50 +++++++++++++++++------------------ > 1 file changed, 25 insertions(+), 25 deletions(-) > > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c > index e572e9e39d..4b31a8b8f6 100644 > --- a/xen/arch/x86/mm/mem_sharing.c > +++ b/xen/arch/x86/mm/mem_sharing.c > @@ -1534,28 +1534,6 @@ int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool unsharing) > p2m->default_access, -1); > } > > -static int bring_up_vcpus(struct domain *cd, struct domain *d) > -{ > - unsigned int i; > - int ret = -EINVAL; > - > - if ( d->max_vcpus != cd->max_vcpus || > - (ret = cpupool_move_domain(cd, d->cpupool)) ) > - return ret; > - > - for ( i = 0; i < cd->max_vcpus; i++ ) > - { > - if ( !d->vcpu[i] || cd->vcpu[i] ) > - continue; > - > - if ( !vcpu_create(cd, i) ) > - return -EINVAL; > - } > - > - domain_update_node_affinity(cd); > - return 0; > -} > - > static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > { > unsigned int i; > @@ -1614,6 +1592,31 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > return 0; > } > > +static int bring_up_vcpus(struct domain *cd, struct domain *d) > +{ > + unsigned int i; > + int ret = -EINVAL; > + > + if ( d->max_vcpus != cd->max_vcpus || > + (ret = cpupool_move_domain(cd, d->cpupool)) ) > + return ret; > + > + for ( i = 0; i < cd->max_vcpus; i++ ) > + { > + if ( !d->vcpu[i] || cd->vcpu[i] ) > + continue; > + > + if ( !vcpu_create(cd, i) ) > + return -EINVAL; > + } > + > + if ( (ret = copy_vcpu_settings(cd, d)) ) > + return ret; > + > + domain_update_node_affinity(cd); > + return 0; > +} I would prefer the code movement to be in a different patch: it makes it more difficult to spot non-functional changes being made while moving the function around. Thanks, Roger.
On Mon, Apr 20, 2020 at 1:45 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > On Fri, Apr 17, 2020 at 10:06:31AM -0700, Tamas K Lengyel wrote: > > When a forked VM is being reset while having vm_events active, re-copying the > > vCPU info page can lead to events being lost. This seems to only affect a > > subset of events (interrupts), while not others (cpuid, MTF, EPT) thus it was > > I'm slightly lost by the sentence, is the guest or the hypervisor > the one losing events? > > Ie: interrupts are events from a guest PoV, but cpuid or EPT is not > something that triggers events that are injected to the guest. I think > the commit message needs clarification. Sorry, what I meant was software interrupts are not triggered anymore, ie. int3 and it's associated event is not sent to the monitor application (VM_EVENT_REASON_SOFTWARE_BREAKPOINT). > > > not discovered beforehand. Only copying vCPU info page contents during initial > > fork fixes the problem. > > Hm, I'm not sure I understand why this is causing issues. When you > reset a fork you should reset the vcpu info page, or else event masks would > be in a wrong state? When we reset a fork we only want to 1) discard any memory allocated for it 2) reset the vCPU registers. We don't want to reset event channels or anything else. We have active vm_events on the domain and the whole point of doing a fork reset is to avoid having to reinitialize all that as it's quite slow. > > > > Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> > > --- > > xen/arch/x86/mm/mem_sharing.c | 50 +++++++++++++++++------------------ > > 1 file changed, 25 insertions(+), 25 deletions(-) > > > > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c > > index e572e9e39d..4b31a8b8f6 100644 > > --- a/xen/arch/x86/mm/mem_sharing.c > > +++ b/xen/arch/x86/mm/mem_sharing.c > > @@ -1534,28 +1534,6 @@ int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool unsharing) > > p2m->default_access, -1); > > } > > > > -static int bring_up_vcpus(struct domain *cd, struct domain *d) > > -{ > > - unsigned int i; > > - int ret = -EINVAL; > > - > > - if ( d->max_vcpus != cd->max_vcpus || > > - (ret = cpupool_move_domain(cd, d->cpupool)) ) > > - return ret; > > - > > - for ( i = 0; i < cd->max_vcpus; i++ ) > > - { > > - if ( !d->vcpu[i] || cd->vcpu[i] ) > > - continue; > > - > > - if ( !vcpu_create(cd, i) ) > > - return -EINVAL; > > - } > > - > > - domain_update_node_affinity(cd); > > - return 0; > > -} > > - > > static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > { > > unsigned int i; > > @@ -1614,6 +1592,31 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d) > > return 0; > > } > > > > +static int bring_up_vcpus(struct domain *cd, struct domain *d) > > +{ > > + unsigned int i; > > + int ret = -EINVAL; > > + > > + if ( d->max_vcpus != cd->max_vcpus || > > + (ret = cpupool_move_domain(cd, d->cpupool)) ) > > + return ret; > > + > > + for ( i = 0; i < cd->max_vcpus; i++ ) > > + { > > + if ( !d->vcpu[i] || cd->vcpu[i] ) > > + continue; > > + > > + if ( !vcpu_create(cd, i) ) > > + return -EINVAL; > > + } > > + > > + if ( (ret = copy_vcpu_settings(cd, d)) ) > > + return ret; > > + > > + domain_update_node_affinity(cd); > > + return 0; > > +} > > I would prefer the code movement to be in a different patch: it makes > it more difficult to spot non-functional changes being made while > moving the function around. I don't think it's worth splitting this patch because of that. The patch is quite small an the move is trivial to allow the function bring_up_vcpus be able to call copy_vcpu_settings without having to pre-declare that function. Tamas
On 20.04.2020 16:15, Tamas K Lengyel wrote: > On Mon, Apr 20, 2020 at 1:45 AM Roger Pau Monné <roger.pau@citrix.com> wrote: >> >> On Fri, Apr 17, 2020 at 10:06:31AM -0700, Tamas K Lengyel wrote: >>> When a forked VM is being reset while having vm_events active, re-copying the >>> vCPU info page can lead to events being lost. This seems to only affect a >>> subset of events (interrupts), while not others (cpuid, MTF, EPT) thus it was >> >> I'm slightly lost by the sentence, is the guest or the hypervisor >> the one losing events? >> >> Ie: interrupts are events from a guest PoV, but cpuid or EPT is not >> something that triggers events that are injected to the guest. I think >> the commit message needs clarification. > > Sorry, what I meant was software interrupts are not triggered anymore, > ie. int3 and it's associated event is not sent to the monitor > application (VM_EVENT_REASON_SOFTWARE_BREAKPOINT). > >> >>> not discovered beforehand. Only copying vCPU info page contents during initial >>> fork fixes the problem. >> >> Hm, I'm not sure I understand why this is causing issues. When you >> reset a fork you should reset the vcpu info page, or else event masks would >> be in a wrong state? > > When we reset a fork we only want to 1) discard any memory allocated > for it 2) reset the vCPU registers. We don't want to reset event > channels or anything else. We have active vm_events on the domain and > the whole point of doing a fork reset is to avoid having to > reinitialize all that as it's quite slow. So for an arbitrary piece of state, what are the criteria to establish whether to copy or re-init them during a fork? Is it really now and forever only memory that wants resetting? I have to admit I'm confused by you also mentioning CPU registers - aren't they to be copied rather than reset? Jan
On Mon, Apr 20, 2020 at 8:19 AM Jan Beulich <jbeulich@suse.com> wrote: > > On 20.04.2020 16:15, Tamas K Lengyel wrote: > > On Mon, Apr 20, 2020 at 1:45 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > >> > >> On Fri, Apr 17, 2020 at 10:06:31AM -0700, Tamas K Lengyel wrote: > >>> When a forked VM is being reset while having vm_events active, re-copying the > >>> vCPU info page can lead to events being lost. This seems to only affect a > >>> subset of events (interrupts), while not others (cpuid, MTF, EPT) thus it was > >> > >> I'm slightly lost by the sentence, is the guest or the hypervisor > >> the one losing events? > >> > >> Ie: interrupts are events from a guest PoV, but cpuid or EPT is not > >> something that triggers events that are injected to the guest. I think > >> the commit message needs clarification. > > > > Sorry, what I meant was software interrupts are not triggered anymore, > > ie. int3 and it's associated event is not sent to the monitor > > application (VM_EVENT_REASON_SOFTWARE_BREAKPOINT). > > > >> > >>> not discovered beforehand. Only copying vCPU info page contents during initial > >>> fork fixes the problem. > >> > >> Hm, I'm not sure I understand why this is causing issues. When you > >> reset a fork you should reset the vcpu info page, or else event masks would > >> be in a wrong state? > > > > When we reset a fork we only want to 1) discard any memory allocated > > for it 2) reset the vCPU registers. We don't want to reset event > > channels or anything else. We have active vm_events on the domain and > > the whole point of doing a fork reset is to avoid having to > > reinitialize all that as it's quite slow. > > So for an arbitrary piece of state, what are the criteria to establish > whether to copy or re-init them during a fork? Is it really now and > forever only memory that wants resetting? I have to admit I'm confused > by you also mentioning CPU registers - aren't they to be copied rather > than reset? Registers are being reset by copying them from the parent. Allocated memory is discarded as the memory that's needed for the new execution will get copied when EPT faults happen as it's executing. The goal is to put the domain back to its initial execution state without having to reinitialize vm_events. In our experiments when the forks are executed only for a very short period (fuzzing), having to reinitialize the vm_event interfaces mean going from ~100 execution/s to ~2 executions/s. Unfortunately in the current state the fork doesn't generate the required vm_events after the first execution and for some reason it only happens for int3 generated events. Tamas
On 20.04.2020 16:27, Tamas K Lengyel wrote: > On Mon, Apr 20, 2020 at 8:19 AM Jan Beulich <jbeulich@suse.com> wrote: >> >> On 20.04.2020 16:15, Tamas K Lengyel wrote: >>> On Mon, Apr 20, 2020 at 1:45 AM Roger Pau Monné <roger.pau@citrix.com> wrote: >>>> >>>> On Fri, Apr 17, 2020 at 10:06:31AM -0700, Tamas K Lengyel wrote: >>>>> When a forked VM is being reset while having vm_events active, re-copying the >>>>> vCPU info page can lead to events being lost. This seems to only affect a >>>>> subset of events (interrupts), while not others (cpuid, MTF, EPT) thus it was >>>> >>>> I'm slightly lost by the sentence, is the guest or the hypervisor >>>> the one losing events? >>>> >>>> Ie: interrupts are events from a guest PoV, but cpuid or EPT is not >>>> something that triggers events that are injected to the guest. I think >>>> the commit message needs clarification. >>> >>> Sorry, what I meant was software interrupts are not triggered anymore, >>> ie. int3 and it's associated event is not sent to the monitor >>> application (VM_EVENT_REASON_SOFTWARE_BREAKPOINT). >>> >>>> >>>>> not discovered beforehand. Only copying vCPU info page contents during initial >>>>> fork fixes the problem. >>>> >>>> Hm, I'm not sure I understand why this is causing issues. When you >>>> reset a fork you should reset the vcpu info page, or else event masks would >>>> be in a wrong state? >>> >>> When we reset a fork we only want to 1) discard any memory allocated >>> for it 2) reset the vCPU registers. We don't want to reset event >>> channels or anything else. We have active vm_events on the domain and >>> the whole point of doing a fork reset is to avoid having to >>> reinitialize all that as it's quite slow. >> >> So for an arbitrary piece of state, what are the criteria to establish >> whether to copy or re-init them during a fork? Is it really now and >> forever only memory that wants resetting? I have to admit I'm confused >> by you also mentioning CPU registers - aren't they to be copied rather >> than reset? > > Registers are being reset by copying them from the parent. Allocated > memory is discarded as the memory that's needed for the new execution > will get copied when EPT faults happen as it's executing. The goal is > to put the domain back to its initial execution state without having > to reinitialize vm_events. In our experiments when the forks are > executed only for a very short period (fuzzing), having to > reinitialize the vm_event interfaces mean going from ~100 execution/s > to ~2 executions/s. Unfortunately in the current state the fork > doesn't generate the required vm_events after the first execution and > for some reason it only happens for int3 generated events. Thanks, but I'm afraid this doesn't answer my question regarding the criteria for what should be put back to the fork's initial state vs what should be left as is. In fact _anything_ not getting reset to initial state would seem to need special justification (beyond performance considerations). Jan
On Mon, Apr 20, 2020 at 9:51 AM Jan Beulich <jbeulich@suse.com> wrote: > > On 20.04.2020 16:27, Tamas K Lengyel wrote: > > On Mon, Apr 20, 2020 at 8:19 AM Jan Beulich <jbeulich@suse.com> wrote: > >> > >> On 20.04.2020 16:15, Tamas K Lengyel wrote: > >>> On Mon, Apr 20, 2020 at 1:45 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > >>>> > >>>> On Fri, Apr 17, 2020 at 10:06:31AM -0700, Tamas K Lengyel wrote: > >>>>> When a forked VM is being reset while having vm_events active, re-copying the > >>>>> vCPU info page can lead to events being lost. This seems to only affect a > >>>>> subset of events (interrupts), while not others (cpuid, MTF, EPT) thus it was > >>>> > >>>> I'm slightly lost by the sentence, is the guest or the hypervisor > >>>> the one losing events? > >>>> > >>>> Ie: interrupts are events from a guest PoV, but cpuid or EPT is not > >>>> something that triggers events that are injected to the guest. I think > >>>> the commit message needs clarification. > >>> > >>> Sorry, what I meant was software interrupts are not triggered anymore, > >>> ie. int3 and it's associated event is not sent to the monitor > >>> application (VM_EVENT_REASON_SOFTWARE_BREAKPOINT). > >>> > >>>> > >>>>> not discovered beforehand. Only copying vCPU info page contents during initial > >>>>> fork fixes the problem. > >>>> > >>>> Hm, I'm not sure I understand why this is causing issues. When you > >>>> reset a fork you should reset the vcpu info page, or else event masks would > >>>> be in a wrong state? > >>> > >>> When we reset a fork we only want to 1) discard any memory allocated > >>> for it 2) reset the vCPU registers. We don't want to reset event > >>> channels or anything else. We have active vm_events on the domain and > >>> the whole point of doing a fork reset is to avoid having to > >>> reinitialize all that as it's quite slow. > >> > >> So for an arbitrary piece of state, what are the criteria to establish > >> whether to copy or re-init them during a fork? Is it really now and > >> forever only memory that wants resetting? I have to admit I'm confused > >> by you also mentioning CPU registers - aren't they to be copied rather > >> than reset? > > > > Registers are being reset by copying them from the parent. Allocated > > memory is discarded as the memory that's needed for the new execution > > will get copied when EPT faults happen as it's executing. The goal is > > to put the domain back to its initial execution state without having > > to reinitialize vm_events. In our experiments when the forks are > > executed only for a very short period (fuzzing), having to > > reinitialize the vm_event interfaces mean going from ~100 execution/s > > to ~2 executions/s. Unfortunately in the current state the fork > > doesn't generate the required vm_events after the first execution and > > for some reason it only happens for int3 generated events. > > Thanks, but I'm afraid this doesn't answer my question regarding the > criteria for what should be put back to the fork's initial state vs > what should be left as is. In fact _anything_ not getting reset to > initial state would seem to need special justification (beyond > performance considerations). From my PoV everything should be reset as long as it doesn't interfere with already registered vm_events. The only part that seems to interfere with the regular flow of events right now is the vcpu_info_mfn. I've ran some further tests and seems that when the code that is being fuzzed is in ring3, int3 events are delivered as expected after a fork reset. But if the code in question is ring0, then the expected int3 is not delivered. It could entirely be that in the kernel-mode case the code takes a detour and the reason we don't see the expected int3 is not an interference with vm_events directly, rather a crash in the guest due to the vcpu_info_mfn being reset. In either case, it needs to be avoided. Tamas Tamas
On Mon, Apr 20, 2020 at 10:09 AM Tamas K Lengyel <tamas@tklengyel.com> wrote: > > On Mon, Apr 20, 2020 at 9:51 AM Jan Beulich <jbeulich@suse.com> wrote: > > > > On 20.04.2020 16:27, Tamas K Lengyel wrote: > > > On Mon, Apr 20, 2020 at 8:19 AM Jan Beulich <jbeulich@suse.com> wrote: > > >> > > >> On 20.04.2020 16:15, Tamas K Lengyel wrote: > > >>> On Mon, Apr 20, 2020 at 1:45 AM Roger Pau Monné <roger.pau@citrix.com> wrote: > > >>>> > > >>>> On Fri, Apr 17, 2020 at 10:06:31AM -0700, Tamas K Lengyel wrote: > > >>>>> When a forked VM is being reset while having vm_events active, re-copying the > > >>>>> vCPU info page can lead to events being lost. This seems to only affect a > > >>>>> subset of events (interrupts), while not others (cpuid, MTF, EPT) thus it was > > >>>> > > >>>> I'm slightly lost by the sentence, is the guest or the hypervisor > > >>>> the one losing events? > > >>>> > > >>>> Ie: interrupts are events from a guest PoV, but cpuid or EPT is not > > >>>> something that triggers events that are injected to the guest. I think > > >>>> the commit message needs clarification. > > >>> > > >>> Sorry, what I meant was software interrupts are not triggered anymore, > > >>> ie. int3 and it's associated event is not sent to the monitor > > >>> application (VM_EVENT_REASON_SOFTWARE_BREAKPOINT). > > >>> > > >>>> > > >>>>> not discovered beforehand. Only copying vCPU info page contents during initial > > >>>>> fork fixes the problem. > > >>>> > > >>>> Hm, I'm not sure I understand why this is causing issues. When you > > >>>> reset a fork you should reset the vcpu info page, or else event masks would > > >>>> be in a wrong state? > > >>> > > >>> When we reset a fork we only want to 1) discard any memory allocated > > >>> for it 2) reset the vCPU registers. We don't want to reset event > > >>> channels or anything else. We have active vm_events on the domain and > > >>> the whole point of doing a fork reset is to avoid having to > > >>> reinitialize all that as it's quite slow. > > >> > > >> So for an arbitrary piece of state, what are the criteria to establish > > >> whether to copy or re-init them during a fork? Is it really now and > > >> forever only memory that wants resetting? I have to admit I'm confused > > >> by you also mentioning CPU registers - aren't they to be copied rather > > >> than reset? > > > > > > Registers are being reset by copying them from the parent. Allocated > > > memory is discarded as the memory that's needed for the new execution > > > will get copied when EPT faults happen as it's executing. The goal is > > > to put the domain back to its initial execution state without having > > > to reinitialize vm_events. In our experiments when the forks are > > > executed only for a very short period (fuzzing), having to > > > reinitialize the vm_event interfaces mean going from ~100 execution/s > > > to ~2 executions/s. Unfortunately in the current state the fork > > > doesn't generate the required vm_events after the first execution and > > > for some reason it only happens for int3 generated events. > > > > Thanks, but I'm afraid this doesn't answer my question regarding the > > criteria for what should be put back to the fork's initial state vs > > what should be left as is. In fact _anything_ not getting reset to > > initial state would seem to need special justification (beyond > > performance considerations). > > From my PoV everything should be reset as long as it doesn't interfere > with already registered vm_events. The only part that seems to > interfere with the regular flow of events right now is the > vcpu_info_mfn. Alright, I figured out what's really happening here. During fork reset we iterate over all pages belonging to the fork, releasing all pages that pass p2m_is_sharable(p2mt) check. Unfortunately the vcpu info pages also pass this check. Because of that the pages are removed from the p2m but remain mapped to the vcpu structs. No wonder this was causing all sorts of weirdness, if the guest tries to access the vcpu info pages it would cause endless pagefaults, which would manifest as events no longer appearing as expected (in this case the int3 event). Re-copying the vcpu info page's content from the parent is perfectly fine during reset, that causes no issues if the pages remain in the p2m. I'll be sending a different patch that fixes this bug and with a better commit message. Tamas
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c index e572e9e39d..4b31a8b8f6 100644 --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -1534,28 +1534,6 @@ int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool unsharing) p2m->default_access, -1); } -static int bring_up_vcpus(struct domain *cd, struct domain *d) -{ - unsigned int i; - int ret = -EINVAL; - - if ( d->max_vcpus != cd->max_vcpus || - (ret = cpupool_move_domain(cd, d->cpupool)) ) - return ret; - - for ( i = 0; i < cd->max_vcpus; i++ ) - { - if ( !d->vcpu[i] || cd->vcpu[i] ) - continue; - - if ( !vcpu_create(cd, i) ) - return -EINVAL; - } - - domain_update_node_affinity(cd); - return 0; -} - static int copy_vcpu_settings(struct domain *cd, const struct domain *d) { unsigned int i; @@ -1614,6 +1592,31 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d) return 0; } +static int bring_up_vcpus(struct domain *cd, struct domain *d) +{ + unsigned int i; + int ret = -EINVAL; + + if ( d->max_vcpus != cd->max_vcpus || + (ret = cpupool_move_domain(cd, d->cpupool)) ) + return ret; + + for ( i = 0; i < cd->max_vcpus; i++ ) + { + if ( !d->vcpu[i] || cd->vcpu[i] ) + continue; + + if ( !vcpu_create(cd, i) ) + return -EINVAL; + } + + if ( (ret = copy_vcpu_settings(cd, d)) ) + return ret; + + domain_update_node_affinity(cd); + return 0; +} + static int fork_hap_allocation(struct domain *cd, struct domain *d) { int rc; @@ -1697,9 +1700,6 @@ static int copy_settings(struct domain *cd, struct domain *d) { int rc; - if ( (rc = copy_vcpu_settings(cd, d)) ) - return rc; - if ( (rc = hvm_copy_context_and_params(cd, d)) ) return rc;
When a forked VM is being reset while having vm_events active, re-copying the vCPU info page can lead to events being lost. This seems to only affect a subset of events (interrupts), while not others (cpuid, MTF, EPT) thus it was not discovered beforehand. Only copying vCPU info page contents during initial fork fixes the problem. Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> --- xen/arch/x86/mm/mem_sharing.c | 50 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 25 deletions(-)