diff mbox

[FIX,v2] spapr: Fix QEMU abort during memory unplug

Message ID 1500612666-19521-1-git-send-email-bharata@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bharata B Rao July 21, 2017, 4:51 a.m. UTC
Commit 0cffce56 (hw/ppc/spapr.c: adding pending_dimm_unplugs to
sPAPRMachineState) introduced a new way to track pending LMBs of DIMM
device that is marked for removal. Since this commit we can hit the
assert in spapr_pending_dimm_unplugs_add() in the following situation:

- DIMM device removal fails as the guest doesn't allow the removal.
- Subsequent attempt to remove the same DIMM would hit the assert
  as the corresponding sPAPRDIMMState is still part of the
  pending_dimm_unplugs list.

Fix this by removing the assert and conditionally adding the
sPAPRDIMMState to pending_dimm_unplugs list only when it is not
already present.

Fixes: 0cffce56ae3501c5783d779f97993ce478acf856
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
---
Changes in v2:
- sPAPRDIMMState is now allocated within spapr_pending_dimm_unplugs_add()
  itself (David Gibson)
- spapr_recover_pending_dimm_state() should never return a NULL sPAPRDIMMState,
  added an assert for the same.

 hw/ppc/spapr.c | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

Comments

David Gibson July 21, 2017, 6:57 a.m. UTC | #1
On Fri, Jul 21, 2017 at 10:21:06AM +0530, Bharata B Rao wrote:
> Commit 0cffce56 (hw/ppc/spapr.c: adding pending_dimm_unplugs to
> sPAPRMachineState) introduced a new way to track pending LMBs of DIMM
> device that is marked for removal. Since this commit we can hit the
> assert in spapr_pending_dimm_unplugs_add() in the following situation:
> 
> - DIMM device removal fails as the guest doesn't allow the removal.
> - Subsequent attempt to remove the same DIMM would hit the assert
>   as the corresponding sPAPRDIMMState is still part of the
>   pending_dimm_unplugs list.
> 
> Fix this by removing the assert and conditionally adding the
> sPAPRDIMMState to pending_dimm_unplugs list only when it is not
> already present.
> 
> Fixes: 0cffce56ae3501c5783d779f97993ce478acf856
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>

Applied to ppc-for-2.10.

> ---
> Changes in v2:
> - sPAPRDIMMState is now allocated within spapr_pending_dimm_unplugs_add()
>   itself (David Gibson)
> - spapr_recover_pending_dimm_state() should never return a NULL sPAPRDIMMState,
>   added an assert for the same.
> 
>  hw/ppc/spapr.c | 37 +++++++++++++++++++++----------------
>  1 file changed, 21 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 1cb09e7..2465b27 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2850,11 +2850,25 @@ static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s,
>      return dimm_state;
>  }
>  
> -static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
> -                                           sPAPRDIMMState *dimm_state)
> +static sPAPRDIMMState *spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
> +                                                      uint32_t nr_lmbs,
> +                                                      PCDIMMDevice *dimm)
>  {
> -    g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm));
> -    QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next);
> +    sPAPRDIMMState *ds = NULL;
> +
> +    /*
> +     * If this request is for a DIMM whose removal had failed earlier
> +     * (due to guest's refusal to remove the LMBs), we would have this
> +     * dimm already in the pending_dimm_unplugs list. In that
> +     * case don't add again.
> +     */
> +    if (!spapr_pending_dimm_unplugs_find(spapr, dimm)) {
> +        ds = g_malloc0(sizeof(sPAPRDIMMState));
> +        ds->nr_lmbs = nr_lmbs;
> +        ds->dimm = dimm;
> +        QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next);
> +    }
> +    return ds;
>  }
>  
>  static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr,
> @@ -2875,7 +2889,6 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
>      uint32_t avail_lmbs = 0;
>      uint64_t addr_start, addr;
>      int i;
> -    sPAPRDIMMState *ds;
>  
>      addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
>                                           &error_abort);
> @@ -2891,11 +2904,7 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
>          addr += SPAPR_MEMORY_BLOCK_SIZE;
>      }
>  
> -    ds = g_malloc0(sizeof(sPAPRDIMMState));
> -    ds->nr_lmbs = avail_lmbs;
> -    ds->dimm = dimm;
> -    spapr_pending_dimm_unplugs_add(ms, ds);
> -    return ds;
> +    return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm);
>  }
>  
>  /* Callback to be called during DRC release. */
> @@ -2911,6 +2920,7 @@ void spapr_lmb_release(DeviceState *dev)
>       * during the unplug process. In this case recover it. */
>      if (ds == NULL) {
>          ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
> +        g_assert(ds);
>          /* The DRC being examined by the caller at least must be counted */
>          g_assert(ds->nr_lmbs);
>      }
> @@ -2942,18 +2952,13 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
>      uint64_t addr_start, addr;
>      int i;
>      sPAPRDRConnector *drc;
> -    sPAPRDIMMState *ds;
> -
>      addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
>                                           &local_err);
>      if (local_err) {
>          goto out;
>      }
>  
> -    ds = g_malloc0(sizeof(sPAPRDIMMState));
> -    ds->nr_lmbs = nr_lmbs;
> -    ds->dimm = dimm;
> -    spapr_pending_dimm_unplugs_add(spapr, ds);
> +    spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm);
>  
>      addr = addr_start;
>      for (i = 0; i < nr_lmbs; i++) {
Daniel Henrique Barboza July 21, 2017, 11:23 a.m. UTC | #2
On 07/21/2017 03:57 AM, David Gibson wrote:
> On Fri, Jul 21, 2017 at 10:21:06AM +0530, Bharata B Rao wrote:
>> Commit 0cffce56 (hw/ppc/spapr.c: adding pending_dimm_unplugs to
>> sPAPRMachineState) introduced a new way to track pending LMBs of DIMM
>> device that is marked for removal. Since this commit we can hit the
>> assert in spapr_pending_dimm_unplugs_add() in the following situation:
>>
>> - DIMM device removal fails as the guest doesn't allow the removal.
>> - Subsequent attempt to remove the same DIMM would hit the assert
>>    as the corresponding sPAPRDIMMState is still part of the
>>    pending_dimm_unplugs list.
>>
>> Fix this by removing the assert and conditionally adding the
>> sPAPRDIMMState to pending_dimm_unplugs list only when it is not
>> already present.
>>
>> Fixes: 0cffce56ae3501c5783d779f97993ce478acf856
>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> Applied to ppc-for-2.10.
>
>> ---
>> Changes in v2:
>> - sPAPRDIMMState is now allocated within spapr_pending_dimm_unplugs_add()
>>    itself (David Gibson)
>> - spapr_recover_pending_dimm_state() should never return a NULL sPAPRDIMMState,
>>    added an assert for the same.
>>
>>   hw/ppc/spapr.c | 37 +++++++++++++++++++++----------------
>>   1 file changed, 21 insertions(+), 16 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 1cb09e7..2465b27 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2850,11 +2850,25 @@ static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s,
>>       return dimm_state;
>>   }
>>   
>> -static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
>> -                                           sPAPRDIMMState *dimm_state)
>> +static sPAPRDIMMState *spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
>> +                                                      uint32_t nr_lmbs,
>> +                                                      PCDIMMDevice *dimm)
>>   {
>> -    g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm));
>> -    QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next);
>> +    sPAPRDIMMState *ds = NULL;
>> +
>> +    /*
>> +     * If this request is for a DIMM whose removal had failed earlier
>> +     * (due to guest's refusal to remove the LMBs), we would have this
>> +     * dimm already in the pending_dimm_unplugs list. In that
>> +     * case don't add again.
>> +     */
>> +    if (!spapr_pending_dimm_unplugs_find(spapr, dimm)) {
>> +        ds = g_malloc0(sizeof(sPAPRDIMMState));
>> +        ds->nr_lmbs = nr_lmbs;
>> +        ds->dimm = dimm;
>> +        QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next);
>> +    }
>> +    return ds;
In case unplugs_find() founds a DIMM state (the condition this patch is 
trying to fix), ds is
returned as NULL. This is working because unplug_request() does not use 
the return value
of unplugs_add() and because recover() is only called if find() returns 
NULL. Still not
so pretty.

What makes sense here is something like:

     ds = spapr_pending_dimm_unplugs_find(spapr, dimm);
     if (!ds) {
         (...)
     }
     return ds;

I think this is not worth sending a v3, David can amend it in the tree. 
After amending feel
free to add my

Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>




>>   }
>>   
>>   static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr,
>> @@ -2875,7 +2889,6 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
>>       uint32_t avail_lmbs = 0;
>>       uint64_t addr_start, addr;
>>       int i;
>> -    sPAPRDIMMState *ds;
>>   
>>       addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
>>                                            &error_abort);
>> @@ -2891,11 +2904,7 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
>>           addr += SPAPR_MEMORY_BLOCK_SIZE;
>>       }
>>   
>> -    ds = g_malloc0(sizeof(sPAPRDIMMState));
>> -    ds->nr_lmbs = avail_lmbs;
>> -    ds->dimm = dimm;
>> -    spapr_pending_dimm_unplugs_add(ms, ds);
>> -    return ds;
>> +    return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm);
>>   }
>>   
>>   /* Callback to be called during DRC release. */
>> @@ -2911,6 +2920,7 @@ void spapr_lmb_release(DeviceState *dev)
>>        * during the unplug process. In this case recover it. */
>>       if (ds == NULL) {
>>           ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
>> +        g_assert(ds);
>>           /* The DRC being examined by the caller at least must be counted */
>>           g_assert(ds->nr_lmbs);
>>       }
>> @@ -2942,18 +2952,13 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
>>       uint64_t addr_start, addr;
>>       int i;
>>       sPAPRDRConnector *drc;
>> -    sPAPRDIMMState *ds;
>> -
>>       addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
>>                                            &local_err);
>>       if (local_err) {
>>           goto out;
>>       }
>>   
>> -    ds = g_malloc0(sizeof(sPAPRDIMMState));
>> -    ds->nr_lmbs = nr_lmbs;
>> -    ds->dimm = dimm;
>> -    spapr_pending_dimm_unplugs_add(spapr, ds);
>> +    spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm);
>>   
>>       addr = addr_start;
>>       for (i = 0; i < nr_lmbs; i++) {
David Gibson July 24, 2017, 2:49 a.m. UTC | #3
On Fri, Jul 21, 2017 at 08:23:02AM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 07/21/2017 03:57 AM, David Gibson wrote:
> > On Fri, Jul 21, 2017 at 10:21:06AM +0530, Bharata B Rao wrote:
> > > Commit 0cffce56 (hw/ppc/spapr.c: adding pending_dimm_unplugs to
> > > sPAPRMachineState) introduced a new way to track pending LMBs of DIMM
> > > device that is marked for removal. Since this commit we can hit the
> > > assert in spapr_pending_dimm_unplugs_add() in the following situation:
> > > 
> > > - DIMM device removal fails as the guest doesn't allow the removal.
> > > - Subsequent attempt to remove the same DIMM would hit the assert
> > >    as the corresponding sPAPRDIMMState is still part of the
> > >    pending_dimm_unplugs list.
> > > 
> > > Fix this by removing the assert and conditionally adding the
> > > sPAPRDIMMState to pending_dimm_unplugs list only when it is not
> > > already present.
> > > 
> > > Fixes: 0cffce56ae3501c5783d779f97993ce478acf856
> > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > Applied to ppc-for-2.10.
> > 
> > > ---
> > > Changes in v2:
> > > - sPAPRDIMMState is now allocated within spapr_pending_dimm_unplugs_add()
> > >    itself (David Gibson)
> > > - spapr_recover_pending_dimm_state() should never return a NULL sPAPRDIMMState,
> > >    added an assert for the same.
> > > 
> > >   hw/ppc/spapr.c | 37 +++++++++++++++++++++----------------
> > >   1 file changed, 21 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 1cb09e7..2465b27 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -2850,11 +2850,25 @@ static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s,
> > >       return dimm_state;
> > >   }
> > > -static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
> > > -                                           sPAPRDIMMState *dimm_state)
> > > +static sPAPRDIMMState *spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
> > > +                                                      uint32_t nr_lmbs,
> > > +                                                      PCDIMMDevice *dimm)
> > >   {
> > > -    g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm));
> > > -    QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next);
> > > +    sPAPRDIMMState *ds = NULL;
> > > +
> > > +    /*
> > > +     * If this request is for a DIMM whose removal had failed earlier
> > > +     * (due to guest's refusal to remove the LMBs), we would have this
> > > +     * dimm already in the pending_dimm_unplugs list. In that
> > > +     * case don't add again.
> > > +     */
> > > +    if (!spapr_pending_dimm_unplugs_find(spapr, dimm)) {
> > > +        ds = g_malloc0(sizeof(sPAPRDIMMState));
> > > +        ds->nr_lmbs = nr_lmbs;
> > > +        ds->dimm = dimm;
> > > +        QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next);
> > > +    }
> > > +    return ds;
> In case unplugs_find() founds a DIMM state (the condition this patch is
> trying to fix), ds is
> returned as NULL. This is working because unplug_request() does not use the
> return value
> of unplugs_add() and because recover() is only called if find() returns
> NULL. Still not
> so pretty.
> 
> What makes sense here is something like:
> 
>     ds = spapr_pending_dimm_unplugs_find(spapr, dimm);
>     if (!ds) {
>         (...)
>     }
>     return ds;
> 
> I think this is not worth sending a v3, David can amend it in the tree.
> After amending feel
> free to add my

Good point, amended.

> 
> Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
> 
> 
> 
> 
> > >   }
> > >   static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr,
> > > @@ -2875,7 +2889,6 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
> > >       uint32_t avail_lmbs = 0;
> > >       uint64_t addr_start, addr;
> > >       int i;
> > > -    sPAPRDIMMState *ds;
> > >       addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
> > >                                            &error_abort);
> > > @@ -2891,11 +2904,7 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
> > >           addr += SPAPR_MEMORY_BLOCK_SIZE;
> > >       }
> > > -    ds = g_malloc0(sizeof(sPAPRDIMMState));
> > > -    ds->nr_lmbs = avail_lmbs;
> > > -    ds->dimm = dimm;
> > > -    spapr_pending_dimm_unplugs_add(ms, ds);
> > > -    return ds;
> > > +    return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm);
> > >   }
> > >   /* Callback to be called during DRC release. */
> > > @@ -2911,6 +2920,7 @@ void spapr_lmb_release(DeviceState *dev)
> > >        * during the unplug process. In this case recover it. */
> > >       if (ds == NULL) {
> > >           ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
> > > +        g_assert(ds);
> > >           /* The DRC being examined by the caller at least must be counted */
> > >           g_assert(ds->nr_lmbs);
> > >       }
> > > @@ -2942,18 +2952,13 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
> > >       uint64_t addr_start, addr;
> > >       int i;
> > >       sPAPRDRConnector *drc;
> > > -    sPAPRDIMMState *ds;
> > > -
> > >       addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
> > >                                            &local_err);
> > >       if (local_err) {
> > >           goto out;
> > >       }
> > > -    ds = g_malloc0(sizeof(sPAPRDIMMState));
> > > -    ds->nr_lmbs = nr_lmbs;
> > > -    ds->dimm = dimm;
> > > -    spapr_pending_dimm_unplugs_add(spapr, ds);
> > > +    spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm);
> > >       addr = addr_start;
> > >       for (i = 0; i < nr_lmbs; i++) {
>
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 1cb09e7..2465b27 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2850,11 +2850,25 @@  static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s,
     return dimm_state;
 }
 
-static void spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
-                                           sPAPRDIMMState *dimm_state)
+static sPAPRDIMMState *spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
+                                                      uint32_t nr_lmbs,
+                                                      PCDIMMDevice *dimm)
 {
-    g_assert(!spapr_pending_dimm_unplugs_find(spapr, dimm_state->dimm));
-    QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, dimm_state, next);
+    sPAPRDIMMState *ds = NULL;
+
+    /*
+     * If this request is for a DIMM whose removal had failed earlier
+     * (due to guest's refusal to remove the LMBs), we would have this
+     * dimm already in the pending_dimm_unplugs list. In that
+     * case don't add again.
+     */
+    if (!spapr_pending_dimm_unplugs_find(spapr, dimm)) {
+        ds = g_malloc0(sizeof(sPAPRDIMMState));
+        ds->nr_lmbs = nr_lmbs;
+        ds->dimm = dimm;
+        QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next);
+    }
+    return ds;
 }
 
 static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr,
@@ -2875,7 +2889,6 @@  static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
     uint32_t avail_lmbs = 0;
     uint64_t addr_start, addr;
     int i;
-    sPAPRDIMMState *ds;
 
     addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
                                          &error_abort);
@@ -2891,11 +2904,7 @@  static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
         addr += SPAPR_MEMORY_BLOCK_SIZE;
     }
 
-    ds = g_malloc0(sizeof(sPAPRDIMMState));
-    ds->nr_lmbs = avail_lmbs;
-    ds->dimm = dimm;
-    spapr_pending_dimm_unplugs_add(ms, ds);
-    return ds;
+    return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm);
 }
 
 /* Callback to be called during DRC release. */
@@ -2911,6 +2920,7 @@  void spapr_lmb_release(DeviceState *dev)
      * during the unplug process. In this case recover it. */
     if (ds == NULL) {
         ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
+        g_assert(ds);
         /* The DRC being examined by the caller at least must be counted */
         g_assert(ds->nr_lmbs);
     }
@@ -2942,18 +2952,13 @@  static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
     uint64_t addr_start, addr;
     int i;
     sPAPRDRConnector *drc;
-    sPAPRDIMMState *ds;
-
     addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
                                          &local_err);
     if (local_err) {
         goto out;
     }
 
-    ds = g_malloc0(sizeof(sPAPRDIMMState));
-    ds->nr_lmbs = nr_lmbs;
-    ds->dimm = dimm;
-    spapr_pending_dimm_unplugs_add(spapr, ds);
+    spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm);
 
     addr = addr_start;
     for (i = 0; i < nr_lmbs; i++) {