Message ID | 160674938210.2492771.1728601884822491679.stgit@lep8c.aus.stglabs.ibm.com
---|---
State | New, archived
Series | spapr: nvdimm: Asynchronus flush hcall support
Hi Shiva, On Mon, 30 Nov 2020 09:16:39 -0600 Shivaprasad G Bhat <sbhat@linux.ibm.com> wrote: > The patch adds support for async hcalls at the DRC level for the > spapr devices. To be used by spapr-scm devices in the patch/es to follow. > > Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com> > --- The overall idea looks good but I think you should consider using a thread pool to implement it. See below. > hw/ppc/spapr_drc.c | 149 ++++++++++++++++++++++++++++++++++++++++++++ > include/hw/ppc/spapr_drc.h | 25 +++++++ > 2 files changed, 174 insertions(+) > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c > index 77718cde1f..4ecd04f686 100644 > --- a/hw/ppc/spapr_drc.c > +++ b/hw/ppc/spapr_drc.c > @@ -15,6 +15,7 @@ > #include "qapi/qmp/qnull.h" > #include "cpu.h" > #include "qemu/cutils.h" > +#include "qemu/guest-random.h" > #include "hw/ppc/spapr_drc.h" > #include "qom/object.h" > #include "migration/vmstate.h" > @@ -421,6 +422,148 @@ void spapr_drc_detach(SpaprDrc *drc) > spapr_drc_release(drc); > } > > + > +/* > + * @drc : device DRC targetting which the async hcalls to be made. > + * > + * All subsequent requests to run/query the status should use the > + * unique token returned here. > + */ > +uint64_t spapr_drc_get_new_async_hcall_token(SpaprDrc *drc) > +{ > + Error *err = NULL; > + uint64_t token; > + SpaprDrcDeviceAsyncHCallState *tmp, *next, *state; > + > + state = g_malloc0(sizeof(*state)); > + state->pending = true; > + > + qemu_mutex_lock(&drc->async_hcall_states_lock); > +retry: > + if (qemu_guest_getrandom(&token, sizeof(token), &err) < 0) { > + error_report_err(err); > + g_free(state); > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > + return 0; > + } > + > + if (!token) /* Token should be non-zero */ > + goto retry; > + > + if (!QLIST_EMPTY(&drc->async_hcall_states)) { > + QLIST_FOREACH_SAFE(tmp, &drc->async_hcall_states, node, next) { > + if (tmp->continue_token == token) { > + /* If the token already in use, get a new one */ > + goto retry; > + } > + } > + } > + > + state->continue_token = token; > + QLIST_INSERT_HEAD(&drc->async_hcall_states, state, node); > + > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > + > + return state->continue_token; > +} > + > +static void *spapr_drc_async_hcall_runner(void *opaque) > +{ > + int response = -1; > + SpaprDrcDeviceAsyncHCallState *state = opaque; > + > + /* > + * state is freed only after this thread finishes(after pthread_join()), > + * don't worry about it becoming NULL. > + */ > + > + response = state->func(state->data); > + > + state->hcall_ret = response; > + state->pending = 0; > + > + return NULL; > +} > + > +/* > + * @drc : device DRC targetting which the async hcalls to be made. > + * token : The continue token to be used for tracking as recived from > + * spapr_drc_get_new_async_hcall_token > + * @func() : the worker function which needs to be executed asynchronously > + * @data : data to be passed to the asynchronous function. Worker is supposed > + * to free/cleanup the data that is passed here It'd be cleaner to pass a completion callback and have free/cleanup handled there. 
> + */ > +void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token, > + SpaprDrcAsyncHcallWorkerFunc *func, void *data) > +{ > + SpaprDrcDeviceAsyncHCallState *state; > + > + qemu_mutex_lock(&drc->async_hcall_states_lock); > + QLIST_FOREACH(state, &drc->async_hcall_states, node) { > + if (state->continue_token == token) { > + state->func = func; > + state->data = data; > + qemu_thread_create(&state->thread, "sPAPR Async HCALL", > + spapr_drc_async_hcall_runner, state, > + QEMU_THREAD_JOINABLE); qemu_thread_create() exits on failure, it shouldn't be called on a guest triggerable path, eg. a buggy guest could call it up to the point that pthread_create() returns EAGAIN. Please use a thread pool (see thread_pool_submit_aio()). This takes care of all the thread housekeeping for you in a safe way, and it provides a completion callback API. The implementation could then be just about having two lists: one for pending requests (fed here) and one for completed requests (fed by the completion callback). > + break; > + } > + } > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > +} > + > +/* > + * spapr_drc_finish_async_hcalls > + * Waits for all pending async requests to complete > + * thier execution and free the states > + */ > +static void spapr_drc_finish_async_hcalls(SpaprDrc *drc) > +{ > + SpaprDrcDeviceAsyncHCallState *state, *next; > + > + if (QLIST_EMPTY(&drc->async_hcall_states)) { > + return; > + } > + > + qemu_mutex_lock(&drc->async_hcall_states_lock); > + QLIST_FOREACH_SAFE(state, &drc->async_hcall_states, node, next) { > + qemu_thread_join(&state->thread); With a thread-pool, you'd just need to aio_poll() until the pending list is empty and then clear the completed list. > + QLIST_REMOVE(state, node); > + g_free(state); > + } > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > +} > + > +/* > + * spapr_drc_get_async_hcall_status > + * Fetches the status of the hcall worker and returns H_BUSY > + * if the worker is still running. > + */ > +int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token) > +{ > + int ret = H_PARAMETER; > + SpaprDrcDeviceAsyncHCallState *state, *node; > + > + qemu_mutex_lock(&drc->async_hcall_states_lock); > + QLIST_FOREACH_SAFE(state, &drc->async_hcall_states, node, node) { > + if (state->continue_token == token) { > + if (state->pending) { > + ret = H_BUSY; > + break; > + } else { > + ret = state->hcall_ret; > + qemu_thread_join(&state->thread); Like for qemu_thread_create(), the guest shouldn't be responsible for thread housekeeping. Getting the hcall status should just be about finding the token in the pending or completed lists. > + QLIST_REMOVE(state, node); > + g_free(state); > + break; > + } > + } > + } > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > + > + return ret; > +} > + > void spapr_drc_reset(SpaprDrc *drc) > { > SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > @@ -448,6 +591,7 @@ void spapr_drc_reset(SpaprDrc *drc) > drc->ccs_offset = -1; > drc->ccs_depth = -1; > } > + spapr_drc_finish_async_hcalls(drc); > } > > static bool spapr_drc_unplug_requested_needed(void *opaque) > @@ -558,6 +702,7 @@ SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type, > drc->owner = owner; > prop_name = g_strdup_printf("dr-connector[%"PRIu32"]", > spapr_drc_index(drc)); > + Unrelated change. 
> object_property_add_child(owner, prop_name, OBJECT(drc)); > object_unref(OBJECT(drc)); > qdev_realize(DEVICE(drc), NULL, NULL); > @@ -577,6 +722,10 @@ static void spapr_dr_connector_instance_init(Object *obj) > object_property_add(obj, "fdt", "struct", prop_get_fdt, > NULL, NULL, NULL); > drc->state = drck->empty_state; > + > + qemu_mutex_init(&drc->async_hcall_states_lock); > + QLIST_INIT(&drc->async_hcall_states); > + Empty line not needed. > } > > static void spapr_dr_connector_class_init(ObjectClass *k, void *data) > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h > index 165b281496..77f6e4386c 100644 > --- a/include/hw/ppc/spapr_drc.h > +++ b/include/hw/ppc/spapr_drc.h > @@ -18,6 +18,7 @@ > #include "sysemu/runstate.h" > #include "hw/qdev-core.h" > #include "qapi/error.h" > +#include "block/thread-pool.h" > > #define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector" > #define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \ > @@ -168,6 +169,21 @@ typedef enum { > SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8, > } SpaprDrcState; > > +typedef struct SpaprDrc SpaprDrc; > + > +typedef int SpaprDrcAsyncHcallWorkerFunc(void *opaque); > +typedef struct SpaprDrcDeviceAsyncHCallState { > + uint64_t continue_token; > + bool pending; > + > + int hcall_ret; > + SpaprDrcAsyncHcallWorkerFunc *func; > + void *data; > + > + QemuThread thread; > + > + QLIST_ENTRY(SpaprDrcDeviceAsyncHCallState) node; > +} SpaprDrcDeviceAsyncHCallState; > typedef struct SpaprDrc { > /*< private >*/ > DeviceState parent; > @@ -182,6 +198,10 @@ typedef struct SpaprDrc { > int ccs_offset; > int ccs_depth; > > + /* async hcall states */ > + QemuMutex async_hcall_states_lock; > + QLIST_HEAD(, SpaprDrcDeviceAsyncHCallState) async_hcall_states; > + > /* device pointer, via link property */ > DeviceState *dev; > bool unplug_requested; > @@ -241,6 +261,11 @@ void spapr_drc_detach(SpaprDrc *drc); > /* Returns true if a hot plug/unplug request is pending */ > bool spapr_drc_transient(SpaprDrc *drc); > > +uint64_t spapr_drc_get_new_async_hcall_token(SpaprDrc *drc); > +void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token, > + SpaprDrcAsyncHcallWorkerFunc, void *data); > +int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token); > + > static inline bool spapr_drc_unplug_requested(SpaprDrc *drc) > { > return drc->unplug_requested; > > >
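For readers following the review, below is a minimal sketch of the thread-pool-based flow Greg suggests, using QEMU's existing thread_pool_submit_aio() and aio_get_thread_pool() APIs. The back-pointer from the state to its DRC and the separate completed list (async_hcall_complete_states) are illustrative assumptions, not part of the posted patch.

```c
/* Hedged sketch of the suggested redesign: the worker is submitted to the
 * main-loop thread pool instead of a dedicated pthread, and completion moves
 * the state from a pending list to a completed list. The list/field names
 * beyond the posted patch are assumptions for illustration only. */
#include "block/aio.h"
#include "block/thread-pool.h"

static void spapr_drc_async_hcall_complete(void *opaque, int hcall_ret)
{
    SpaprDrcDeviceAsyncHCallState *state = opaque;
    SpaprDrc *drc = state->drc;                 /* assumed back-pointer */

    /* Runs in the main loop once the pool worker has returned. */
    qemu_mutex_lock(&drc->async_hcall_states_lock);
    state->hcall_ret = hcall_ret;
    QLIST_REMOVE(state, node);                  /* off the pending list */
    QLIST_INSERT_HEAD(&drc->async_hcall_complete_states, state, node);
    qemu_mutex_unlock(&drc->async_hcall_states_lock);
}

void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token,
                               SpaprDrcAsyncHcallWorkerFunc *func, void *data)
{
    SpaprDrcDeviceAsyncHCallState *state;
    ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());

    qemu_mutex_lock(&drc->async_hcall_states_lock);
    QLIST_FOREACH(state, &drc->async_hcall_states, node) {
        if (state->continue_token == token) {
            /* The pool owns the thread; no qemu_thread_create()/join(). */
            thread_pool_submit_aio(pool, func, data,
                                   spapr_drc_async_hcall_complete, state);
            break;
        }
    }
    qemu_mutex_unlock(&drc->async_hcall_states_lock);
}
```

Because the completion callback runs in the main loop, moving the state between the two lists cannot race with the hcall path as long as both sides take the same lock.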
On Mon, 21 Dec 2020 13:08:53 +0100 Greg Kurz <groug@kaod.org> wrote: > Hi Shiva, > > On Mon, 30 Nov 2020 09:16:39 -0600 > Shivaprasad G Bhat <sbhat@linux.ibm.com> wrote: > > > The patch adds support for async hcalls at the DRC level for the > > spapr devices. To be used by spapr-scm devices in the patch/es to follow. > > > > Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com> > > --- > > The overall idea looks good but I think you should consider using > a thread pool to implement it. See below. > Some more comments. > > hw/ppc/spapr_drc.c | 149 ++++++++++++++++++++++++++++++++++++++++++++ > > include/hw/ppc/spapr_drc.h | 25 +++++++ > > 2 files changed, 174 insertions(+) > > > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c > > index 77718cde1f..4ecd04f686 100644 > > --- a/hw/ppc/spapr_drc.c > > +++ b/hw/ppc/spapr_drc.c > > @@ -15,6 +15,7 @@ > > #include "qapi/qmp/qnull.h" > > #include "cpu.h" > > #include "qemu/cutils.h" > > +#include "qemu/guest-random.h" > > #include "hw/ppc/spapr_drc.h" > > #include "qom/object.h" > > #include "migration/vmstate.h" > > @@ -421,6 +422,148 @@ void spapr_drc_detach(SpaprDrc *drc) > > spapr_drc_release(drc); > > } > > > > + > > +/* > > + * @drc : device DRC targetting which the async hcalls to be made. > > + * > > + * All subsequent requests to run/query the status should use the > > + * unique token returned here. > > + */ > > +uint64_t spapr_drc_get_new_async_hcall_token(SpaprDrc *drc) > > +{ > > + Error *err = NULL; > > + uint64_t token; > > + SpaprDrcDeviceAsyncHCallState *tmp, *next, *state; > > + > > + state = g_malloc0(sizeof(*state)); > > + state->pending = true; > > + > > + qemu_mutex_lock(&drc->async_hcall_states_lock); > > +retry: > > + if (qemu_guest_getrandom(&token, sizeof(token), &err) < 0) { > > + error_report_err(err); > > + g_free(state); > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > + return 0; > > + } > > + > > + if (!token) /* Token should be non-zero */ > > + goto retry; > > + > > + if (!QLIST_EMPTY(&drc->async_hcall_states)) { > > + QLIST_FOREACH_SAFE(tmp, &drc->async_hcall_states, node, next) { > > + if (tmp->continue_token == token) { > > + /* If the token already in use, get a new one */ > > + goto retry; > > + } > > + } > > + } > > + > > + state->continue_token = token; > > + QLIST_INSERT_HEAD(&drc->async_hcall_states, state, node); > > + > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > + > > + return state->continue_token; > > +} > > + > > +static void *spapr_drc_async_hcall_runner(void *opaque) > > +{ > > + int response = -1; It feels wrong since the return value of func() is supposed to be opaque to this function. And anyway it isn't needed since response is always set a few lines below. > > + SpaprDrcDeviceAsyncHCallState *state = opaque; > > + > > + /* > > + * state is freed only after this thread finishes(after pthread_join()), > > + * don't worry about it becoming NULL. > > + */ > > + > > + response = state->func(state->data); > > + > > + state->hcall_ret = response; > > + state->pending = 0; s/0/false/ > > + > > + return NULL; > > +} > > + > > +/* > > + * @drc : device DRC targetting which the async hcalls to be made. > > + * token : The continue token to be used for tracking as recived from > > + * spapr_drc_get_new_async_hcall_token > > + * @func() : the worker function which needs to be executed asynchronously > > + * @data : data to be passed to the asynchronous function. 
Worker is supposed > > + * to free/cleanup the data that is passed here > > It'd be cleaner to pass a completion callback and have free/cleanup handled there. > > > + */ > > +void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token, > > + SpaprDrcAsyncHcallWorkerFunc *func, void *data) > > +{ > > + SpaprDrcDeviceAsyncHCallState *state; > > + > > + qemu_mutex_lock(&drc->async_hcall_states_lock); > > + QLIST_FOREACH(state, &drc->async_hcall_states, node) { > > + if (state->continue_token == token) { > > + state->func = func; > > + state->data = data; > > + qemu_thread_create(&state->thread, "sPAPR Async HCALL", > > + spapr_drc_async_hcall_runner, state, > > + QEMU_THREAD_JOINABLE); > > qemu_thread_create() exits on failure, it shouldn't be called on > a guest triggerable path, eg. a buggy guest could call it up to > the point that pthread_create() returns EAGAIN. > > Please use a thread pool (see thread_pool_submit_aio()). This takes care > of all the thread housekeeping for you in a safe way, and it provides a > completion callback API. The implementation could then be just about > having two lists: one for pending requests (fed here) and one for > completed requests (fed by the completion callback). > > > + break; > > + } > > + } > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > +} > > + > > +/* > > + * spapr_drc_finish_async_hcalls > > + * Waits for all pending async requests to complete > > + * thier execution and free the states > > + */ > > +static void spapr_drc_finish_async_hcalls(SpaprDrc *drc) > > +{ > > + SpaprDrcDeviceAsyncHCallState *state, *next; > > + > > + if (QLIST_EMPTY(&drc->async_hcall_states)) { > > + return; > > + } > > + This is called during machine reset and there won't be contention in the large majority of cases. If the list is empty QLIST_FOREACH_SAFE() won't iterate. So I don't think special casing the empty list brings much. > > + qemu_mutex_lock(&drc->async_hcall_states_lock); > > + QLIST_FOREACH_SAFE(state, &drc->async_hcall_states, node, next) { > > + qemu_thread_join(&state->thread); > > With a thread-pool, you'd just need to aio_poll() until the pending list > is empty and then clear the completed list. > > > + QLIST_REMOVE(state, node); > > + g_free(state); > > + } > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > +} > > + > > +/* > > + * spapr_drc_get_async_hcall_status > > + * Fetches the status of the hcall worker and returns H_BUSY > > + * if the worker is still running. > > + */ > > +int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token) > > +{ > > + int ret = H_PARAMETER; > > + SpaprDrcDeviceAsyncHCallState *state, *node; > > + > > + qemu_mutex_lock(&drc->async_hcall_states_lock); > > + QLIST_FOREACH_SAFE(state, &drc->async_hcall_states, node, node) { > > + if (state->continue_token == token) { > > + if (state->pending) { > > + ret = H_BUSY; > > + break; > > + } else { > > + ret = state->hcall_ret; > > + qemu_thread_join(&state->thread); > > Like for qemu_thread_create(), the guest shouldn't be responsible for > thread housekeeping. Getting the hcall status should just be about > finding the token in the pending or completed lists. 
> > > + QLIST_REMOVE(state, node); > > + g_free(state); > > + break; > > + } > > + } > > + } > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > + > > + return ret; > > +} > > + > > void spapr_drc_reset(SpaprDrc *drc) > > { > > SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > @@ -448,6 +591,7 @@ void spapr_drc_reset(SpaprDrc *drc) > > drc->ccs_offset = -1; > > drc->ccs_depth = -1; > > } > > + spapr_drc_finish_async_hcalls(drc); > > } > > > > static bool spapr_drc_unplug_requested_needed(void *opaque) > > @@ -558,6 +702,7 @@ SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type, > > drc->owner = owner; > > prop_name = g_strdup_printf("dr-connector[%"PRIu32"]", > > spapr_drc_index(drc)); > > + > > Unrelated change. > > > object_property_add_child(owner, prop_name, OBJECT(drc)); > > object_unref(OBJECT(drc)); > > qdev_realize(DEVICE(drc), NULL, NULL); > > @@ -577,6 +722,10 @@ static void spapr_dr_connector_instance_init(Object *obj) > > object_property_add(obj, "fdt", "struct", prop_get_fdt, > > NULL, NULL, NULL); > > drc->state = drck->empty_state; > > + > > + qemu_mutex_init(&drc->async_hcall_states_lock); > > + QLIST_INIT(&drc->async_hcall_states); > > + > > Empty line not needed. > > > } > > > > static void spapr_dr_connector_class_init(ObjectClass *k, void *data) > > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h > > index 165b281496..77f6e4386c 100644 > > --- a/include/hw/ppc/spapr_drc.h > > +++ b/include/hw/ppc/spapr_drc.h > > @@ -18,6 +18,7 @@ > > #include "sysemu/runstate.h" > > #include "hw/qdev-core.h" > > #include "qapi/error.h" > > +#include "block/thread-pool.h" > > > > #define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector" > > #define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \ > > @@ -168,6 +169,21 @@ typedef enum { > > SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8, > > } SpaprDrcState; > > > > +typedef struct SpaprDrc SpaprDrc; > > + > > +typedef int SpaprDrcAsyncHcallWorkerFunc(void *opaque); > > +typedef struct SpaprDrcDeviceAsyncHCallState { > > + uint64_t continue_token; > > + bool pending; > > + > > + int hcall_ret; > > + SpaprDrcAsyncHcallWorkerFunc *func; > > + void *data; > > + > > + QemuThread thread; > > + > > + QLIST_ENTRY(SpaprDrcDeviceAsyncHCallState) node; > > +} SpaprDrcDeviceAsyncHCallState; > > typedef struct SpaprDrc { > > /*< private >*/ > > DeviceState parent; > > @@ -182,6 +198,10 @@ typedef struct SpaprDrc { > > int ccs_offset; > > int ccs_depth; > > > > + /* async hcall states */ > > + QemuMutex async_hcall_states_lock; > > + QLIST_HEAD(, SpaprDrcDeviceAsyncHCallState) async_hcall_states; > > + > > /* device pointer, via link property */ > > DeviceState *dev; > > bool unplug_requested; > > @@ -241,6 +261,11 @@ void spapr_drc_detach(SpaprDrc *drc); > > /* Returns true if a hot plug/unplug request is pending */ > > bool spapr_drc_transient(SpaprDrc *drc); > > > > +uint64_t spapr_drc_get_new_async_hcall_token(SpaprDrc *drc); > > +void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token, > > + SpaprDrcAsyncHcallWorkerFunc, void *data); > > +int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token); > > + > > static inline bool spapr_drc_unplug_requested(SpaprDrc *drc) > > { > > return drc->unplug_requested; > > > > > > > >
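Under the same assumption of separate pending and completed lists, the status query Greg describes reduces to a lookup with no thread housekeeping. A hedged sketch:

```c
/* Hedged sketch: with a thread pool, the guest-visible status query only
 * walks the two lists and never joins threads. The completed-list name is
 * an assumption carried over from the sketch above. */
int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token)
{
    SpaprDrcDeviceAsyncHCallState *state, *next;
    int ret = H_PARAMETER;                       /* unknown token */

    qemu_mutex_lock(&drc->async_hcall_states_lock);

    QLIST_FOREACH(state, &drc->async_hcall_states, node) {
        if (state->continue_token == token) {
            ret = H_BUSY;                        /* still on the pending list */
            goto out;
        }
    }

    QLIST_FOREACH_SAFE(state, &drc->async_hcall_complete_states, node, next) {
        if (state->continue_token == token) {
            ret = state->hcall_ret;              /* set by the completion cb */
            QLIST_REMOVE(state, node);
            g_free(state);
            break;
        }
    }
out:
    qemu_mutex_unlock(&drc->async_hcall_states_lock);
    return ret;
}
```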
On Mon, Dec 21, 2020 at 01:08:53PM +0100, Greg Kurz wrote: > Hi Shiva, > > On Mon, 30 Nov 2020 09:16:39 -0600 > Shivaprasad G Bhat <sbhat@linux.ibm.com> wrote: > > > The patch adds support for async hcalls at the DRC level for the > > spapr devices. To be used by spapr-scm devices in the patch/es to follow. > > > > Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com> > > --- > > The overall idea looks good but I think you should consider using > a thread pool to implement it. See below. I am not convinced, however. Specifically, attaching this to the DRC doesn't make sense to me. We're adding exactly one DRC related async hcall, and I can't really see much call for another one. We could have other async hcalls - indeed we already have one for HPT resizing - but attaching this to DRCs doesn't help for those. > > > hw/ppc/spapr_drc.c | 149 ++++++++++++++++++++++++++++++++++++++++++++ > > include/hw/ppc/spapr_drc.h | 25 +++++++ > > 2 files changed, 174 insertions(+) > > > > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c > > index 77718cde1f..4ecd04f686 100644 > > --- a/hw/ppc/spapr_drc.c > > +++ b/hw/ppc/spapr_drc.c > > @@ -15,6 +15,7 @@ > > #include "qapi/qmp/qnull.h" > > #include "cpu.h" > > #include "qemu/cutils.h" > > +#include "qemu/guest-random.h" > > #include "hw/ppc/spapr_drc.h" > > #include "qom/object.h" > > #include "migration/vmstate.h" > > @@ -421,6 +422,148 @@ void spapr_drc_detach(SpaprDrc *drc) > > spapr_drc_release(drc); > > } > > > > + > > +/* > > + * @drc : device DRC targetting which the async hcalls to be made. > > + * > > + * All subsequent requests to run/query the status should use the > > + * unique token returned here. > > + */ > > +uint64_t spapr_drc_get_new_async_hcall_token(SpaprDrc *drc) > > +{ > > + Error *err = NULL; > > + uint64_t token; > > + SpaprDrcDeviceAsyncHCallState *tmp, *next, *state; > > + > > + state = g_malloc0(sizeof(*state)); > > + state->pending = true; > > + > > + qemu_mutex_lock(&drc->async_hcall_states_lock); > > +retry: > > + if (qemu_guest_getrandom(&token, sizeof(token), &err) < 0) { > > + error_report_err(err); > > + g_free(state); > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > + return 0; > > + } > > + > > + if (!token) /* Token should be non-zero */ > > + goto retry; > > + > > + if (!QLIST_EMPTY(&drc->async_hcall_states)) { > > + QLIST_FOREACH_SAFE(tmp, &drc->async_hcall_states, node, next) { > > + if (tmp->continue_token == token) { > > + /* If the token already in use, get a new one */ > > + goto retry; > > + } > > + } > > + } > > + > > + state->continue_token = token; > > + QLIST_INSERT_HEAD(&drc->async_hcall_states, state, node); > > + > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > + > > + return state->continue_token; > > +} > > + > > +static void *spapr_drc_async_hcall_runner(void *opaque) > > +{ > > + int response = -1; > > + SpaprDrcDeviceAsyncHCallState *state = opaque; > > + > > + /* > > + * state is freed only after this thread finishes(after pthread_join()), > > + * don't worry about it becoming NULL. > > + */ > > + > > + response = state->func(state->data); > > + > > + state->hcall_ret = response; > > + state->pending = 0; > > + > > + return NULL; > > +} > > + > > +/* > > + * @drc : device DRC targetting which the async hcalls to be made. 
> > + * token : The continue token to be used for tracking as recived from > > + * spapr_drc_get_new_async_hcall_token > > + * @func() : the worker function which needs to be executed asynchronously > > + * @data : data to be passed to the asynchronous function. Worker is supposed > > + * to free/cleanup the data that is passed here > > It'd be cleaner to pass a completion callback and have free/cleanup handled there. > > > + */ > > +void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token, > > + SpaprDrcAsyncHcallWorkerFunc *func, void *data) > > +{ > > + SpaprDrcDeviceAsyncHCallState *state; > > + > > + qemu_mutex_lock(&drc->async_hcall_states_lock); > > + QLIST_FOREACH(state, &drc->async_hcall_states, node) { > > + if (state->continue_token == token) { > > + state->func = func; > > + state->data = data; > > + qemu_thread_create(&state->thread, "sPAPR Async HCALL", > > + spapr_drc_async_hcall_runner, state, > > + QEMU_THREAD_JOINABLE); > > qemu_thread_create() exits on failure, it shouldn't be called on > a guest triggerable path, eg. a buggy guest could call it up to > the point that pthread_create() returns EAGAIN. > > Please use a thread pool (see thread_pool_submit_aio()). This takes care > of all the thread housekeeping for you in a safe way, and it provides a > completion callback API. The implementation could then be just about > having two lists: one for pending requests (fed here) and one for > completed requests (fed by the completion callback). > > > + break; > > + } > > + } > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > +} > > + > > +/* > > + * spapr_drc_finish_async_hcalls > > + * Waits for all pending async requests to complete > > + * thier execution and free the states > > + */ > > +static void spapr_drc_finish_async_hcalls(SpaprDrc *drc) > > +{ > > + SpaprDrcDeviceAsyncHCallState *state, *next; > > + > > + if (QLIST_EMPTY(&drc->async_hcall_states)) { > > + return; > > + } > > + > > + qemu_mutex_lock(&drc->async_hcall_states_lock); > > + QLIST_FOREACH_SAFE(state, &drc->async_hcall_states, node, next) { > > + qemu_thread_join(&state->thread); > > With a thread-pool, you'd just need to aio_poll() until the pending list > is empty and then clear the completed list. > > > + QLIST_REMOVE(state, node); > > + g_free(state); > > + } > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > +} > > + > > +/* > > + * spapr_drc_get_async_hcall_status > > + * Fetches the status of the hcall worker and returns H_BUSY > > + * if the worker is still running. > > + */ > > +int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token) > > +{ > > + int ret = H_PARAMETER; > > + SpaprDrcDeviceAsyncHCallState *state, *node; > > + > > + qemu_mutex_lock(&drc->async_hcall_states_lock); > > + QLIST_FOREACH_SAFE(state, &drc->async_hcall_states, node, node) { > > + if (state->continue_token == token) { > > + if (state->pending) { > > + ret = H_BUSY; > > + break; > > + } else { > > + ret = state->hcall_ret; > > + qemu_thread_join(&state->thread); > > Like for qemu_thread_create(), the guest shouldn't be responsible for > thread housekeeping. Getting the hcall status should just be about > finding the token in the pending or completed lists. 
> > > + QLIST_REMOVE(state, node); > > + g_free(state); > > + break; > > + } > > + } > > + } > > + qemu_mutex_unlock(&drc->async_hcall_states_lock); > > + > > + return ret; > > +} > > + > > void spapr_drc_reset(SpaprDrc *drc) > > { > > SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > @@ -448,6 +591,7 @@ void spapr_drc_reset(SpaprDrc *drc) > > drc->ccs_offset = -1; > > drc->ccs_depth = -1; > > } > > + spapr_drc_finish_async_hcalls(drc); > > } > > > > static bool spapr_drc_unplug_requested_needed(void *opaque) > > @@ -558,6 +702,7 @@ SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type, > > drc->owner = owner; > > prop_name = g_strdup_printf("dr-connector[%"PRIu32"]", > > spapr_drc_index(drc)); > > + > > Unrelated change. > > > object_property_add_child(owner, prop_name, OBJECT(drc)); > > object_unref(OBJECT(drc)); > > qdev_realize(DEVICE(drc), NULL, NULL); > > @@ -577,6 +722,10 @@ static void spapr_dr_connector_instance_init(Object *obj) > > object_property_add(obj, "fdt", "struct", prop_get_fdt, > > NULL, NULL, NULL); > > drc->state = drck->empty_state; > > + > > + qemu_mutex_init(&drc->async_hcall_states_lock); > > + QLIST_INIT(&drc->async_hcall_states); > > + > > Empty line not needed. > > > } > > > > static void spapr_dr_connector_class_init(ObjectClass *k, void *data) > > diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h > > index 165b281496..77f6e4386c 100644 > > --- a/include/hw/ppc/spapr_drc.h > > +++ b/include/hw/ppc/spapr_drc.h > > @@ -18,6 +18,7 @@ > > #include "sysemu/runstate.h" > > #include "hw/qdev-core.h" > > #include "qapi/error.h" > > +#include "block/thread-pool.h" > > > > #define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector" > > #define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \ > > @@ -168,6 +169,21 @@ typedef enum { > > SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8, > > } SpaprDrcState; > > > > +typedef struct SpaprDrc SpaprDrc; > > + > > +typedef int SpaprDrcAsyncHcallWorkerFunc(void *opaque); > > +typedef struct SpaprDrcDeviceAsyncHCallState { > > + uint64_t continue_token; > > + bool pending; > > + > > + int hcall_ret; > > + SpaprDrcAsyncHcallWorkerFunc *func; > > + void *data; > > + > > + QemuThread thread; > > + > > + QLIST_ENTRY(SpaprDrcDeviceAsyncHCallState) node; > > +} SpaprDrcDeviceAsyncHCallState; > > typedef struct SpaprDrc { > > /*< private >*/ > > DeviceState parent; > > @@ -182,6 +198,10 @@ typedef struct SpaprDrc { > > int ccs_offset; > > int ccs_depth; > > > > + /* async hcall states */ > > + QemuMutex async_hcall_states_lock; > > + QLIST_HEAD(, SpaprDrcDeviceAsyncHCallState) async_hcall_states; > > + > > /* device pointer, via link property */ > > DeviceState *dev; > > bool unplug_requested; > > @@ -241,6 +261,11 @@ void spapr_drc_detach(SpaprDrc *drc); > > /* Returns true if a hot plug/unplug request is pending */ > > bool spapr_drc_transient(SpaprDrc *drc); > > > > +uint64_t spapr_drc_get_new_async_hcall_token(SpaprDrc *drc); > > +void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token, > > + SpaprDrcAsyncHcallWorkerFunc, void *data); > > +int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token); > > + > > static inline bool spapr_drc_unplug_requested(SpaprDrc *drc) > > { > > return drc->unplug_requested; > > > > > > >
Thanks for the comments! On 12/28/20 2:08 PM, David Gibson wrote: > On Mon, Dec 21, 2020 at 01:08:53PM +0100, Greg Kurz wrote: ... >> The overall idea looks good but I think you should consider using >> a thread pool to implement it. See below. > I am not convinced, however. Specifically, attaching this to the DRC > doesn't make sense to me. We're adding exactly one DRC related async > hcall, and I can't really see much call for another one. We could > have other async hcalls - indeed we already have one for HPT resizing > - but attaching this to DRCs doesn't help for those. The semantics of the hcall made me think, if this is going to be re-usable for future if implemented at DRC level. Other option is to move the async-hcall-state/list into the NVDIMMState structure in include/hw/mem/nvdimm.h and handle it with machine->nvdimms_state at a global level. Hope you are okay with using the pool based approach that Greg suggested. Please let me know. Thanks, Shivaprasad
On Tue, Jan 19, 2021 at 12:40:31PM +0530, Shivaprasad G Bhat wrote: > Thanks for the comments! > > > On 12/28/20 2:08 PM, David Gibson wrote: > > > On Mon, Dec 21, 2020 at 01:08:53PM +0100, Greg Kurz wrote: > ... > > > The overall idea looks good but I think you should consider using > > > a thread pool to implement it. See below. > > I am not convinced, however. Specifically, attaching this to the DRC > > doesn't make sense to me. We're adding exactly one DRC related async > > hcall, and I can't really see much call for another one. We could > > have other async hcalls - indeed we already have one for HPT resizing > > - but attaching this to DRCs doesn't help for those. > > The semantics of the hcall made me think, if this is going to be > re-usable for future if implemented at DRC level. It would only be re-usable for operations that are actually connected to DRCs. It doesn't seem to me particularly likely that we'll ever have more asynchronous hcalls that are also associated with DRCs. > Other option > is to move the async-hcall-state/list into the NVDIMMState structure > in include/hw/mem/nvdimm.h and handle it with machine->nvdimms_state > at a global level. I'm ok with either of two options: A) Implement this ad-hoc for this specific case, making whatever simplifications you can based on this specific case. B) Implement a general mechanism for async hcalls that is *not* tied to DRCs. Then use that for the existing H_RESIZE_HPT_PREPARE call as well as this new one. > Hope you are okay with using the pool based approach that Greg Honestly a thread pool seems like it might be overkill for this application.
Hi David, Sorry about the delay. On 2/8/21 11:51 AM, David Gibson wrote: > On Tue, Jan 19, 2021 at 12:40:31PM +0530, Shivaprasad G Bhat wrote: >> Thanks for the comments! >> >> >> On 12/28/20 2:08 PM, David Gibson wrote: >> >>> On Mon, Dec 21, 2020 at 01:08:53PM +0100, Greg Kurz wrote: >> ... >>>> The overall idea looks good but I think you should consider using >>>> a thread pool to implement it. See below. >>> I am not convinced, however. Specifically, attaching this to the DRC >>> doesn't make sense to me. We're adding exactly one DRC related async >>> hcall, and I can't really see much call for another one. We could >>> have other async hcalls - indeed we already have one for HPT resizing >>> - but attaching this to DRCs doesn't help for those. >> The semantics of the hcall made me think, if this is going to be >> re-usable for future if implemented at DRC level. > It would only be re-usable for operations that are actually connected > to DRCs. It doesn't seem to me particularly likely that we'll ever > have more asynchronous hcalls that are also associated with DRCs. Okay >> Other option >> is to move the async-hcall-state/list into the NVDIMMState structure >> in include/hw/mem/nvdimm.h and handle it with machine->nvdimms_state >> at a global level. > I'm ok with either of two options: > > A) Implement this ad-hoc for this specific case, making whatever > simplifications you can based on this specific case. I am simplifying it to the nvdimm use-case alone and limiting the scope. > B) Implement a general mechanism for async hcalls that is *not* tied > to DRCs. Then use that for the existing H_RESIZE_HPT_PREPARE call as > well as this new one. > >> Hope you are okay with using the pool based approach that Greg > Honestly a thread pool seems like it might be overkill for this > application. I think it's appropriate here as that is what is being done by virtio-pmem too for flush requests. The aio infrastructure simplifies a lot of the thread handling. Please suggest if you think there are better ways. I am sending the next version addressing all the comments from you and Greg. Thanks, Shivaprasad
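For context, the virtio-pmem flush handling Shivaprasad cites follows roughly the pattern below. This is a simplified, illustrative sketch with invented names, not the actual hw/virtio/virtio-pmem.c code: the blocking fsync() runs on a pool thread and the result is reported from a completion callback in the main loop.

```c
/* Illustrative sketch of the virtio-pmem-style flush pattern; all names
 * here are made up for the example. */
#include <errno.h>
#include <unistd.h>
#include "block/aio.h"
#include "block/thread-pool.h"

typedef struct FlushReq {
    int backend_fd;     /* fd of the memory backend to be flushed */
    /* ...whatever is needed to report completion to the guest... */
} FlushReq;

static int flush_worker_cb(void *opaque)
{
    FlushReq *req = opaque;

    /* Blocking call, safe here because we are on a pool thread. */
    return fsync(req->backend_fd) == 0 ? 0 : -errno;
}

static void flush_done_cb(void *opaque, int ret)
{
    FlushReq *req = opaque;

    /* Runs in the main loop: report 'ret' to the guest, then clean up. */
    g_free(req);
}

static void submit_flush(FlushReq *req)
{
    ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());

    thread_pool_submit_aio(pool, flush_worker_cb, req, flush_done_cb, req);
}
```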
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 77718cde1f..4ecd04f686 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -15,6 +15,7 @@ #include "qapi/qmp/qnull.h" #include "cpu.h" #include "qemu/cutils.h" +#include "qemu/guest-random.h" #include "hw/ppc/spapr_drc.h" #include "qom/object.h" #include "migration/vmstate.h" @@ -421,6 +422,148 @@ void spapr_drc_detach(SpaprDrc *drc) spapr_drc_release(drc); } + +/* + * @drc : device DRC targetting which the async hcalls to be made. + * + * All subsequent requests to run/query the status should use the + * unique token returned here. + */ +uint64_t spapr_drc_get_new_async_hcall_token(SpaprDrc *drc) +{ + Error *err = NULL; + uint64_t token; + SpaprDrcDeviceAsyncHCallState *tmp, *next, *state; + + state = g_malloc0(sizeof(*state)); + state->pending = true; + + qemu_mutex_lock(&drc->async_hcall_states_lock); +retry: + if (qemu_guest_getrandom(&token, sizeof(token), &err) < 0) { + error_report_err(err); + g_free(state); + qemu_mutex_unlock(&drc->async_hcall_states_lock); + return 0; + } + + if (!token) /* Token should be non-zero */ + goto retry; + + if (!QLIST_EMPTY(&drc->async_hcall_states)) { + QLIST_FOREACH_SAFE(tmp, &drc->async_hcall_states, node, next) { + if (tmp->continue_token == token) { + /* If the token already in use, get a new one */ + goto retry; + } + } + } + + state->continue_token = token; + QLIST_INSERT_HEAD(&drc->async_hcall_states, state, node); + + qemu_mutex_unlock(&drc->async_hcall_states_lock); + + return state->continue_token; +} + +static void *spapr_drc_async_hcall_runner(void *opaque) +{ + int response = -1; + SpaprDrcDeviceAsyncHCallState *state = opaque; + + /* + * state is freed only after this thread finishes(after pthread_join()), + * don't worry about it becoming NULL. + */ + + response = state->func(state->data); + + state->hcall_ret = response; + state->pending = 0; + + return NULL; +} + +/* + * @drc : device DRC targetting which the async hcalls to be made. + * token : The continue token to be used for tracking as recived from + * spapr_drc_get_new_async_hcall_token + * @func() : the worker function which needs to be executed asynchronously + * @data : data to be passed to the asynchronous function. 
Worker is supposed + * to free/cleanup the data that is passed here + */ +void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token, + SpaprDrcAsyncHcallWorkerFunc *func, void *data) +{ + SpaprDrcDeviceAsyncHCallState *state; + + qemu_mutex_lock(&drc->async_hcall_states_lock); + QLIST_FOREACH(state, &drc->async_hcall_states, node) { + if (state->continue_token == token) { + state->func = func; + state->data = data; + qemu_thread_create(&state->thread, "sPAPR Async HCALL", + spapr_drc_async_hcall_runner, state, + QEMU_THREAD_JOINABLE); + break; + } + } + qemu_mutex_unlock(&drc->async_hcall_states_lock); +} + +/* + * spapr_drc_finish_async_hcalls + * Waits for all pending async requests to complete + * thier execution and free the states + */ +static void spapr_drc_finish_async_hcalls(SpaprDrc *drc) +{ + SpaprDrcDeviceAsyncHCallState *state, *next; + + if (QLIST_EMPTY(&drc->async_hcall_states)) { + return; + } + + qemu_mutex_lock(&drc->async_hcall_states_lock); + QLIST_FOREACH_SAFE(state, &drc->async_hcall_states, node, next) { + qemu_thread_join(&state->thread); + QLIST_REMOVE(state, node); + g_free(state); + } + qemu_mutex_unlock(&drc->async_hcall_states_lock); +} + +/* + * spapr_drc_get_async_hcall_status + * Fetches the status of the hcall worker and returns H_BUSY + * if the worker is still running. + */ +int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token) +{ + int ret = H_PARAMETER; + SpaprDrcDeviceAsyncHCallState *state, *node; + + qemu_mutex_lock(&drc->async_hcall_states_lock); + QLIST_FOREACH_SAFE(state, &drc->async_hcall_states, node, node) { + if (state->continue_token == token) { + if (state->pending) { + ret = H_BUSY; + break; + } else { + ret = state->hcall_ret; + qemu_thread_join(&state->thread); + QLIST_REMOVE(state, node); + g_free(state); + break; + } + } + } + qemu_mutex_unlock(&drc->async_hcall_states_lock); + + return ret; +} + void spapr_drc_reset(SpaprDrc *drc) { SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); @@ -448,6 +591,7 @@ void spapr_drc_reset(SpaprDrc *drc) drc->ccs_offset = -1; drc->ccs_depth = -1; } + spapr_drc_finish_async_hcalls(drc); } static bool spapr_drc_unplug_requested_needed(void *opaque) @@ -558,6 +702,7 @@ SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type, drc->owner = owner; prop_name = g_strdup_printf("dr-connector[%"PRIu32"]", spapr_drc_index(drc)); + object_property_add_child(owner, prop_name, OBJECT(drc)); object_unref(OBJECT(drc)); qdev_realize(DEVICE(drc), NULL, NULL); @@ -577,6 +722,10 @@ static void spapr_dr_connector_instance_init(Object *obj) object_property_add(obj, "fdt", "struct", prop_get_fdt, NULL, NULL, NULL); drc->state = drck->empty_state; + + qemu_mutex_init(&drc->async_hcall_states_lock); + QLIST_INIT(&drc->async_hcall_states); + } static void spapr_dr_connector_class_init(ObjectClass *k, void *data) diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index 165b281496..77f6e4386c 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -18,6 +18,7 @@ #include "sysemu/runstate.h" #include "hw/qdev-core.h" #include "qapi/error.h" +#include "block/thread-pool.h" #define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector" #define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \ @@ -168,6 +169,21 @@ typedef enum { SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8, } SpaprDrcState; +typedef struct SpaprDrc SpaprDrc; + +typedef int SpaprDrcAsyncHcallWorkerFunc(void *opaque); +typedef struct SpaprDrcDeviceAsyncHCallState { + uint64_t continue_token; + bool pending; + + int hcall_ret; + 
SpaprDrcAsyncHcallWorkerFunc *func; + void *data; + + QemuThread thread; + + QLIST_ENTRY(SpaprDrcDeviceAsyncHCallState) node; +} SpaprDrcDeviceAsyncHCallState; typedef struct SpaprDrc { /*< private >*/ DeviceState parent; @@ -182,6 +198,10 @@ typedef struct SpaprDrc { int ccs_offset; int ccs_depth; + /* async hcall states */ + QemuMutex async_hcall_states_lock; + QLIST_HEAD(, SpaprDrcDeviceAsyncHCallState) async_hcall_states; + /* device pointer, via link property */ DeviceState *dev; bool unplug_requested; @@ -241,6 +261,11 @@ void spapr_drc_detach(SpaprDrc *drc); /* Returns true if a hot plug/unplug request is pending */ bool spapr_drc_transient(SpaprDrc *drc); +uint64_t spapr_drc_get_new_async_hcall_token(SpaprDrc *drc); +void spapr_drc_run_async_hcall(SpaprDrc *drc, uint64_t token, + SpaprDrcAsyncHcallWorkerFunc, void *data); +int spapr_drc_get_async_hcall_status(SpaprDrc *drc, uint64_t token); + static inline bool spapr_drc_unplug_requested(SpaprDrc *drc) { return drc->unplug_requested;
The patch adds support for async hcalls at the DRC level for the spapr
devices. To be used by spapr-scm devices in the patch/es to follow.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
 hw/ppc/spapr_drc.c | 149 ++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr_drc.h | 25 +++++++
 2 files changed, 174 insertions(+)
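For orientation, a hypothetical caller of the interface added by this patch might drive it as sketched below. The surrounding hcall plumbing (how the continue token is handed back to the guest, the exact PAPR return codes) is an assumption here; the real spapr-scm usage is defined in the follow-up patches.

```c
/* Hypothetical caller sketched from the interface in this version of the
 * patch: a new request gets a token and kicks off the worker; the guest
 * later polls with the same token until the worker's return value is
 * handed back. */
static target_ulong handle_async_request(SpaprDrc *drc, uint64_t continue_token,
                                         SpaprDrcAsyncHcallWorkerFunc *worker,
                                         void *worker_data)
{
    if (!continue_token) {
        /* New request: allocate a token and start the worker. */
        continue_token = spapr_drc_get_new_async_hcall_token(drc);
        if (!continue_token) {
            return H_HARDWARE;      /* token allocation failed */
        }
        spapr_drc_run_async_hcall(drc, continue_token, worker, worker_data);
        /* In a real hcall the token would be returned to the guest in an
         * output register; that plumbing is omitted in this sketch. */
        return H_BUSY;
    }

    /* Follow-up call: H_BUSY while pending, otherwise the worker's result. */
    return spapr_drc_get_async_hcall_status(drc, continue_token);
}
```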