[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <DM5PR2101MB104749677CD07DE42B5BD26DD70C0@DM5PR2101MB1047.namprd21.prod.outlook.com>
Date: Wed, 22 Jan 2020 18:27:53 +0000
From: Michael Kelley <mikelley@...rosoft.com>
To: Dexuan Cui <decui@...rosoft.com>,
KY Srinivasan <kys@...rosoft.com>,
Haiyang Zhang <haiyangz@...rosoft.com>,
Stephen Hemminger <sthemmin@...rosoft.com>,
"sashal@...nel.org" <sashal@...nel.org>,
Sasha Levin <Alexander.Levin@...rosoft.com>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
vkuznets <vkuznets@...hat.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH v2 4/4] hv_utils: Add the support of hibernation
From: Dexuan Cui <decui@...rosoft.com> Sent: Sunday, January 12, 2020 10:32 PM
>
> Add util_pre_suspend() and util_pre_resume() for some hv_utils devices
> (e.g. kvp/vss/fcopy), because they need special handling before
> util_suspend() calls vmbus_close().
>
> For kvp, all the possible pending work items should be cancelled.
>
> For vss and fcopy, extra clean-up needs to be done, i.e. fake a THAW
> message for hv_vss_daemon and fake a CANCEL_FCOPY message for
> hv_fcopy_daemonemon, otherwise when the VM resums back, the daemons
> can end up in an inconsistent state (i.e. the file systems are
> frozen but will never be thawed; the file transmitted via fcopy
> may not be complete). Note: there is an extra patch for the daemons:
> "Tools: hv: Reopen the devices if read() or write() returns errors",
> because the hv_utils driver can not guarantee the whole transaction
> finishes completely once util_suspend() starts to run (at this time,
> all the userspace processes are frozen).
>
> util_probe() disables channel->callback_event to avoid the race with
> the the channel callback.
>
> Signed-off-by: Dexuan Cui <decui@...rosoft.com>
> ---
> drivers/hv/hv_fcopy.c | 58 ++++++++++++++++++++++++++++++++++++-
> drivers/hv/hv_kvp.c | 44 ++++++++++++++++++++++++++--
> drivers/hv/hv_snapshot.c | 60 +++++++++++++++++++++++++++++++++++++--
> drivers/hv/hv_util.c | 60 ++++++++++++++++++++++++++++++++++++++-
> drivers/hv/hyperv_vmbus.h | 6 ++++
> include/linux/hyperv.h | 2 ++
> 6 files changed, 224 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
> index 08fa4a5de644..d63853f16356 100644
> --- a/drivers/hv/hv_fcopy.c
> +++ b/drivers/hv/hv_fcopy.c
> @@ -346,9 +346,65 @@ int hv_fcopy_init(struct hv_util_service *srv)
> return 0;
> }
>
> +static void hv_fcopy_cancel_work(void)
> +{
> + cancel_delayed_work_sync(&fcopy_timeout_work);
> + cancel_work_sync(&fcopy_send_work);
> +}
> +
> +int hv_fcopy_pre_suspend(void)
> +{
> + struct vmbus_channel *channel = fcopy_transaction.recv_channel;
> + struct hv_fcopy_hdr *fcopy_msg;
> +
> + tasklet_disable(&channel->callback_event);
> +
> + /*
> + * Fake a CANCEL_FCOPY message to the user space daemon in case the
> + * daemon is in the middle of copying some file. It doesn't matter if
> + * there is already a message pending to be delivered to the user
> + * space: we force fcopy_transaction.state to be HVUTIL_READY, so the
> + * user space daemon's write() will fail with -EINVAL (see
> + * fcopy_on_msg()), and the daemon will reset the device by closing and
> + * re-opening it.
> + */
> + fcopy_msg = kzalloc(sizeof(*fcopy_msg), GFP_KERNEL);
> + if (!fcopy_msg)
> + goto err;
> +
> + fcopy_msg->operation = CANCEL_FCOPY;
> +
> + hv_fcopy_cancel_work();
> +
> + /* We don't care about the return value. */
> + hvutil_transport_send(hvt, fcopy_msg, sizeof(*fcopy_msg), NULL);
> +
> + kfree(fcopy_msg);
> +
> + fcopy_transaction.state = HVUTIL_READY;
Is the ordering correct here? It seems like the fcopy daemon could receive
the cancel message and do the write before the state is forced to
HVUTIL_READY.
> +
> + /* tasklet_enable() will be called in hv_fcopy_pre_resume(). */
> +
> + return 0;
> +err:
> + tasklet_enable(&channel->callback_event);
A nit, but if you did the memory allocation first, you could return -ENOMEM
immediately on error and avoid the err: label and re-enabling the tasklet.
> + return -ENOMEM;
> +}
> +
> +int hv_fcopy_pre_resume(void)
> +{
> + struct vmbus_channel *channel = fcopy_transaction.recv_channel;
> +
> + tasklet_enable(&channel->callback_event);
> +
> + return 0;
> +}
> +
> void hv_fcopy_deinit(void)
> {
> fcopy_transaction.state = HVUTIL_DEVICE_DYING;
> - cancel_delayed_work_sync(&fcopy_timeout_work);
> +
> + hv_fcopy_cancel_work();
> +
> hvutil_transport_destroy(hvt);
> }
> diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
> index ae7c028dc5a8..ca03f68df5d0 100644
> --- a/drivers/hv/hv_kvp.c
> +++ b/drivers/hv/hv_kvp.c
> @@ -758,11 +758,51 @@ hv_kvp_init(struct hv_util_service *srv)
> return 0;
> }
>
> -void hv_kvp_deinit(void)
> +static void hv_kvp_cancel_work(void)
> {
> - kvp_transaction.state = HVUTIL_DEVICE_DYING;
> cancel_delayed_work_sync(&kvp_host_handshake_work);
> cancel_delayed_work_sync(&kvp_timeout_work);
> cancel_work_sync(&kvp_sendkey_work);
> +}
> +
> +int hv_kvp_pre_suspend(void)
> +{
> + struct vmbus_channel *channel = kvp_transaction.recv_channel;
> +
> + tasklet_disable(&channel->callback_event);
> +
> + /*
> + * If there is a pending transtion, it's unnecessary to tell the host
> + * that the tranction will fail, becasue that is implied when
> + * util_suspend() calls vmbus_close() later.
> + */
> + hv_kvp_cancel_work();
> +
> + /*
> + * Forece the state to READY to handle the ICMSGTYPE_NEGOTIATE message
> + * later. The user space daemon may go out of order and its write()
> + * may get an EINVAL error: this doesn't matter since the daemon will
> + * reset the device by closing and re-opening the device.
> + */
> + kvp_transaction.state = HVUTIL_READY;
> +
> + return 0;
> +}
> +
> +int hv_kvp_pre_resume(void)
> +{
> + struct vmbus_channel *channel = kvp_transaction.recv_channel;
> +
> + tasklet_enable(&channel->callback_event);
> +
> + return 0;
> +}
> +
> +void hv_kvp_deinit(void)
> +{
> + kvp_transaction.state = HVUTIL_DEVICE_DYING;
> +
> + hv_kvp_cancel_work();
> +
> hvutil_transport_destroy(hvt);
> }
> diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
> index 03b6454268b3..eb766ff8841b 100644
> --- a/drivers/hv/hv_snapshot.c
> +++ b/drivers/hv/hv_snapshot.c
> @@ -229,6 +229,7 @@ static void vss_handle_request(struct work_struct *dummy)
> vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
> vss_send_op();
> return;
> +
Gratuitous blank line added?
> case VSS_OP_GET_DM_INFO:
> vss_transaction.msg->dm_info.flags = 0;
> break;
> @@ -379,10 +380,65 @@ hv_vss_init(struct hv_util_service *srv)
> return 0;
> }
>
> -void hv_vss_deinit(void)
> +static void hv_vss_cancel_work(void)
> {
> - vss_transaction.state = HVUTIL_DEVICE_DYING;
> cancel_delayed_work_sync(&vss_timeout_work);
> cancel_work_sync(&vss_handle_request_work);
> +}
> +
> +int hv_vss_pre_suspend(void)
> +{
> + struct vmbus_channel *channel = vss_transaction.recv_channel;
> + struct hv_vss_msg *vss_msg;
> +
> + tasklet_disable(&channel->callback_event);
> +
> + /*
> + * Fake a THAW message for the user space daemon in case the daemon
> + * has frozen the file systems. It doesn't matter if there is already
> + * a message pending to be delivered to the user space: we force
> + * vss_transaction.state to be HVUTIL_READY, so the user space daemon's
> + * write() will fail with -EINVAL (see vss_on_msg()), and the daemon
> + * will reset the device by closing and re-opening it.
> + */
> + vss_msg = kzalloc(sizeof(*vss_msg), GFP_KERNEL);
> + if (!vss_msg)
> + goto err;
> +
> + vss_msg->vss_hdr.operation = VSS_OP_THAW;
> +
> + /* Cancel any possible pending work. */
> + hv_vss_cancel_work();
> +
> + /* We don't care about the return value. */
> + hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
> +
> + kfree(vss_msg);
> +
> + vss_transaction.state = HVUTIL_READY;
Same comment about the ordering.
> +
> + /* tasklet_enable() will be called in hv_vss_pre_resume(). */
> +
> + return 0;
> +err:
> + tasklet_enable(&channel->callback_event);
> + return -ENOMEM;
Same comment about simplifying the error handling applies here.
> +}
> +
> +int hv_vss_pre_resume(void)
> +{
> + struct vmbus_channel *channel = vss_transaction.recv_channel;
> +
> + tasklet_enable(&channel->callback_event);
> +
> + return 0;
> +}
> +
> +void hv_vss_deinit(void)
> +{
> + vss_transaction.state = HVUTIL_DEVICE_DYING;
> +
> + hv_vss_cancel_work();
> +
> hvutil_transport_destroy(hvt);
> }
> diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
> index d5216af62788..255faa3d657c 100644
> --- a/drivers/hv/hv_util.c
> +++ b/drivers/hv/hv_util.c
> @@ -123,12 +123,14 @@ static struct hv_util_service util_shutdown = {
> };
>
> static int hv_timesync_init(struct hv_util_service *srv);
> +static int hv_timesync_pre_suspend(void);
> static void hv_timesync_deinit(void);
>
> static void timesync_onchannelcallback(void *context);
> static struct hv_util_service util_timesynch = {
> .util_cb = timesync_onchannelcallback,
> .util_init = hv_timesync_init,
> + .util_pre_suspend = hv_timesync_pre_suspend,
> .util_deinit = hv_timesync_deinit,
> };
>
> @@ -140,18 +142,24 @@ static struct hv_util_service util_heartbeat = {
> static struct hv_util_service util_kvp = {
> .util_cb = hv_kvp_onchannelcallback,
> .util_init = hv_kvp_init,
> + .util_pre_suspend = hv_kvp_pre_suspend,
> + .util_pre_resume = hv_kvp_pre_resume,
> .util_deinit = hv_kvp_deinit,
> };
>
> static struct hv_util_service util_vss = {
> .util_cb = hv_vss_onchannelcallback,
> .util_init = hv_vss_init,
> + .util_pre_suspend = hv_vss_pre_suspend,
> + .util_pre_resume = hv_vss_pre_resume,
> .util_deinit = hv_vss_deinit,
> };
>
> static struct hv_util_service util_fcopy = {
> .util_cb = hv_fcopy_onchannelcallback,
> .util_init = hv_fcopy_init,
> + .util_pre_suspend = hv_fcopy_pre_suspend,
> + .util_pre_resume = hv_fcopy_pre_resume,
> .util_deinit = hv_fcopy_deinit,
> };
>
> @@ -512,6 +520,41 @@ static int util_remove(struct hv_device *dev)
> return 0;
> }
>
> +static int util_suspend(struct hv_device *dev)
> +{
> + struct hv_util_service *srv = hv_get_drvdata(dev);
> + int ret = 0;
> +
> + if (srv->util_pre_suspend) {
> + ret = srv->util_pre_suspend();
> +
Unneeded blank line?
> + if (ret)
> + return ret;
> + }
> +
> + vmbus_close(dev->channel);
> +
> + return 0;
> +}
> +
> +static int util_resume(struct hv_device *dev)
> +{
> + struct hv_util_service *srv = hv_get_drvdata(dev);
> + int ret = 0;
> +
> + if (srv->util_pre_resume) {
> + ret = srv->util_pre_resume();
> +
Unneeded blank line?
> + if (ret)
> + return ret;
> + }
> +
> + ret = vmbus_open(dev->channel, 4 * HV_HYP_PAGE_SIZE,
> + 4 * HV_HYP_PAGE_SIZE, NULL, 0, srv->util_cb,
> + dev->channel);
> + return ret;
> +}
> +
> static const struct hv_vmbus_device_id id_table[] = {
> /* Shutdown guid */
> { HV_SHUTDOWN_GUID,
> @@ -548,6 +591,8 @@ static struct hv_driver util_drv = {
> .id_table = id_table,
> .probe = util_probe,
> .remove = util_remove,
> + .suspend = util_suspend,
> + .resume = util_resume,
> .driver = {
> .probe_type = PROBE_PREFER_ASYNCHRONOUS,
> },
> @@ -617,11 +662,24 @@ static int hv_timesync_init(struct hv_util_service *srv)
> return 0;
> }
>
> +static void hv_timesync_cancel_work(void)
> +{
> + cancel_work_sync(&adj_time_work);
> +}
> +
> +static int hv_timesync_pre_suspend(void)
> +{
> + hv_timesync_cancel_work();
> +
> + return 0;
> +}
> +
> static void hv_timesync_deinit(void)
> {
> if (hv_ptp_clock)
> ptp_clock_unregister(hv_ptp_clock);
> - cancel_work_sync(&adj_time_work);
> +
> + hv_timesync_cancel_work();
> }
>
> static int __init init_hyperv_utils(void)
> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
> index 20edcfd3b96c..f5fa3b3c9baf 100644
> --- a/drivers/hv/hyperv_vmbus.h
> +++ b/drivers/hv/hyperv_vmbus.h
> @@ -352,14 +352,20 @@ void vmbus_on_msg_dpc(unsigned long data);
>
> int hv_kvp_init(struct hv_util_service *srv);
> void hv_kvp_deinit(void);
> +int hv_kvp_pre_suspend(void);
> +int hv_kvp_pre_resume(void);
> void hv_kvp_onchannelcallback(void *context);
>
> int hv_vss_init(struct hv_util_service *srv);
> void hv_vss_deinit(void);
> +int hv_vss_pre_suspend(void);
> +int hv_vss_pre_resume(void);
> void hv_vss_onchannelcallback(void *context);
>
> int hv_fcopy_init(struct hv_util_service *srv);
> void hv_fcopy_deinit(void);
> +int hv_fcopy_pre_suspend(void);
> +int hv_fcopy_pre_resume(void);
> void hv_fcopy_onchannelcallback(void *context);
> void vmbus_initiate_unload(bool crash);
>
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 41c58011431e..692c89ccf5df 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1435,6 +1435,8 @@ struct hv_util_service {
> void (*util_cb)(void *);
> int (*util_init)(struct hv_util_service *);
> void (*util_deinit)(void);
> + int (*util_pre_suspend)(void);
> + int (*util_pre_resume)(void);
> };
>
> struct vmbuspipe_hdr {
> --
> 2.19.1
Powered by blists - more mailing lists