[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <HK0P153MB01486C8C746F8936450C4CE1BF350@HK0P153MB0148.APCP153.PROD.OUTLOOK.COM>
Date: Mon, 13 Jan 2020 06:30:20 +0000
From: Dexuan Cui <decui@...rosoft.com>
To: KY Srinivasan <kys@...rosoft.com>,
Haiyang Zhang <haiyangz@...rosoft.com>,
Stephen Hemminger <sthemmin@...rosoft.com>,
"sashal@...nel.org" <sashal@...nel.org>,
Sasha Levin <Alexander.Levin@...rosoft.com>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
Michael Kelley <mikelley@...rosoft.com>,
vkuznets <vkuznets@...hat.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: [PATCH v2 1/4] Tools: hv: Reopen the devices if read() or write()
returns errors
The state machine in the hv_utils driver can run out of order in some
corner cases, e.g. if the kvp daemon doesn't call write() fast enough
due to some reason, kvp_timeout_func() can run first and move the state
to HVUTIL_READY; next, when kvp_on_msg() is called it returns -EINVAL
since kvp_transaction.state is smaller than HVUTIL_USERSPACE_REQ; later,
the daemon's write() gets an error -EINVAL, and the daemon will exit().
We can reproduce the issue by sending a SIGSTOP signal to the daemon, wait
for 1 minute, and send a SIGCONT signal to the daemon: the daemon will
exit() quickly.
We can fix the issue by forcing a reset of the device (which means the
daemon can close() and open() the device again) and doing extra necessary
clean-up.
Signed-off-by: Dexuan Cui <decui@...rosoft.com>
---
tools/hv/hv_fcopy_daemon.c | 19 +++++++++++++++----
tools/hv/hv_kvp_daemon.c | 25 ++++++++++++++-----------
tools/hv/hv_vss_daemon.c | 25 +++++++++++++++++++------
3 files changed, 48 insertions(+), 21 deletions(-)
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index aea2d91ab364..a78a5292589b 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -21,7 +21,7 @@
#include <fcntl.h>
#include <getopt.h>
-static int target_fd;
+static int target_fd = -1;
static char target_fname[PATH_MAX];
static unsigned long long filesize;
@@ -80,6 +80,8 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg)
error = 0;
done:
+ if (error)
+ memset(target_fname, 0, sizeof(target_fname));
return error;
}
@@ -111,12 +113,16 @@ static int hv_copy_data(struct hv_do_fcopy *cpmsg)
static int hv_copy_finished(void)
{
close(target_fd);
+ target_fd = -1;
+ memset(target_fname, 0, sizeof(target_fname));
return 0;
}
static int hv_copy_cancel(void)
{
close(target_fd);
+ target_fd = -1;
unlink(target_fname);
+ memset(target_fname, 0, sizeof(target_fname));
return 0;
}
@@ -141,7 +147,7 @@ int main(int argc, char *argv[])
struct hv_do_fcopy copy;
__u32 kernel_modver;
} buffer = { };
- int in_handshake = 1;
+ int in_handshake;
static struct option long_options[] = {
{"help", no_argument, 0, 'h' },
@@ -170,6 +176,9 @@ int main(int argc, char *argv[])
openlog("HV_FCOPY", 0, LOG_USER);
syslog(LOG_INFO, "starting; pid is:%d", getpid());
+reopen_fcopy_fd:
+ hv_copy_cancel();
+ in_handshake = 1;
fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR);
if (fcopy_fd < 0) {
@@ -196,7 +205,8 @@ int main(int argc, char *argv[])
len = pread(fcopy_fd, &buffer, sizeof(buffer), 0);
if (len < 0) {
syslog(LOG_ERR, "pread failed: %s", strerror(errno));
- exit(EXIT_FAILURE);
+ close(fcopy_fd);
+ goto reopen_fcopy_fd;
}
if (in_handshake) {
@@ -233,7 +243,8 @@ int main(int argc, char *argv[])
if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) {
syslog(LOG_ERR, "pwrite failed: %s", strerror(errno));
- exit(EXIT_FAILURE);
+ close(fcopy_fd);
+ goto reopen_fcopy_fd;
}
}
}
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index e9ef4ca6a655..3282d48c4487 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -76,7 +76,7 @@ enum {
DNS
};
-static int in_hand_shake = 1;
+static int in_hand_shake;
static char *os_name = "";
static char *os_major = "";
@@ -1400,14 +1400,6 @@ int main(int argc, char *argv[])
openlog("KVP", 0, LOG_USER);
syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
- kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
-
- if (kvp_fd < 0) {
- syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
- errno, strerror(errno));
- exit(EXIT_FAILURE);
- }
-
/*
* Retrieve OS release information.
*/
@@ -1423,6 +1415,16 @@ int main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
+reopen_kvp_fd:
+ in_hand_shake = 1;
+ kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
+
+ if (kvp_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
/*
* Register ourselves with the kernel.
*/
@@ -1458,7 +1460,7 @@ int main(int argc, char *argv[])
errno, strerror(errno));
close(kvp_fd);
- return EXIT_FAILURE;
+ goto reopen_kvp_fd;
}
/*
@@ -1623,7 +1625,8 @@ int main(int argc, char *argv[])
if (len != sizeof(struct hv_kvp_msg)) {
syslog(LOG_ERR, "write failed; error: %d %s", errno,
strerror(errno));
- exit(EXIT_FAILURE);
+ close(kvp_fd);
+ goto reopen_kvp_fd;
}
}
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 92902a88f671..e70fed66a5ae 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -28,6 +28,8 @@
#include <stdbool.h>
#include <dirent.h>
+static bool fs_frozen;
+
/* Don't use syslog() in the function since that can cause write to disk */
static int vss_do_freeze(char *dir, unsigned int cmd)
{
@@ -155,8 +157,11 @@ static int vss_operate(int operation)
continue;
}
error |= vss_do_freeze(ent->mnt_dir, cmd);
- if (error && operation == VSS_OP_FREEZE)
- goto err;
+ if (operation == VSS_OP_FREEZE) {
+ if (error)
+ goto err;
+ fs_frozen = true;
+ }
}
endmntent(mounts);
@@ -167,6 +172,9 @@ static int vss_operate(int operation)
goto err;
}
+ if (operation == VSS_OP_THAW && !error)
+ fs_frozen = false;
+
goto out;
err:
save_errno = errno;
@@ -175,6 +183,7 @@ static int vss_operate(int operation)
endmntent(mounts);
}
vss_operate(VSS_OP_THAW);
+ fs_frozen = false;
/* Call syslog after we thaw all filesystems */
if (ent)
syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s",
@@ -202,7 +211,7 @@ int main(int argc, char *argv[])
int op;
struct hv_vss_msg vss_msg[1];
int daemonize = 1, long_index = 0, opt;
- int in_handshake = 1;
+ int in_handshake;
__u32 kernel_modver;
static struct option long_options[] = {
@@ -232,6 +241,10 @@ int main(int argc, char *argv[])
openlog("Hyper-V VSS", 0, LOG_USER);
syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
+reopen_vss_fd:
+ if (fs_frozen)
+ vss_operate(VSS_OP_THAW);
+ in_handshake = 1;
vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
if (vss_fd < 0) {
syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
@@ -285,7 +298,7 @@ int main(int argc, char *argv[])
syslog(LOG_ERR, "read failed; error:%d %s",
errno, strerror(errno));
close(vss_fd);
- return EXIT_FAILURE;
+ goto reopen_vss_fd;
}
op = vss_msg->vss_hdr.operation;
@@ -318,8 +331,8 @@ int main(int argc, char *argv[])
syslog(LOG_ERR, "write failed; error: %d %s", errno,
strerror(errno));
- if (op == VSS_OP_FREEZE)
- vss_operate(VSS_OP_THAW);
+ close(vss_fd);
+ goto reopen_vss_fd;
}
}
--
2.19.1
Powered by blists - more mailing lists