From 27170c1bb8f21f7b20c1716c1df65e4812b421f8 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 17 Mar 2016 14:41:07 +0100
Subject: [PATCH] Drivers: hv: vmbus: handle various crash scenarios

Kdump keeps biting. Turns out CHANNELMSG_UNLOAD_RESPONSE is always
delivered to the CPU which was used to initiate contact regardless of what
CPU we're sending CHANNELMSG_UNLOAD from. vmbus_wait_for_unload() doesn't
account for this: in case we're crashing on some other CPU and the
CPU which was used to initiate contact is still alive and operational,
CHANNELMSG_UNLOAD_RESPONSE will be delivered there, completing
vmbus_connection.unload_event, and our wait on the current CPU will never
end.

Do the following:
1) Remember the CPU we used to initiate contact in vmbus_connection.

2) Check for completion_done() in the loop. In case the interrupt handler
   is still alive we'll get the confirmation we need.

3) Always read the init_cpu's message page as CHANNELMSG_UNLOAD_RESPONSE
   will be delivered there. We can race with a still-alive interrupt
   handler doing the same but we don't care as we're checking
   completion_done() now.

4) Clean up message pages on all CPUs. This is required (at least for the
   current CPU, as we're clearing some other CPU's messages now, but we may
   want to bring up additional CPUs on crash) as new messages won't be
   delivered till we consume what's pending. On boot we'll place message
   pages somewhere else and we won't be able to read stale messages.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
Changes since v1:
- Use init_cpu instead of CPU0 [K. Y. Srinivasan]
- Style changes in vmbus_wait_for_unload [Radim Krcmar]
---
 drivers/hv/channel_mgmt.c | 39 ++++++++++++++++++++++++++++++++-------
 drivers/hv/connection.c   | 10 +++++++---
 drivers/hv/hyperv_vmbus.h |  3 +++
 drivers/hv/vmbus_drv.c    |  1 +
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 38b682ba..2fa526d 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -597,28 +597,53 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 
 static void vmbus_wait_for_unload(void)
 {
-	int cpu = smp_processor_id();
-	void *page_addr = hv_context.synic_message_page[cpu];
+	int cpu;
+	void *page_addr =
+		hv_context.synic_message_page[vmbus_connection.init_cpu];
 	struct hv_message *msg = (struct hv_message *)page_addr +
 				  VMBUS_MESSAGE_SINT;
 	struct vmbus_channel_message_header *hdr;
-	bool unloaded = false;
+	enum vmbus_channel_message_type msgtype;
+
+	printk("vmbus_wait_for_unload: %d (%d)\n", vmbus_connection.init_cpu, smp_processor_id());
 
+	/*
+	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
+	 * used to initiate contact (see vmbus_negotiate_version()). When we're
+	 * crashing on a different CPU let's hope that IRQ handler on that CPU
+	 * is still functional and vmbus_unload_response() will complete
+	 * vmbus_connection.unload_event. If not, the last thing we can do is
+	 * read message page for that CPU regardless of what CPU we're on.
+	 */
 	while (1) {
+		if (completion_done(&vmbus_connection.unload_event))
+			break;
+
 		if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) {
 			mdelay(10);
 			continue;
 		}
 
 		hdr = (struct vmbus_channel_message_header *)msg->u.payload;
-		if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
-			unloaded = true;
-
+		msgtype = hdr->msgtype;
 		vmbus_signal_eom(msg);
 
-		if (unloaded)
+		if (msgtype == CHANNELMSG_UNLOAD_RESPONSE)
 			break;
 	}
+
+	/*
+	 * We're crashing and already got the UNLOAD_RESPONSE, clean up all
+	 * maybe-pending messages on all CPUs to be able to receive new
+	 * messages after we reconnect.
+	 */
+	for_each_online_cpu(cpu) {
+		page_addr = hv_context.synic_message_page[cpu];
+		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
+		msg->header.message_type = HVMSG_NONE;
+	}
+
+	printk("vmbus_wait_for_unload done: %d (%d)\n", vmbus_connection.init_cpu, smp_processor_id());
 }
 
 /*
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index d02f137..4ab91b8 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -70,7 +70,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
 static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
 					__u32 version)
 {
-	int ret = 0;
+	int ret = 0, cpu = smp_processor_id();
 	struct vmbus_channel_initiate_contact *msg;
 	unsigned long flags;
 
@@ -91,12 +91,16 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
 	 * For post win8 hosts, we support receiving channel messagges on
 	 * all the CPUs. This is needed for kexec to work correctly where
 	 * the CPU attempting to connect may not be CPU 0.
+	 * We need to remember the CPU we use here as in case of unload
+	 * CHANNELMSG_UNLOAD_RESPONSE will be delivered to this CPU.
 	 */
 	if (version >= VERSION_WIN8_1) {
-		msg->target_vcpu = hv_context.vp_index[get_cpu()];
-		put_cpu();
+		printk("vmbus_negotiate_version: %d %d\n", cpu, hv_context.vp_index[cpu]);
+		msg->target_vcpu = hv_context.vp_index[cpu];
+		vmbus_connection.init_cpu = cpu;
 	} else {
 		msg->target_vcpu = 0;
+		vmbus_connection.init_cpu = 0;
 	}
 
 	/*
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 12321b9..3adf30b 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -563,6 +563,9 @@ struct vmbus_connection {
 
 	atomic_t next_gpadl_handle;
 
+	/* CPU which was used to initiate contact */
+	int init_cpu;
+
 	struct completion  unload_event;
 	/*
 	 * Represents channel interrupts. Each bit position represents a
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 64713ff..570dd639 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -732,6 +732,7 @@ void vmbus_on_msg_dpc(unsigned long data)
 		goto msg_handled;
 	}
 
+	printk("vmbus_on_msg_dpc: %d on %d\n", hdr->msgtype, cpu);
 	entry = &channel_message_table[hdr->msgtype];
 	if (entry->handler_type	== VMHT_BLOCKING) {
 		ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
-- 
2.5.5