[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20091026162157.23704.12420.stgit@dev.haskins.net>
Date: Mon, 26 Oct 2009 12:21:57 -0400
From: Gregory Haskins <ghaskins@...ell.com>
To: kvm@...r.kernel.org
Cc: alacrityvm-devel@...ts.sourceforge.net,
linux-kernel@...r.kernel.org
Subject: [KVM PATCH v3 1/3] KVM: fix race in irq_routing logic
The current code suffers from the following race condition:
thread-1 thread-2
-----------------------------------------------------------
kvm_set_irq() {
rcu_read_lock()
irq_rt = rcu_dereference(table);
rcu_read_unlock();
kvm_set_irq_routing() {
mutex_lock();
irq_rt = table;
rcu_assign_pointer();
mutex_unlock();
synchronize_rcu();
kfree(irq_rt);
irq_rt->entry->set(); /* bad */
-------------------------------------------------------------
Because the pointer is accessed outside of the read-side critical
section. There are two basic patterns we can use to fix this bug:
1) Switch to sleeping-rcu and encompass the ->set() access within the
read-side critical section,
OR
2) Add reference counting to the irq_rt structure, and simply acquire
the reference from within the RSCS.
This patch implements solution (1).
Signed-off-by: Gregory Haskins <ghaskins@...ell.com>
---
include/linux/kvm_host.h | 6 +++++-
virt/kvm/irq_comm.c | 50 +++++++++++++++++++++++++++-------------------
virt/kvm/kvm_main.c | 1 +
3 files changed, 35 insertions(+), 22 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bd5a616..1fe135d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -185,7 +185,10 @@ struct kvm {
struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
- struct kvm_irq_routing_table *irq_routing;
+ struct {
+ struct srcu_struct srcu;
+ struct kvm_irq_routing_table *table;
+ } irq_routing;
struct hlist_head mask_notifier_list;
struct hlist_head irq_ack_notifier_list;
#endif
@@ -541,6 +544,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
+void kvm_init_irq_routing(struct kvm *kvm);
void kvm_free_irq_routing(struct kvm *kvm);
#else
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 00c68d2..db2553f 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -144,10 +144,11 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
*/
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
{
- struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
- int ret = -1, i = 0;
+ struct kvm_kernel_irq_routing_entry *e;
+ int ret = -1;
struct kvm_irq_routing_table *irq_rt;
struct hlist_node *n;
+ int idx;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -155,21 +156,19 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
+ idx = srcu_read_lock(&kvm->irq_routing.srcu);
+ irq_rt = rcu_dereference(kvm->irq_routing.table);
if (irq < irq_rt->nr_rt_entries)
- hlist_for_each_entry(e, n, &irq_rt->map[irq], link)
- irq_set[i++] = *e;
- rcu_read_unlock();
+ hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
+ int r;
- while(i--) {
- int r;
- r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level);
- if (r < 0)
- continue;
+ r = e->set(e, kvm, irq_source_id, level);
+ if (r < 0)
+ continue;
- ret = r + ((ret < 0) ? 0 : ret);
- }
+ ret = r + ((ret < 0) ? 0 : ret);
+ }
+ srcu_read_unlock(&kvm->irq_routing.srcu, idx);
return ret;
}
@@ -179,17 +178,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
struct kvm_irq_ack_notifier *kian;
struct hlist_node *n;
int gsi;
+ int idx;
trace_kvm_ack_irq(irqchip, pin);
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+ idx = srcu_read_lock(&kvm->irq_routing.srcu);
+ gsi = rcu_dereference(kvm->irq_routing.table)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_routing.srcu, idx);
}
void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -287,11 +287,19 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
rcu_read_unlock();
}
+void kvm_init_irq_routing(struct kvm *kvm)
+{
+ init_srcu_struct(&kvm->irq_routing.srcu);
+}
+
void kvm_free_irq_routing(struct kvm *kvm)
{
/* Called only during vm destruction. Nobody can use the pointer
at this stage */
- kfree(kvm->irq_routing);
+ synchronize_srcu(&kvm->irq_routing.srcu);
+ cleanup_srcu_struct(&kvm->irq_routing.srcu);
+
+ kfree(kvm->irq_routing.table);
}
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
@@ -396,10 +404,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
}
mutex_lock(&kvm->irq_lock);
- old = kvm->irq_routing;
- rcu_assign_pointer(kvm->irq_routing, new);
+ old = kvm->irq_routing.table;
+ rcu_assign_pointer(kvm->irq_routing.table, new);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_routing.srcu);
new = old;
r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cac69c4..ba94159 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -388,6 +388,7 @@ static struct kvm *kvm_create_vm(void)
atomic_inc(&kvm->mm->mm_count);
spin_lock_init(&kvm->mmu_lock);
spin_lock_init(&kvm->requests_lock);
+ kvm_init_irq_routing(kvm);
kvm_io_bus_init(&kvm->pio_bus);
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists