[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130205214941.4615.29852.stgit@lambeau>
Date: Tue, 05 Feb 2013 15:49:41 -0600
From: Michael Wolf <mjw@...ux.vnet.ibm.com>
To: linux-kernel@...r.kernel.org
Cc: riel@...hat.com, gleb@...hat.com, kvm@...r.kernel.org,
peterz@...radead.org, mtosatti@...hat.com, glommer@...allels.com,
mingo@...hat.com, anthony@...emonkey.ws
Subject: [PATCH 4/4] Add a timer to allow the separation of consigned from
steal time.
Add a helper routine to kernel/sched/core.c to allow the kvm module
to retrieve the CPU hard-limit (CFS quota and period) settings. The
values are used to set up a per-vcpu timer that separates consigned
time from steal time.
Signed-off-by: Michael Wolf <mjw@...ux.vnet.ibm.com>
---
arch/x86/include/asm/kvm_host.h | 9 ++++++
arch/x86/kvm/x86.c | 62 ++++++++++++++++++++++++++++++++++++++-
kernel/sched/core.c | 20 +++++++++++++
3 files changed, 90 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fe5a37b..9518613 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -355,6 +355,15 @@ struct kvm_vcpu_arch {
bool tpr_access_reporting;
/*
+ * timer used to determine if the time should be counted as
+ * steal time or consigned time.
+ */
+ struct hrtimer steal_timer;
+ u64 current_consigned;
+ s64 consigned_quota;
+ s64 consigned_period;
+
+ /*
* Paging state of the vcpu
*
* If the vcpu runs in guest mode with two level paging this still saves
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 51b63d1..79d144d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1848,13 +1848,32 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
static void accumulate_steal_time(struct kvm_vcpu *vcpu)
{
u64 delta;
+ u64 steal_delta;
+ u64 consigned_delta;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
- vcpu->arch.st.accum_steal = delta;
+
+ /*
+ * Split the delta into steal and consigned: run delay is counted as
+ * consigned until current_consigned reaches consigned_quota, and
+ * whatever exceeds the quota is counted as steal. steal_timer_fn()
+ * zeroes current_consigned each time the timer fires.
+ *
+ * NOTE(review): current_consigned is u64 and consigned_quota is s64,
+ * so the comparisons below are unsigned - confirm the quota can
+ * never be negative.
+ */
+ if (vcpu->arch.current_consigned < vcpu->arch.consigned_quota) {
+ vcpu->arch.current_consigned += delta;
+ if (vcpu->arch.current_consigned > vcpu->arch.consigned_quota) {
+ steal_delta = vcpu->arch.current_consigned
+ - vcpu->arch.consigned_quota;
+ consigned_delta = delta - steal_delta;
+ } else {
+ consigned_delta = delta;
+ steal_delta = 0;
+ }
+ } else {
+ consigned_delta = 0;
+ steal_delta = delta;
+ }
+ vcpu->arch.st.accum_steal = steal_delta;
+ vcpu->arch.st.accum_consigned = consigned_delta;
}
static void record_steal_time(struct kvm_vcpu *vcpu)
@@ -2629,8 +2648,35 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
}
+extern int sched_use_hard_capping(int cpuid, int num_vcpus, s64 *quota,
+				   s64 *period);
+/*
+ * steal_timer_fn - hrtimer callback for a vcpu's steal_timer.
+ * @data: the hrtimer embedded in vcpu->arch.steal_timer
+ *
+ * Re-reads the hard-capping quota and period, starts a new accounting
+ * window by zeroing current_consigned, and pushes the expiry forward by
+ * consigned_period.
+ *
+ * Always returns HRTIMER_RESTART; the timer is stopped with
+ * hrtimer_cancel() in kvm_arch_vcpu_put() and kvm_arch_vcpu_uninit().
+ */
+enum hrtimer_restart steal_timer_fn(struct hrtimer *data)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm *kvm;
+	int num_vcpus;
+	ktime_t now;
+
+	vcpu = container_of(data, struct kvm_vcpu, arch.steal_timer);
+	kvm = vcpu->kvm;
+	num_vcpus = atomic_read(&kvm->online_vcpus);
+	/*
+	 * Same guard as kvm_arch_vcpu_load(): the count is used as a
+	 * divisor by sched_use_hard_capping(), so never pass zero.
+	 */
+	num_vcpus = (num_vcpus == 0) ? 1 : num_vcpus;
+	sched_use_hard_capping(vcpu->cpu, num_vcpus,
+			       &vcpu->arch.consigned_quota,
+			       &vcpu->arch.consigned_period);
+	/* a new period begins: nothing has been consigned yet */
+	vcpu->arch.current_consigned = 0;
+	now = ktime_get();
+	hrtimer_forward(&vcpu->arch.steal_timer, now,
+			ktime_set(0, vcpu->arch.consigned_period));
+
+	return HRTIMER_RESTART;
+}
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ struct kvm *kvm;
+ int num_vcpus;
+ ktime_t ktime;
+
/* Address WBINVD may be executed by guest */
if (need_emulate_wbinvd(vcpu)) {
if (kvm_x86_ops->has_wbinvd_exit())
@@ -2670,6 +2716,18 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_migrate_timers(vcpu);
vcpu->cpu = cpu;
}
+ /* Initialize and start a timer to capture steal and consigned time */
+ kvm = vcpu->kvm;
+ num_vcpus = atomic_read(&kvm->online_vcpus);
+ num_vcpus = (num_vcpus == 0) ? 1 : num_vcpus;
+ sched_use_hard_capping(vcpu->cpu, num_vcpus,
+ &vcpu->arch.consigned_quota,
+ &vcpu->arch.consigned_period);
+ hrtimer_init(&vcpu->arch.steal_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ vcpu->arch.steal_timer.function = &steal_timer_fn;
+ ktime = ktime_set(0, vcpu->arch.consigned_period);
+ hrtimer_start(&vcpu->arch.steal_timer, ktime, HRTIMER_MODE_REL);
accumulate_steal_time(vcpu);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
@@ -2680,6 +2738,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = native_read_tsc();
+ hrtimer_cancel(&vcpu->arch.steal_timer);
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -6685,6 +6744,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
int idx;
+ hrtimer_cancel(&vcpu->arch.steal_timer);
kvm_pmu_destroy(vcpu);
kfree(vcpu->arch.mce_banks);
kvm_free_lapic(vcpu);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index efc2652..133ee47 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8154,6 +8154,26 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
rcu_read_unlock();
}
+/*
+ * sched_use_hard_capping - report the CFS bandwidth limit of the task
+ * group that the task currently running on @cpuid belongs to.
+ * @cpuid:    cpu whose current task is inspected
+ * @num_cpus: number of cpus the group quota is divided between; values
+ *            below 1 are treated as 1 so the division cannot fault
+ * @quota:    out: the period minus the per-cpu share of the group quota,
+ *            or 0 when no cap is configured
+ * @period:   out: the value returned by tg_get_cfs_period()
+ *
+ * return 1 if the scheduler is using some form of hard capping
+ * return 0 if there is no capping configured.
+ *
+ * NOTE(review): jiffies_to_usecs() is applied to the values returned by
+ * tg_get_cfs_quota()/tg_get_cfs_period(); if those are already in
+ * microseconds the scaling depends on HZ - confirm the intended units.
+ */
+int sched_use_hard_capping(int cpuid, int num_cpus, s64 *quota, s64 *period)
+{
+	struct rq *rq = cpu_rq(cpuid);
+	struct task_struct *curr = rq->curr;
+	struct task_group *tg = curr->sched_task_group;
+	long cfs_quota = tg_get_cfs_quota(tg);
+	long total_time;
+
+	*period = tg_get_cfs_period(tg);
+	/*
+	 * Test the group's own quota, not *quota: the out-parameter only
+	 * holds whatever a previous call left in it.
+	 */
+	if (cfs_quota == RUNTIME_INF || cfs_quota == -1) {
+		/* no cap: nothing is consigned, all run delay is steal */
+		*quota = 0;
+		return 0;
+	}
+	if (num_cpus < 1)
+		num_cpus = 1;
+	*quota = jiffies_to_usecs(cfs_quota) / num_cpus;
+	total_time = jiffies_to_usecs(*period);
+	*quota = total_time - *quota;
+	return 1;
+}
+EXPORT_SYMBOL_GPL(sched_use_hard_capping);
struct cgroup_subsys cpuacct_subsys = {
.name = "cpuacct",
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists