[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230131073740.378984-11-baolu.lu@linux.intel.com>
Date: Tue, 31 Jan 2023 15:37:38 +0800
From: Lu Baolu <baolu.lu@...ux.intel.com>
To: Joerg Roedel <joro@...tes.org>
Cc: kan.liang@...ux.intel.com, iommu@...ts.linux.dev,
linux-kernel@...r.kernel.org
Subject: [PATCH 10/12] iommu/vt-d: Support cpumask for IOMMU perfmon
From: Kan Liang <kan.liang@...ux.intel.com>
The perf subsystem assumes that all counters are per-CPU by default, so
the user space tool reads a counter from each CPU. However, the IOMMU
counters are system-wide and can be read from any CPU. To handle this,
use a CPU mask to restrict counting to a single CPU, together with a CPU
hotplug notifier that chooses a different CPU if the chosen one is taken
offline.
The CPU mask is exposed to the user space perf tool via
/sys/bus/event_source/devices/dmar*/cpumask.
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
Link: https://lore.kernel.org/r/20230128200428.1459118-6-kan.liang@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
---
include/linux/cpuhotplug.h | 1 +
drivers/iommu/intel/perfmon.c | 113 ++++++++++++++++--
.../sysfs-bus-event_source-devices-iommu | 8 ++
3 files changed, 114 insertions(+), 8 deletions(-)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 6c6859bfc454..f2ea348ce3b0 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -221,6 +221,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_CQM_ONLINE,
CPUHP_AP_PERF_X86_CSTATE_ONLINE,
CPUHP_AP_PERF_X86_IDXD_ONLINE,
+ CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
index df9b78736462..322d362b85e4 100644
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -34,9 +34,28 @@ static struct attribute_group iommu_pmu_events_attr_group = {
.attrs = attrs_empty,
};
+static cpumask_t iommu_pmu_cpu_mask;
+
+static ssize_t
+cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *iommu_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static struct attribute_group iommu_pmu_cpumask_attr_group = {
+ .attrs = iommu_pmu_cpumask_attrs,
+};
+
static const struct attribute_group *iommu_pmu_attr_groups[] = {
&iommu_pmu_format_attr_group,
&iommu_pmu_events_attr_group,
+ &iommu_pmu_cpumask_attr_group,
NULL
};
@@ -679,20 +698,98 @@ void free_iommu_pmu(struct intel_iommu *iommu)
iommu->pmu = NULL;
}
+static int iommu_pmu_cpu_online(unsigned int cpu)
+{
+ if (cpumask_empty(&iommu_pmu_cpu_mask))
+ cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
+
+ return 0;
+}
+
+static int iommu_pmu_cpu_offline(unsigned int cpu)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ int target;
+
+ if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
+ else
+ target = -1;
+
+ rcu_read_lock();
+
+ for_each_iommu(iommu, drhd) {
+ if (!iommu->pmu)
+ continue;
+ perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target);
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static int nr_iommu_pmu;
+
+static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
+{
+ int ret;
+
+ if (nr_iommu_pmu++)
+ return 0;
+
+ ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
+ "driver/iommu/intel/perfmon:online",
+ iommu_pmu_cpu_online,
+ iommu_pmu_cpu_offline);
+ if (ret)
+ nr_iommu_pmu = 0;
+
+ return ret;
+}
+
+static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
+{
+ if (--nr_iommu_pmu)
+ return;
+
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE);
+}
+
void iommu_pmu_register(struct intel_iommu *iommu)
{
- if (!iommu->pmu)
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+ if (!iommu_pmu)
return;
- if (__iommu_pmu_register(iommu)) {
- pr_err("Failed to register PMU for iommu (seq_id = %d)\n",
- iommu->seq_id);
- free_iommu_pmu(iommu);
- }
+ if (__iommu_pmu_register(iommu))
+ goto err;
+
+ if (iommu_pmu_cpuhp_setup(iommu_pmu))
+ goto unregister;
+
+ return;
+
+unregister:
+ perf_pmu_unregister(&iommu_pmu->pmu);
+err:
+ pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
+ free_iommu_pmu(iommu);
}
void iommu_pmu_unregister(struct intel_iommu *iommu)
{
- if (iommu->pmu)
- perf_pmu_unregister(&iommu->pmu->pmu);
+ struct iommu_pmu *iommu_pmu = iommu->pmu;
+
+ if (!iommu_pmu)
+ return;
+
+ iommu_pmu_cpuhp_free(iommu_pmu);
+ perf_pmu_unregister(&iommu_pmu->pmu);
}
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
index 988210a0e8ce..d7af4919302e 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
@@ -27,3 +27,11 @@ Description: Read-only. Attribute group to describe the magic bits
filter_pasid = "config2:0-21" - PASID filter
filter_ats = "config2:24-28" - Address Type filter
filter_page_table = "config2:32-36" - Page Table Level filter
+
+What: /sys/bus/event_source/devices/dmar*/cpumask
+Date: Jan 2023
+KernelVersion: 6.3
+Contact: Kan Liang <kan.liang@...ux.intel.com>
+Description: Read-only. This file always returns the CPU to which the
+ IOMMU pmu is bound for access to all IOMMU pmu performance
+ monitoring events.
--
2.34.1
Powered by blists - more mailing lists