linux-kernel - [PATCH 11/14] x86/cqm: Add failure on open and read

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1481929988-31569-12-git-send-email-vikas.shivappa@linux.intel.com>
Date:   Fri, 16 Dec 2016 15:13:05 -0800
From:   Vikas Shivappa <vikas.shivappa@...ux.intel.com>
To:     vikas.shivappa@...el.com, vikas.shivappa@...ux.intel.com
Cc:     linux-kernel@...r.kernel.org, x86@...nel.org, tglx@...utronix.de,
        peterz@...radead.org, ravi.v.shankar@...el.com,
        tony.luck@...el.com, fenghua.yu@...el.com, andi.kleen@...el.com,
        davidcc@...gle.com, eranian@...gle.com, hpa@...or.com
Subject: [PATCH 11/14] x86/cqm: Add failure on open and read

To provide reliable output to the user, cqm reports an error when it does
not have enough RMIDs to monitor, depending on the mode the user chooses.
This also takes care not to overuse RMIDs. The default is LAZY mode.

NOLAZY mode: This patch adds a file cqm_mon_mask in the perf_cgroup which
indicates the packages on which the user wants guaranteed monitoring. For
such cgroup events, RMIDs are assigned at event creation and we fail if
enough RMIDs are not available. This is basically a NOLAZY allocation of
RMIDs. This mode can be used in real-time scenarios where the user is sure
that the tasks being monitored are scheduled.

LAZY mode: If the user did not enable the NOLAZY mode, RMIDs are allocated
only when tasks are actually scheduled. Upon failure to obtain RMIDs, it
indicates a failure in read. A typical use case for this mode could be to
start monitoring cgroups which still do not have any tasks in them, where
such cgroups are part of a large number of cgroups which are monitored -
that way we do not overuse RMIDs.

Patch is based on David Carrillo-Cisneros <davidcc@...gle.com> patches
in cqm2 series.

Signed-off-by: Vikas Shivappa <vikas.shivappa@...ux.intel.com>
---
 arch/x86/events/intel/cqm.c             | 145 +++++++++++++++++++++++++++++---
 arch/x86/events/intel/cqm.h             |   1 +
 arch/x86/include/asm/intel_rdt_common.h |   7 +-
 3 files changed, 141 insertions(+), 12 deletions(-)

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 85162aa..e0d4017 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -78,6 +78,11 @@ struct sample {
  */
 static cpumask_t cqm_cpumask;
 
+/*
+ * Mask of online sockets.
+ */
+static cpumask_t cqm_pkgmask;
+
 struct pkg_data **cqm_pkgs_data;
 struct cgrp_cqm_info cqm_rootcginfo;
 
@@ -110,6 +115,14 @@ bool __rmid_valid(u32 rmid)
 	return true;
 }
 
+static inline bool __rmid_valid_raw(u32 rmid)
+{
+	if (rmid > cqm_max_rmid)
+		return false;
+
+	return true;
+}
+
 static u64 __rmid_read(u32 rmid)
 {
 	u64 val;
@@ -159,16 +172,19 @@ u32 __get_rmid(int domain)
 {
 	struct list_head *cqm_flist;
 	struct cqm_rmid_entry *entry;
+	struct pkg_data *pdata;
 
 	lockdep_assert_held(&cache_lock);
 
-	cqm_flist = &cqm_pkgs_data[domain]->cqm_rmid_free_lru;
+	pdata = cqm_pkgs_data[domain];
+	cqm_flist = &pdata->cqm_rmid_free_lru;
 
 	if (list_empty(cqm_flist))
 		return INVALID_RMID;
 
 	entry = list_first_entry(cqm_flist, struct cqm_rmid_entry, list);
 	list_del(&entry->list);
+	pdata->rmid_used_count++;
 
 	return entry->rmid;
 }
@@ -344,6 +360,7 @@ static void __intel_cqm_rmid_reuse(void)
 		 */
 		list_del(&entry->list);
 		list_add_tail(&entry->list, flist);
+		pdata->rmid_used_count--;
 	}
 
 end:
@@ -607,6 +624,33 @@ static int cqm_assign_rmid(struct perf_event *event, u32 *rmid)
 	return 0;
 }
 
+static inline int check_min_rmids(struct cgrp_cqm_info *cqm_info)
+{
+	int pkg = cpumask_first_and(&cqm_info->mon_mask, &cqm_pkgmask);
+
+	for (; pkg < nr_cpu_ids;
+	     pkg = cpumask_next_and(pkg, &cqm_info->mon_mask, &cqm_pkgmask)) {
+		if (cqm_pkgs_data[pkg]->rmid_used_count >= cqm_max_rmid)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline void alloc_min_rmids(struct cgrp_cqm_info *cqm_info)
+{
+	int pkg = cpumask_first_and(&cqm_info->mon_mask, &cqm_pkgmask);
+	u32 rmid;
+
+	for ( ; pkg < nr_cpu_ids;
+	     pkg = cpumask_next_and(pkg, &cqm_info->mon_mask, &cqm_pkgmask)) {
+
+		rmid = __get_rmid(pkg);
+		if (__rmid_valid(rmid))
+			cqm_info->rmid[pkg] = rmid;
+	}
+}
+
 /*
  * Find a group and setup RMID.
  *
@@ -642,6 +686,14 @@ static int intel_cqm_setup_event(struct perf_event *event,
 			event->hw.cqm_rmid = cqm_info->rmid;
 			return 0;
 		}
+
+		/*
+		 * For cgroups which must have RMIDs check if enough
+		 * RMIDs are available.
+		 */
+		if (cpumask_weight(&cqm_info->mon_mask) &&
+		    check_min_rmids(cqm_info))
+			return -EINVAL;
 	}
 #endif
 
@@ -656,6 +708,11 @@ static int intel_cqm_setup_event(struct perf_event *event,
 
 	cqm_assign_rmid(event, event->hw.cqm_rmid);
 
+#ifdef CONFIG_CGROUP_PERF
+	if (event->cgrp && cpumask_weight(&cqm_info->mon_mask))
+		alloc_min_rmids(cqm_info);
+#endif
+
 	return 0;
 }
 
@@ -896,16 +953,16 @@ static u64 intel_cqm_event_count(struct perf_event *event)
 	return __perf_event_count(event);
 }
 
-void alloc_needed_pkg_rmid(u32 *cqm_rmid)
+u32 alloc_needed_pkg_rmid(u32 *cqm_rmid)
 {
 	unsigned long flags;
 	u32 rmid;
 
 	if (WARN_ON(!cqm_rmid))
-		return;
+		return -EINVAL;
 
 	if (cqm_rmid == cqm_rootcginfo.rmid || cqm_rmid[pkg_id])
-		return;
+		return 0;
 
 	raw_spin_lock_irqsave(&cache_lock, flags);
 
@@ -914,6 +971,8 @@ void alloc_needed_pkg_rmid(u32 *cqm_rmid)
 		cqm_rmid[pkg_id] = rmid;
 
 	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	return rmid;
 }
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
@@ -925,10 +984,8 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 
 	event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-	if (is_task_event(event)) {
-		alloc_needed_pkg_rmid(event->hw.cqm_rmid);
+	if (is_task_event(event))
 		state->next_task_rmid = event->hw.cqm_rmid[pkg_id];
-	}
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
@@ -944,11 +1001,19 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
 {
+	u32 rmid;
+
 	event->hw.cqm_state = PERF_HES_STOPPED;
 
-	if ((mode & PERF_EF_START))
+	/*
+	 * If Lazy RMID alloc fails indicate the error to the user.
+	 */
+	if ((mode & PERF_EF_START)) {
+		rmid = alloc_needed_pkg_rmid(event->hw.cqm_rmid);
+		if (!__rmid_valid_raw(rmid))
+			return -EINVAL;
 		intel_cqm_event_start(event, mode);
-
+	}
 	return 0;
 }
 
@@ -1426,12 +1491,67 @@ static int cqm_cont_monitoring_write_u64(struct cgroup_subsys_state *css,
 	return ret;
 }
 
+static int cqm_mon_mask_seq_show(struct seq_file *sf, void *v)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+	seq_printf(sf, "%*pbl\n",
+		    cpumask_pr_args(&css_to_cqm_info(seq_css(sf))->mon_mask));
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	return 0;
+}
+
+static ssize_t cqm_mon_mask_write(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
+{
+	cpumask_var_t tmp_cpus, tmp_cpus1;
+	struct cgrp_cqm_info *cqm_info;
+	unsigned long flags;
+	int ret = 0;
+
+	buf = strstrip(buf);
+
+	if (!zalloc_cpumask_var(&tmp_cpus, GFP_KERNEL) ||
+		!zalloc_cpumask_var(&tmp_cpus1, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = cpulist_parse(buf, tmp_cpus);
+	if (ret)
+		goto out;
+
+	if (cpumask_andnot(tmp_cpus1, tmp_cpus, &cqm_pkgmask)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	raw_spin_lock_irqsave(&cache_lock, flags);
+	cqm_info = css_to_cqm_info(of_css(of));
+	cpumask_copy(&cqm_info->mon_mask, tmp_cpus);
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+out:
+	free_cpumask_var(tmp_cpus);
+	free_cpumask_var(tmp_cpus1);
+
+	return ret ?: nbytes;
+}
+
 struct cftype perf_event_cgrp_arch_subsys_cftypes[] = {
 	{
 		.name = "cqm_cont_monitoring",
 		.read_u64 = cqm_cont_monitoring_read_u64,
 		.write_u64 = cqm_cont_monitoring_write_u64,
 	},
+	{
+		.name = "cqm_mon_mask",
+		.seq_show = cqm_mon_mask_seq_show,
+		.write = cqm_mon_mask_write,
+		.max_write_len = (100U + 6 * NR_CPUS),
+	},
 
 	{}	/* terminate */
 };
@@ -1449,8 +1569,10 @@ static inline void cqm_pick_event_reader(int cpu)
 
 	/* First online cpu in package becomes the reader */
 	reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
-	if (reader >= nr_cpu_ids)
+	if (reader >= nr_cpu_ids) {
 		cpumask_set_cpu(cpu, &cqm_cpumask);
+		cpumask_set_cpu(pkg_id, &cqm_pkgmask);
+	}
 }
 
 static int intel_cqm_cpu_starting(unsigned int cpu)
@@ -1482,6 +1604,8 @@ static int intel_cqm_cpu_exit(unsigned int cpu)
 
 	if (target < nr_cpu_ids)
 		cpumask_set_cpu(target, &cqm_cpumask);
+	else
+		cpumask_clear_cpu(pkg_id, &cqm_pkgmask);
 
 	return 0;
 }
@@ -1562,6 +1686,7 @@ static int pkg_data_init_cpu(int cpu)
 	 */
 	entry = __rmid_entry(0, curr_pkgid);
 	list_del(&entry->list);
+	pkg_data->rmid_used_count++;
 
 	cqm_rootcginfo.rmid = kzalloc(sizeof(u32) * cqm_socket_max, GFP_KERNEL);
 	if (!cqm_rootcginfo.rmid) {
diff --git a/arch/x86/events/intel/cqm.h b/arch/x86/events/intel/cqm.h
index 4415497..063956d 100644
--- a/arch/x86/events/intel/cqm.h
+++ b/arch/x86/events/intel/cqm.h
@@ -32,6 +32,7 @@ struct pkg_data {
 	atomic_t		reuse_scheduled;
 
 	int			rmid_work_cpu;
+	int			rmid_used_count;
 };
 #endif
 #endif
diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h
index 6424322..39fa4fb 100644
--- a/arch/x86/include/asm/intel_rdt_common.h
+++ b/arch/x86/include/asm/intel_rdt_common.h
@@ -29,7 +29,7 @@ struct intel_pqr_state {
 
 u32 __get_rmid(int domain);
 bool __rmid_valid(u32 rmid);
-void alloc_needed_pkg_rmid(u32 *cqm_rmid);
+u32 alloc_needed_pkg_rmid(u32 *cqm_rmid);
 struct cgrp_cqm_info *cqminfo_from_tsk(struct task_struct *tsk);
 
 extern struct cgrp_cqm_info cqm_rootcginfo;
@@ -42,7 +42,9 @@ struct intel_pqr_state {
  * @cont_mon     Continuous monitoring flag
  * @mon_enabled  Whether monitoring is enabled
  * @level        Level in the cgroup tree. Root is level 0.
- * @rmid        The rmids of the cgroup.
+ * @rmid         The rmids of the cgroup.
+ * @mon_mask     Package Mask to indicate packages which must
+ *  have RMIDs (guaranteed cqm monitoring).
  * @mfa          'Monitoring for ancestor' points to the cqm_info
  *  of the ancestor the cgroup is monitoring for. 'Monitoring for ancestor'
  *  means you will use an ancestors RMID at sched_in if you are
@@ -79,6 +81,7 @@ struct cgrp_cqm_info {
 	bool mon_enabled;
 	int level;
 	u32 *rmid;
+	struct cpumask mon_mask;
 	struct cgrp_cqm_info *mfa;
 	struct list_head tskmon_rlist;
 };
-- 
1.9.1