linux-kernel - [RFC] thermal: Move trip point handling code from ACPI to generic code

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20081001121344.GB29479@srcf.ucam.org>
Date:	Wed, 1 Oct 2008 13:13:44 +0100
From:	Matthew Garrett <mjg59@...f.ucam.org>
To:	linux-acpi@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:	rui.zhang@...el.com, pavel@...e.cz
Subject: [RFC] thermal: Move trip point handling code from ACPI to generic code

Having trip point handling code in a single central location makes more 
sense than implementing it per thermal driver. This is a slightly rough 
prototype that should be pretty much equivalent in functionality to the 
existing code (at least, my laptop hasn't melted yet). Only thing I can 
think of off-hand that's missing are the updates for crossing trip
points (another callback into the specific code?) and hooking up the 
polling configuration that's currently in /proc. Any comments?

Motivation for this is wanting to start hooking DRM drivers into the 
thermal layer as we get better information on handing their PM 
functionality. I suspect lack of thermal control in GPUs is one of the 
things that's causing us to hit thermal limits on some laptops.

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 754967f..c106a02 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -549,145 +549,6 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 	return acpi_thermal_trips_update(tz, ACPI_TRIPS_INIT);
 }
 
-static int acpi_thermal_critical(struct acpi_thermal *tz)
-{
-	if (!tz || !tz->trips.critical.flags.valid)
-		return -EINVAL;
-
-	if (tz->temperature >= tz->trips.critical.temperature) {
-		printk(KERN_WARNING PREFIX "Critical trip point\n");
-		tz->trips.critical.flags.enabled = 1;
-	} else if (tz->trips.critical.flags.enabled)
-		tz->trips.critical.flags.enabled = 0;
-
-	acpi_bus_generate_proc_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL,
-				tz->trips.critical.flags.enabled);
-	acpi_bus_generate_netlink_event(tz->device->pnp.device_class,
-					  tz->device->dev.bus_id,
-					  ACPI_THERMAL_NOTIFY_CRITICAL,
-					  tz->trips.critical.flags.enabled);
-
-	/* take no action if nocrt is set */
-	if(!nocrt) {
-		printk(KERN_EMERG
-			"Critical temperature reached (%ld C), shutting down.\n",
-			KELVIN_TO_CELSIUS(tz->temperature));
-		orderly_poweroff(true);
-	}
-
-	return 0;
-}
-
-static int acpi_thermal_hot(struct acpi_thermal *tz)
-{
-	if (!tz || !tz->trips.hot.flags.valid)
-		return -EINVAL;
-
-	if (tz->temperature >= tz->trips.hot.temperature) {
-		printk(KERN_WARNING PREFIX "Hot trip point\n");
-		tz->trips.hot.flags.enabled = 1;
-	} else if (tz->trips.hot.flags.enabled)
-		tz->trips.hot.flags.enabled = 0;
-
-	acpi_bus_generate_proc_event(tz->device, ACPI_THERMAL_NOTIFY_HOT,
-				tz->trips.hot.flags.enabled);
-	acpi_bus_generate_netlink_event(tz->device->pnp.device_class,
-					  tz->device->dev.bus_id,
-					  ACPI_THERMAL_NOTIFY_HOT,
-					  tz->trips.hot.flags.enabled);
-
-	/* TBD: Call user-mode "sleep(S4)" function if nocrt is cleared */
-
-	return 0;
-}
-
-static void acpi_thermal_passive(struct acpi_thermal *tz)
-{
-	int result = 1;
-	struct acpi_thermal_passive *passive = NULL;
-	int trend = 0;
-	int i = 0;
-
-
-	if (!tz || !tz->trips.passive.flags.valid)
-		return;
-
-	passive = &(tz->trips.passive);
-
-	/*
-	 * Above Trip?
-	 * -----------
-	 * Calculate the thermal trend (using the passive cooling equation)
-	 * and modify the performance limit for all passive cooling devices
-	 * accordingly.  Note that we assume symmetry.
-	 */
-	if (tz->temperature >= passive->temperature) {
-		trend =
-		    (passive->tc1 * (tz->temperature - tz->last_temperature)) +
-		    (passive->tc2 * (tz->temperature - passive->temperature));
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "trend[%d]=(tc1[%lu]*(tmp[%lu]-last[%lu]))+(tc2[%lu]*(tmp[%lu]-psv[%lu]))\n",
-				  trend, passive->tc1, tz->temperature,
-				  tz->last_temperature, passive->tc2,
-				  tz->temperature, passive->temperature));
-		passive->flags.enabled = 1;
-		/* Heating up? */
-		if (trend > 0)
-			for (i = 0; i < passive->devices.count; i++)
-				acpi_processor_set_thermal_limit(passive->
-								 devices.
-								 handles[i],
-								 ACPI_PROCESSOR_LIMIT_INCREMENT);
-		/* Cooling off? */
-		else if (trend < 0) {
-			for (i = 0; i < passive->devices.count; i++)
-				/*
-				 * assume that we are on highest
-				 * freq/lowest thrott and can leave
-				 * passive mode, even in error case
-				 */
-				if (!acpi_processor_set_thermal_limit
-				    (passive->devices.handles[i],
-				     ACPI_PROCESSOR_LIMIT_DECREMENT))
-					result = 0;
-			/*
-			 * Leave cooling mode, even if the temp might
-			 * higher than trip point This is because some
-			 * machines might have long thermal polling
-			 * frequencies (tsp) defined. We will fall back
-			 * into passive mode in next cycle (probably quicker)
-			 */
-			if (result) {
-				passive->flags.enabled = 0;
-				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-						  "Disabling passive cooling, still above threshold,"
-						  " but we are cooling down\n"));
-			}
-		}
-		return;
-	}
-
-	/*
-	 * Below Trip?
-	 * -----------
-	 * Implement passive cooling hysteresis to slowly increase performance
-	 * and avoid thrashing around the passive trip point.  Note that we
-	 * assume symmetry.
-	 */
-	if (!passive->flags.enabled)
-		return;
-	for (i = 0; i < passive->devices.count; i++)
-		if (!acpi_processor_set_thermal_limit
-		    (passive->devices.handles[i],
-		     ACPI_PROCESSOR_LIMIT_DECREMENT))
-			result = 0;
-	if (result) {
-		passive->flags.enabled = 0;
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Disabling passive cooling (zone is cool)\n"));
-	}
-}
-
 static void acpi_thermal_active(struct acpi_thermal *tz)
 {
 	int result = 0;
@@ -762,13 +623,6 @@ static void acpi_thermal_active(struct acpi_thermal *tz)
 
 static void acpi_thermal_check(void *context);
 
-static void acpi_thermal_run(unsigned long data)
-{
-	struct acpi_thermal *tz = (struct acpi_thermal *)data;
-	if (!tz->zombie)
-		acpi_os_execute(OSL_GPE_HANDLER, acpi_thermal_check, (void *)data);
-}
-
 static void acpi_thermal_active_off(void *data)
 {
 	int result = 0;
@@ -812,123 +666,9 @@ static void acpi_thermal_active_off(void *data)
 
 static void acpi_thermal_check(void *data)
 {
-	int result = 0;
 	struct acpi_thermal *tz = data;
-	unsigned long sleep_time = 0;
-	unsigned long timeout_jiffies = 0;
-	int i = 0;
-	struct acpi_thermal_state state;
-
-
-	if (!tz) {
-		printk(KERN_ERR PREFIX "Invalid (NULL) context\n");
-		return;
-	}
-
-	/* Check if someone else is already running */
-	if (!mutex_trylock(&tz->lock))
-		return;
-
-	state = tz->state;
-
-	result = acpi_thermal_get_temperature(tz);
-	if (result)
-		goto unlock;
-
-	if (!tz->tz_enabled)
-		goto unlock;
-
-	memset(&tz->state, 0, sizeof(tz->state));
-
-	/*
-	 * Check Trip Points
-	 * -----------------
-	 * Compare the current temperature to the trip point values to see
-	 * if we've entered one of the thermal policy states.  Note that
-	 * this function determines when a state is entered, but the 
-	 * individual policy decides when it is exited (e.g. hysteresis).
-	 */
-	if (tz->trips.critical.flags.valid)
-		state.critical |=
-		    (tz->temperature >= tz->trips.critical.temperature);
-	if (tz->trips.hot.flags.valid)
-		state.hot |= (tz->temperature >= tz->trips.hot.temperature);
-	if (tz->trips.passive.flags.valid)
-		state.passive |=
-		    (tz->temperature >= tz->trips.passive.temperature);
-	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++)
-		if (tz->trips.active[i].flags.valid)
-			state.active |=
-			    (tz->temperature >=
-			     tz->trips.active[i].temperature);
-
-	/*
-	 * Invoke Policy
-	 * -------------
-	 * Separated from the above check to allow individual policy to 
-	 * determine when to exit a given state.
-	 */
-	if (state.critical)
-		acpi_thermal_critical(tz);
-	if (state.hot)
-		acpi_thermal_hot(tz);
-	if (state.passive)
-		acpi_thermal_passive(tz);
-	if (state.active)
-		acpi_thermal_active(tz);
-
-	/*
-	 * Calculate State
-	 * ---------------
-	 * Again, separated from the above two to allow independent policy
-	 * decisions.
-	 */
-	tz->state.critical = tz->trips.critical.flags.enabled;
-	tz->state.hot = tz->trips.hot.flags.enabled;
-	tz->state.passive = tz->trips.passive.flags.enabled;
-	tz->state.active = 0;
-	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++)
-		tz->state.active |= tz->trips.active[i].flags.enabled;
 
-	/*
-	 * Calculate Sleep Time
-	 * --------------------
-	 * If we're in the passive state, use _TSP's value.  Otherwise
-	 * use the default polling frequency (e.g. _TZP).  If no polling
-	 * frequency is specified then we'll wait forever (at least until
-	 * a thermal event occurs).  Note that _TSP and _TZD values are
-	 * given in 1/10th seconds (we must covert to milliseconds).
-	 */
-	if (tz->state.passive) {
-		sleep_time = tz->trips.passive.tsp * 100;
-		timeout_jiffies =  jiffies + (HZ * sleep_time) / 1000;
-	} else if (tz->polling_frequency > 0) {
-		sleep_time = tz->polling_frequency * 100;
-		timeout_jiffies =  round_jiffies(jiffies + (HZ * sleep_time) / 1000);
-	}
-
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s: temperature[%lu] sleep[%lu]\n",
-			  tz->name, tz->temperature, sleep_time));
-
-	/*
-	 * Schedule Next Poll
-	 * ------------------
-	 */
-	if (!sleep_time) {
-		if (timer_pending(&(tz->timer)))
-			del_timer(&(tz->timer));
-	} else {
-		if (timer_pending(&(tz->timer)))
-			mod_timer(&(tz->timer), timeout_jiffies);
-		else {
-			tz->timer.data = (unsigned long)tz;
-			tz->timer.function = acpi_thermal_run;
-			tz->timer.expires = timeout_jiffies;
-			add_timer(&(tz->timer));
-		}
-	}
-      unlock:
-	mutex_unlock(&tz->lock);
+	thermal_zone_device_update(tz->thermal_zone);
 }
 
 /* sys I/F for generic thermal sysfs support */
@@ -1213,8 +953,21 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 
 	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE &&
 			tz->trips.active[i].flags.valid; i++, trips++);
-	tz->thermal_zone = thermal_zone_device_register("acpitz",
-					trips, tz, &acpi_thermal_zone_ops);
+
+	if (tz->trips.passive.flags.valid)
+		tz->thermal_zone =
+			thermal_zone_device_register("acpitz", trips, tz,
+						     &acpi_thermal_zone_ops,
+						     tz->trips.passive.tc1,
+						     tz->trips.passive.tc2,
+						     tz->trips.passive.tsp*100,
+						     tz->polling_frequency*100);
+	else
+		tz->thermal_zone =
+			thermal_zone_device_register("acpitz", trips, tz,
+						     &acpi_thermal_zone_ops,
+						     0, 0, 0,
+						     tz->polling_frequency);
 	if (IS_ERR(tz->thermal_zone))
 		return -ENODEV;
 
@@ -1736,10 +1489,6 @@ static int acpi_thermal_remove(struct acpi_device *device, int type)
 					    acpi_thermal_notify);
 
 	/* Terminate policy */
-	if (tz->trips.passive.flags.valid && tz->trips.passive.flags.enabled) {
-		tz->trips.passive.flags.enabled = 0;
-		acpi_thermal_passive(tz);
-	}
 	if (tz->trips.active[0].flags.valid
 	    && tz->trips.active[0].flags.enabled) {
 		tz->trips.active[0].flags.enabled = 0;
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 8ca2f59..7eff12f 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -30,6 +30,7 @@
 #include <linux/idr.h>
 #include <linux/thermal.h>
 #include <linux/spinlock.h>
+#include <linux/reboot.h>
 
 MODULE_AUTHOR("Zhang Rui");
 MODULE_DESCRIPTION("Generic thermal management sysfs support");
@@ -161,7 +162,6 @@ trip_point_type_show(struct device *dev, struct device_attribute *attr,
 	if (!sscanf(attr->attr.name, "trip_point_%d_type", &trip))
 		return -EINVAL;
 
-
 	ret = tz->ops->get_trip_type(tz, trip, &trip_type);
 	if (ret)
 		return ret;
@@ -778,12 +778,166 @@ void thermal_cooling_device_unregister(struct
 
 EXPORT_SYMBOL(thermal_cooling_device_unregister);
 
+static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
+					    int delay)
+{
+	cancel_delayed_work_sync(&(tz->poll_queue));
+
+	if (!delay)
+		return;
+
+	if (delay > 1000)
+		schedule_delayed_work(&(tz->poll_queue),
+					round_jiffies(delay * HZ));
+	else
+		schedule_delayed_work(&(tz->poll_queue), delay * HZ);
+}
+
+static void thermal_zone_device_passive(struct thermal_zone_device *tz,
+					int temp, int trip_temp, int trip)
+{
+	int trend = 0;
+	struct thermal_cooling_device_instance *instance;
+	struct thermal_cooling_device *cdev;
+	int state, max_state;
+
+	if (tz->passive == false)
+		return;
+
+	/*
+	 * Above Trip?
+	 * -----------
+	 * Calculate the thermal trend (using the passive cooling equation)
+	 * and modify the performance limit for all passive cooling devices
+	 * accordingly.  Note that we assume symmetry.
+	 */
+	if (temp >= trip_temp) {
+		tz->passive = true;
+
+		trend = (tz->tc1 * (temp - tz->last_temperature)) +
+			(tz->tc2 * (temp - trip_temp));
+
+		/* Heating up? */
+		if (trend > 0) {
+			list_for_each_entry(instance, &tz->cooling_devices,
+					    node) {
+				if (instance->trip != trip)
+					continue;
+				cdev = instance->cdev;
+				cdev->ops->get_cur_state(cdev, &state);
+				cdev->ops->get_max_state(cdev, &max_state);
+				if (state++ < max_state)
+					cdev->ops->set_cur_state(cdev, state);
+			}
+		} else if (trend < 0) { /* Cooling off? */
+			list_for_each_entry(instance, &tz->cooling_devices,
+					    node) {
+				if (instance->trip != trip)
+					continue;
+				cdev = instance->cdev;
+				cdev->ops->get_cur_state(cdev, &state);
+				cdev->ops->get_max_state(cdev, &max_state);
+				if (state > 0)
+					cdev->ops->set_cur_state(cdev, --state);
+			}
+			return;
+		}
+	}
+
+	/*
+	 * Below Trip?
+	 * -----------
+	 * Implement passive cooling hysteresis to slowly increase performance
+	 * and avoid thrashing around the passive trip point.  Note that we
+	 * assume symmetry.
+	 */
+	list_for_each_entry(instance, &tz->cooling_devices, node) {
+		if (instance->trip != trip)
+			continue;
+		cdev = instance->cdev;
+		cdev->ops->get_cur_state(cdev, &state);
+		cdev->ops->get_max_state(cdev, &max_state);
+		if (state > 0)
+			cdev->ops->set_cur_state(cdev, --state);
+		if (state == 0)
+			tz->passive = false;
+	}
+}
+
+void thermal_zone_device_update(struct thermal_zone_device *tz)
+{
+	int temp, trip_temp;
+	int count;
+	enum thermal_trip_t trip_type;
+	struct thermal_cooling_device_instance *instance;
+	struct thermal_cooling_device *cdev;
+
+	tz->ops->get_temp(tz, &temp);
+
+	for (count = 0; count < tz->trips; count++) {
+		tz->ops->get_trip_type(tz, count, &trip_type);
+		tz->ops->get_trip_temp(tz, count, &trip_temp);
+
+		switch (trip_type) {
+		case THERMAL_TRIP_CRITICAL:
+			if (temp > trip_temp)
+				/* FIXME: send notification */
+				orderly_poweroff(true);
+			break;
+		case THERMAL_TRIP_HOT:
+			if (temp > trip_temp)
+				/* FIXME: send notification */
+				printk(KERN_WARNING "Hot trip point\n");
+			break;
+		case THERMAL_TRIP_ACTIVE:
+			list_for_each_entry(instance, &tz->cooling_devices,
+					    node) {
+				if (instance->trip != count)
+					continue;
+
+				cdev = instance->cdev;
+
+				if (temp > trip_temp)
+					cdev->ops->set_cur_state(cdev, 1);
+				else
+					cdev->ops->set_cur_state(cdev, 0);
+			}
+			break;
+		case THERMAL_TRIP_PASSIVE:
+			thermal_zone_device_passive(tz, temp, trip_temp, count);
+			break;
+		}
+	}
+	tz->last_temperature = temp;
+	if (tz->passive)
+		thermal_zone_device_set_polling(tz, tz->passive_delay);
+	else if (tz->polling_delay)
+		thermal_zone_device_set_polling(tz, tz->polling_delay);
+}
+
+EXPORT_SYMBOL(thermal_zone_device_update);
+
+static void thermal_zone_device_check(struct work_struct *work)
+{
+	struct thermal_zone_device *tz = container_of(work, struct
+						      thermal_zone_device,
+						      poll_queue.work);
+	thermal_zone_device_update(tz);
+}
+
 /**
  * thermal_zone_device_register - register a new thermal zone device
  * @type:	the thermal zone device type
  * @trips:	the number of trip points the thermal zone support
  * @devdata:	private device data
  * @ops:	standard thermal zone device callbacks
+ * @tc1:	thermal coefficient 1 for passive calculations
+ * @tc2:	thermal coefficient 2 for passive calculations
+ * @passive_delay: number of milliseconds to wait between polls when
+ *		   performing passive cooling
+ * @polling_delay: number of milliseconds to wait between polls when checking
+ *		   whether trip points have been crossed (0 for interrupt
+ *		   driven systems)
  *
  * thermal_zone_device_unregister() must be called when the device is no
  * longer needed.
@@ -792,7 +946,10 @@ struct thermal_zone_device *thermal_zone_device_register(char *type,
 							 int trips,
 							 void *devdata, struct
 							 thermal_zone_device_ops
-							 *ops)
+							 *ops, int tc1, int
+							 tc2,
+							 int passive_delay,
+							 int polling_delay)
 {
 	struct thermal_zone_device *tz;
 	struct thermal_cooling_device *pos;
@@ -826,6 +983,11 @@ struct thermal_zone_device *thermal_zone_device_register(char *type,
 	tz->device.class = &thermal_class;
 	tz->devdata = devdata;
 	tz->trips = trips;
+	tz->tc1 = tc1;
+	tz->tc2 = tc2;
+	tz->passive_delay = passive_delay;
+	tz->polling_delay = polling_delay;
+
 	sprintf(tz->device.bus_id, "thermal_zone%d", tz->id);
 	result = device_register(&tz->device);
 	if (result) {
@@ -871,6 +1033,10 @@ struct thermal_zone_device *thermal_zone_device_register(char *type,
 		}
 	mutex_unlock(&thermal_list_lock);
 
+	INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
+
+	thermal_zone_device_set_polling(tz, tz->polling_delay);
+
 	if (!result)
 		return tz;
 
@@ -910,6 +1076,8 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 		    tz->ops->unbind(tz, cdev);
 	mutex_unlock(&thermal_list_lock);
 
+	thermal_zone_device_set_polling(tz, 0);
+
 	if (tz->type[0])
 		device_remove_file(&tz->device, &dev_attr_type);
 	device_remove_file(&tz->device, &dev_attr_temp);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 9e3475a..38ac33d 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -94,11 +94,18 @@ struct thermal_zone_device {
 	struct device device;
 	void *devdata;
 	int trips;
+	int tc1;
+	int tc2;
+	int passive_delay;
+	int polling_delay;
+	int last_temperature;
+	bool passive;
 	struct thermal_zone_device_ops *ops;
 	struct list_head cooling_devices;
 	struct idr idr;
 	struct mutex lock;	/* protect cooling devices list */
 	struct list_head node;
+	struct delayed_work poll_queue;
 #if defined(CONFIG_THERMAL_HWMON)
 	struct list_head hwmon_node;
 	struct thermal_hwmon_device *hwmon;
@@ -110,13 +117,16 @@ struct thermal_zone_device {
 struct thermal_zone_device *thermal_zone_device_register(char *, int, void *,
 							 struct
 							 thermal_zone_device_ops
-							 *);
+							 *, int tc1, int tc2,
+							 int passive_freq,
+							 int polling_freq);
 void thermal_zone_device_unregister(struct thermal_zone_device *);
 
 int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
 				     struct thermal_cooling_device *);
 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
 				       struct thermal_cooling_device *);
+void thermal_zone_device_update(struct thermal_zone_device *);
 struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
 							       struct
 							       thermal_cooling_device_ops

-- 
Matthew Garrett | mjg59@...f.ucam.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/