diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/ras.c')
| -rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 193 | 
1 files changed, 127 insertions, 66 deletions
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 086d2ae4e06..c4dfccd3a3d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -16,37 +16,15 @@   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA   */ -/* Change Activity: - * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support. - * End Change Activity - */ - -#include <linux/errno.h> -#include <linux/threads.h> -#include <linux/kernel_stat.h> -#include <linux/signal.h>  #include <linux/sched.h> -#include <linux/ioport.h>  #include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/init.h> -#include <linux/delay.h>  #include <linux/irq.h> -#include <linux/random.h> -#include <linux/sysrq.h> -#include <linux/bitops.h> +#include <linux/of.h> +#include <linux/fs.h> +#include <linux/reboot.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/irq.h> -#include <asm/cache.h> -#include <asm/prom.h> -#include <asm/ptrace.h>  #include <asm/machdep.h>  #include <asm/rtas.h> -#include <asm/udbg.h>  #include <asm/firmware.h>  #include "pseries.h" @@ -57,7 +35,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock);  static char global_mce_data_buf[RTAS_ERROR_LOG_MAX];  static DEFINE_PER_CPU(__u64, mce_data_buf); -static int ras_get_sensor_state_token;  static int ras_check_exception_token;  #define EPOW_SENSOR_TOKEN	9 @@ -75,7 +52,6 @@ static int __init init_ras_IRQ(void)  {  	struct device_node *np; -	ras_get_sensor_state_token = rtas_token("get-sensor-state");  	ras_check_exception_token = rtas_token("check-exception");  	/* Internal Errors */ @@ -95,26 +71,126 @@ static int __init init_ras_IRQ(void)  	return 0;  } -__initcall(init_ras_IRQ); +subsys_initcall(init_ras_IRQ); -/* - * Handle power subsystem events (EPOW). - * - * Presently we just log the event has occurred.  This should be fixed - * to examine the type of power failure and take appropriate action where - * the time horizon permits something useful to be done. - */ +#define EPOW_SHUTDOWN_NORMAL				1 +#define EPOW_SHUTDOWN_ON_UPS				2 +#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3 +#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4 + +static void handle_system_shutdown(char event_modifier) +{ +	switch (event_modifier) { +	case EPOW_SHUTDOWN_NORMAL: +		pr_emerg("Firmware initiated power off"); +		orderly_poweroff(1); +		break; + +	case EPOW_SHUTDOWN_ON_UPS: +		pr_emerg("Loss of power reported by firmware, system is " +			"running on UPS/battery"); +		break; + +	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: +		pr_emerg("Loss of system critical functions reported by " +			"firmware"); +		pr_emerg("Check RTAS error log for details"); +		orderly_poweroff(1); +		break; + +	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: +		pr_emerg("Ambient temperature too high reported by firmware"); +		pr_emerg("Check RTAS error log for details"); +		orderly_poweroff(1); +		break; + +	default: +		pr_err("Unknown power/cooling shutdown event (modifier %d)", +			event_modifier); +	} +} + +struct epow_errorlog { +	unsigned char sensor_value; +	unsigned char event_modifier; +	unsigned char extended_modifier; +	unsigned char reserved; +	unsigned char platform_reason; +}; + +#define EPOW_RESET			0 +#define EPOW_WARN_COOLING		1 +#define EPOW_WARN_POWER			2 +#define EPOW_SYSTEM_SHUTDOWN		3 +#define EPOW_SYSTEM_HALT		4 +#define EPOW_MAIN_ENCLOSURE		5 +#define EPOW_POWER_OFF			7 + +void rtas_parse_epow_errlog(struct rtas_error_log *log) +{ +	struct pseries_errorlog *pseries_log; +	struct epow_errorlog *epow_log; +	char action_code; +	char modifier; + +	pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); +	if (pseries_log == NULL) +		return; + +	epow_log = (struct epow_errorlog *)pseries_log->data; +	action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */ +	modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */ + +	switch (action_code) { +	case EPOW_RESET: +		pr_err("Non critical power or cooling issue cleared"); +		break; + +	case EPOW_WARN_COOLING: +		pr_err("Non critical cooling issue reported by firmware"); +		pr_err("Check RTAS error log for details"); +		break; + +	case EPOW_WARN_POWER: +		pr_err("Non critical power issue reported by firmware"); +		pr_err("Check RTAS error log for details"); +		break; + +	case EPOW_SYSTEM_SHUTDOWN: +		handle_system_shutdown(epow_log->event_modifier); +		break; + +	case EPOW_SYSTEM_HALT: +		pr_emerg("Firmware initiated power off"); +		orderly_poweroff(1); +		break; + +	case EPOW_MAIN_ENCLOSURE: +	case EPOW_POWER_OFF: +		pr_emerg("Critical power/cooling issue reported by firmware"); +		pr_emerg("Check RTAS error log for details"); +		pr_emerg("Immediate power off"); +		emergency_sync(); +		kernel_power_off(); +		break; + +	default: +		pr_err("Unknown power/cooling event (action code %d)", +			action_code); +	} +} + +/* Handle environmental and power warning (EPOW) interrupts. */  static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)  { -	int status = 0xdeadbeef; -	int state = 0; +	int status; +	int state;  	int critical; -	status = rtas_call(ras_get_sensor_state_token, 2, 2, &state, -			   EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX); +	status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);  	if (state > 3) -		critical = 1;  /* Time Critical */ +		critical = 1;		/* Time Critical */  	else  		critical = 0; @@ -123,18 +199,14 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)  	status = rtas_call(ras_check_exception_token, 6, 1, NULL,  			   RTAS_VECTOR_EXTERNAL_INTERRUPT,  			   virq_to_hw(irq), -			   RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, +			   RTAS_EPOW_WARNING,  			   critical, __pa(&ras_log_buf),  				rtas_get_error_log_max()); -	udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n", -		    *((unsigned long *)&ras_log_buf), status, state); -	printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n", -	       *((unsigned long *)&ras_log_buf), status, state); - -	/* format and print the extended information */  	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); +	rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); +  	spin_unlock(&ras_log_buf_lock);  	return IRQ_HANDLED;  } @@ -150,7 +222,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)  static irqreturn_t ras_error_interrupt(int irq, void *dev_id)  {  	struct rtas_error_log *rtas_elog; -	int status = 0xdeadbeef; +	int status;  	int fatal;  	spin_lock(&ras_log_buf_lock); @@ -158,7 +230,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)  	status = rtas_call(ras_check_exception_token, 6, 1, NULL,  			   RTAS_VECTOR_EXTERNAL_INTERRUPT,  			   virq_to_hw(irq), -			   RTAS_INTERNAL_ERROR, 1 /*Time Critical */, +			   RTAS_INTERNAL_ERROR, 1 /* Time Critical */,  			   __pa(&ras_log_buf),  				rtas_get_error_log_max()); @@ -173,24 +245,13 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)  	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);  	if (fatal) { -		udbg_printf("Fatal HW Error <0x%lx 0x%x>\n", -			    *((unsigned long *)&ras_log_buf), status); -		printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", -		       *((unsigned long *)&ras_log_buf), status); - -#ifndef DEBUG_RTAS_POWER_OFF -		/* Don't actually power off when debugging so we can test -		 * without actually failing while injecting errors. -		 * Error data will not be logged to syslog. -		 */ -		ppc_md.power_off(); -#endif +		pr_emerg("Fatal hardware error reported by firmware"); +		pr_emerg("Check RTAS error log for details"); +		pr_emerg("Immediate power off"); +		emergency_sync(); +		kernel_power_off();  	} else { -		udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n", -			    *((unsigned long *)&ras_log_buf), status); -		printk(KERN_WARNING -		       "Warning: Recoverable hardware error <0x%lx 0x%x>\n", -		       *((unsigned long *)&ras_log_buf), status); +		pr_err("Recoverable hardware error reported by firmware");  	}  	spin_unlock(&ras_log_buf_lock);  |