diff options
Diffstat (limited to 'drivers/edac/i7core_edac.c')
| -rw-r--r-- | drivers/edac/i7core_edac.c | 285 | 
1 file changed, 82 insertions, 203 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 7f1dfcc4e59..a499c7ed820 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -221,7 +221,9 @@ struct i7core_inject {  };  struct i7core_channel { -	u32		ranks; +	bool		is_3dimms_present; +	bool		is_single_4rank; +	bool		has_4rank;  	u32		dimms;  }; @@ -257,7 +259,6 @@ struct i7core_pvt {  	struct i7core_channel	channel[NUM_CHANS];  	int		ce_count_available; -	int 		csrow_map[NUM_CHANS][MAX_DIMMS];  			/* ECC corrected errors counts per udimm */  	unsigned long	udimm_ce_count[MAX_DIMMS]; @@ -492,116 +493,15 @@ static void free_i7core_dev(struct i7core_dev *i7core_dev)  /****************************************************************************  			Memory check routines   ****************************************************************************/ -static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot, -					  unsigned func) -{ -	struct i7core_dev *i7core_dev = get_i7core_dev(socket); -	int i; - -	if (!i7core_dev) -		return NULL; - -	for (i = 0; i < i7core_dev->n_devs; i++) { -		if (!i7core_dev->pdev[i]) -			continue; - -		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot && -		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) { -			return i7core_dev->pdev[i]; -		} -	} - -	return NULL; -} - -/** - * i7core_get_active_channels() - gets the number of channels and csrows - * @socket:	Quick Path Interconnect socket - * @channels:	Number of channels that will be returned - * @csrows:	Number of csrows found - * - * Since EDAC core needs to know in advance the number of available channels - * and csrows, in order to allocate memory for csrows/channels, it is needed - * to run two similar steps. At the first step, implemented on this function, - * it checks the number of csrows/channels present at one socket. - * this is used in order to properly allocate the size of mci components. 
- * - * It should be noticed that none of the current available datasheets explain - * or even mention how csrows are seen by the memory controller. So, we need - * to add a fake description for csrows. - * So, this driver is attributing one DIMM memory for one csrow. - */ -static int i7core_get_active_channels(const u8 socket, unsigned *channels, -				      unsigned *csrows) -{ -	struct pci_dev *pdev = NULL; -	int i, j; -	u32 status, control; - -	*channels = 0; -	*csrows = 0; - -	pdev = get_pdev_slot_func(socket, 3, 0); -	if (!pdev) { -		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n", -			      socket); -		return -ENODEV; -	} - -	/* Device 3 function 0 reads */ -	pci_read_config_dword(pdev, MC_STATUS, &status); -	pci_read_config_dword(pdev, MC_CONTROL, &control); - -	for (i = 0; i < NUM_CHANS; i++) { -		u32 dimm_dod[3]; -		/* Check if the channel is active */ -		if (!(control & (1 << (8 + i)))) -			continue; - -		/* Check if the channel is disabled */ -		if (status & (1 << i)) -			continue; - -		pdev = get_pdev_slot_func(socket, i + 4, 1); -		if (!pdev) { -			i7core_printk(KERN_ERR, "Couldn't find socket %d " -						"fn %d.%d!!!\n", -						socket, i + 4, 1); -			return -ENODEV; -		} -		/* Devices 4-6 function 1 */ -		pci_read_config_dword(pdev, -				MC_DOD_CH_DIMM0, &dimm_dod[0]); -		pci_read_config_dword(pdev, -				MC_DOD_CH_DIMM1, &dimm_dod[1]); -		pci_read_config_dword(pdev, -				MC_DOD_CH_DIMM2, &dimm_dod[2]); - -		(*channels)++; - -		for (j = 0; j < 3; j++) { -			if (!DIMM_PRESENT(dimm_dod[j])) -				continue; -			(*csrows)++; -		} -	} - -	debugf0("Number of active channels on socket %d: %d\n", -		socket, *channels); -	return 0; -} - -static int get_dimm_config(const struct mem_ctl_info *mci) +static int get_dimm_config(struct mem_ctl_info *mci)  {  	struct i7core_pvt *pvt = mci->pvt_info; -	struct csrow_info *csr;  	struct pci_dev *pdev;  	int i, j; -	int csrow = 0; -	unsigned long last_page = 0;  	enum edac_type mode;  	enum mem_type mtype; +	
struct dimm_info *dimm;  	/* Get data from the MC register, function 0 */  	pdev = pvt->pci_mcr[0]; @@ -657,21 +557,20 @@ static int get_dimm_config(const struct mem_ctl_info *mci)  		pci_read_config_dword(pvt->pci_ch[i][0],  				MC_CHANNEL_DIMM_INIT_PARAMS, &data); -		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ? -						4 : 2; + +		if (data & THREE_DIMMS_PRESENT) +			pvt->channel[i].is_3dimms_present = true; + +		if (data & SINGLE_QUAD_RANK_PRESENT) +			pvt->channel[i].is_single_4rank = true; + +		if (data & QUAD_RANK_PRESENT) +			pvt->channel[i].has_4rank = true;  		if (data & REGISTERED_DIMM)  			mtype = MEM_RDDR3;  		else  			mtype = MEM_DDR3; -#if 0 -		if (data & THREE_DIMMS_PRESENT) -			pvt->channel[i].dimms = 3; -		else if (data & SINGLE_QUAD_RANK_PRESENT) -			pvt->channel[i].dimms = 1; -		else -			pvt->channel[i].dimms = 2; -#endif  		/* Devices 4-6 function 1 */  		pci_read_config_dword(pvt->pci_ch[i][1], @@ -682,11 +581,13 @@ static int get_dimm_config(const struct mem_ctl_info *mci)  				MC_DOD_CH_DIMM2, &dimm_dod[2]);  		debugf0("Ch%d phy rd%d, wr%d (0x%08x): " -			"%d ranks, %cDIMMs\n", +			"%s%s%s%cDIMMs\n",  			i,  			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),  			data, -			pvt->channel[i].ranks, +			pvt->channel[i].is_3dimms_present ? "3DIMMS " : "", +			pvt->channel[i].is_single_4rank ? "SINGLE_4R " : "", +			pvt->channel[i].has_4rank ? "HAS_4R " : "",  			(data & REGISTERED_DIMM) ? 
'R' : 'U');  		for (j = 0; j < 3; j++) { @@ -696,6 +597,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci)  			if (!DIMM_PRESENT(dimm_dod[j]))  				continue; +			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, +				       i, j, 0);  			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));  			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));  			rows = numrow(MC_DOD_NUMROW(dimm_dod[j])); @@ -704,8 +607,6 @@ static int get_dimm_config(const struct mem_ctl_info *mci)  			/* DDR3 has 8 I/O banks */  			size = (rows * cols * banks * ranks) >> (20 - 3); -			pvt->channel[i].dimms++; -  			debugf0("\tdimm %d %d Mb offset: %x, "  				"bank: %d, rank: %d, row: %#x, col: %#x\n",  				j, size, @@ -714,44 +615,28 @@ static int get_dimm_config(const struct mem_ctl_info *mci)  			npages = MiB_TO_PAGES(size); -			csr = &mci->csrows[csrow]; -			csr->first_page = last_page + 1; -			last_page += npages; -			csr->last_page = last_page; -			csr->nr_pages = npages; - -			csr->page_mask = 0; -			csr->grain = 8; -			csr->csrow_idx = csrow; -			csr->nr_channels = 1; - -			csr->channels[0].chan_idx = i; -			csr->channels[0].ce_count = 0; - -			pvt->csrow_map[i][j] = csrow; +			dimm->nr_pages = npages;  			switch (banks) {  			case 4: -				csr->dtype = DEV_X4; +				dimm->dtype = DEV_X4;  				break;  			case 8: -				csr->dtype = DEV_X8; +				dimm->dtype = DEV_X8;  				break;  			case 16: -				csr->dtype = DEV_X16; +				dimm->dtype = DEV_X16;  				break;  			default: -				csr->dtype = DEV_UNKNOWN; +				dimm->dtype = DEV_UNKNOWN;  			} -			csr->edac_mode = mode; -			csr->mtype = mtype; -			snprintf(csr->channels[0].label, -					sizeof(csr->channels[0].label), -					"CPU#%uChannel#%u_DIMM#%u", -					pvt->i7core_dev->socket, i, j); - -			csrow++; +			snprintf(dimm->label, sizeof(dimm->label), +				 "CPU#%uChannel#%u_DIMM#%u", +				 pvt->i7core_dev->socket, i, j); +			dimm->grain = 8; +			dimm->edac_mode = mode; +			dimm->mtype = mtype;  		}  		pci_read_config_dword(pdev, 
MC_SAG_CH_0, &value[0]); @@ -1567,22 +1452,16 @@ error:  /****************************************************************************  			Error check routines   ****************************************************************************/ -static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci, +static void i7core_rdimm_update_errcount(struct mem_ctl_info *mci,  				      const int chan,  				      const int dimm,  				      const int add)  { -	char *msg; -	struct i7core_pvt *pvt = mci->pvt_info; -	int row = pvt->csrow_map[chan][dimm], i; +	int i;  	for (i = 0; i < add; i++) { -		msg = kasprintf(GFP_KERNEL, "Corrected error " -				"(Socket=%d channel=%d dimm=%d)", -				pvt->i7core_dev->socket, chan, dimm); - -		edac_mc_handle_fbd_ce(mci, row, 0, msg); -		kfree (msg); +		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 0, 0, 0, +				     chan, dimm, -1, "error", "", NULL);  	}  } @@ -1623,11 +1502,11 @@ static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,  	/*updated the edac core */  	if (add0 != 0) -		i7core_rdimm_update_csrow(mci, chan, 0, add0); +		i7core_rdimm_update_errcount(mci, chan, 0, add0);  	if (add1 != 0) -		i7core_rdimm_update_csrow(mci, chan, 1, add1); +		i7core_rdimm_update_errcount(mci, chan, 1, add1);  	if (add2 != 0) -		i7core_rdimm_update_csrow(mci, chan, 2, add2); +		i7core_rdimm_update_errcount(mci, chan, 2, add2);  } @@ -1747,20 +1626,30 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,  				    const struct mce *m)  {  	struct i7core_pvt *pvt = mci->pvt_info; -	char *type, *optype, *err, *msg; +	char *type, *optype, *err, msg[80]; +	enum hw_event_mc_err_type tp_event;  	unsigned long error = m->status & 0x1ff0000l; +	bool uncorrected_error = m->status & 1ll << 61; +	bool ripv = m->mcgstatus & 1;  	u32 optypenum = (m->status >> 4) & 0x07;  	u32 core_err_cnt = (m->status >> 38) & 0x7fff;  	u32 dimm = (m->misc >> 16) & 0x3;  	u32 channel = (m->misc >> 18) & 0x3;  	u32 syndrome = m->misc >> 32;  	u32 
errnum = find_first_bit(&error, 32); -	int csrow; -	if (m->mcgstatus & 1) -		type = "FATAL"; -	else -		type = "NON_FATAL"; +	if (uncorrected_error) { +		if (ripv) { +			type = "FATAL"; +			tp_event = HW_EVENT_ERR_FATAL; +		} else { +			type = "NON_FATAL"; +			tp_event = HW_EVENT_ERR_UNCORRECTED; +		} +	} else { +		type = "CORRECTED"; +		tp_event = HW_EVENT_ERR_CORRECTED; +	}  	switch (optypenum) {  	case 0: @@ -1815,27 +1704,20 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,  		err = "unknown";  	} -	/* FIXME: should convert addr into bank and rank information */ -	msg = kasprintf(GFP_ATOMIC, -		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, " -		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n", -		type, (long long) m->addr, m->cpu, dimm, channel, -		syndrome, core_err_cnt, (long long)m->status, -		(long long)m->misc, optype, err); - -	debugf0("%s", msg); +	snprintf(msg, sizeof(msg), "count=%d %s", core_err_cnt, optype); -	csrow = pvt->csrow_map[channel][dimm]; - -	/* Call the helper to output message */ -	if (m->mcgstatus & 1) -		edac_mc_handle_fbd_ue(mci, csrow, 0, -				0 /* FIXME: should be channel here */, msg); -	else if (!pvt->is_registered) -		edac_mc_handle_fbd_ce(mci, csrow, -				0 /* FIXME: should be channel here */, msg); - -	kfree(msg); +	/* +	 * Call the helper to output message +	 * FIXME: what to do if core_err_cnt > 1? 
Currently, it generates +	 * only one event +	 */ +	if (uncorrected_error || !pvt->is_registered) +		edac_mc_handle_error(tp_event, mci, +				     m->addr >> PAGE_SHIFT, +				     m->addr & ~PAGE_MASK, +				     syndrome, +				     channel, dimm, -1, +				     err, msg, m);  }  /* @@ -1932,12 +1814,6 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,  	if (mce->bank != 8)  		return NOTIFY_DONE; -#ifdef CONFIG_SMP -	/* Only handle if it is the right mc controller */ -	if (mce->socketid != pvt->i7core_dev->socket) -		return NOTIFY_DONE; -#endif -  	smp_rmb();  	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {  		smp_wmb(); @@ -2234,8 +2110,6 @@ static void i7core_unregister_mci(struct i7core_dev *i7core_dev)  	if (pvt->enable_scrub)  		disable_sdram_scrub_setting(mci); -	mce_unregister_decode_chain(&i7_mce_dec); -  	/* Disable EDAC polling */  	i7core_pci_ctl_release(pvt); @@ -2252,15 +2126,19 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)  {  	struct mem_ctl_info *mci;  	struct i7core_pvt *pvt; -	int rc, channels, csrows; - -	/* Check the number of active and not disabled channels */ -	rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows); -	if (unlikely(rc < 0)) -		return rc; +	int rc; +	struct edac_mc_layer layers[2];  	/* allocate a new MC control structure */ -	mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket); + +	layers[0].type = EDAC_MC_LAYER_CHANNEL; +	layers[0].size = NUM_CHANS; +	layers[0].is_virt_csrow = false; +	layers[1].type = EDAC_MC_LAYER_SLOT; +	layers[1].size = MAX_DIMMS; +	layers[1].is_virt_csrow = true; +	mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers, +			    sizeof(*pvt));  	if (unlikely(!mci))  		return -ENOMEM; @@ -2336,8 +2214,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)  	/* DCLK for scrub rate setting */  	pvt->dclk_freq = get_dclk_freq(); -	mce_register_decode_chain(&i7_mce_dec); -  	return 0;  fail0: 
@@ -2481,8 +2357,10 @@ static int __init i7core_init(void)  	pci_rc = pci_register_driver(&i7core_driver); -	if (pci_rc >= 0) +	if (pci_rc >= 0) { +		mce_register_decode_chain(&i7_mce_dec);  		return 0; +	}  	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",  		      pci_rc); @@ -2498,6 +2376,7 @@ static void __exit i7core_exit(void)  {  	debugf2("MC: " __FILE__ ": %s()\n", __func__);  	pci_unregister_driver(&i7core_driver); +	mce_unregister_decode_chain(&i7_mce_dec);  }  module_init(i7core_init);  |