lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20080627160551.d66547db.akpm@linux-foundation.org>
Date:	Fri, 27 Jun 2008 16:05:51 -0700
From:	Andrew Morton <akpm@...ux-foundation.org>
To:	dougthompson@...ssion.com
Cc:	bluesmoke-devel@...ts.sourceforge.net, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/13] EDAC i5100 new intel chipset driver

On Fri, 27 Jun 2008 12:12:53 -0600
dougthompson@...ssion.com wrote:

> From:	Arthur Jones <ajones@...erbed.com>
> 
> Applied to linux-2.6.26-rc5-mm3
> 
> Preliminary support for the Intel 5100 MCH.  CE and UE
> errors are reported along with the current DIMM label
> information and other memory parameters.
> 
> Reasons why this is preliminary:
> 
> 1)  This chip has 2 independent memory controllers which,
> for best perforance, use interleaved accesses to the DDR2
> memory.  This architecture does not map very well to the
> current edac data structures which depend on symmetric
> channel access to the interleaved data.  Without core changes,
> the best I could do for now is to map both memory controllers
> to different csrows (first all ranks of controller 0,
> then all ranks of controller 1).  Someone much more
> familiar with the edac core than I will probably need to
> come up with a more general data structure to handle the
> interleaving and de-interleaving of the two memory controllers.
> 
> 2)  I have not yet tackled the de-interleaving of the
> rank/controller address space into the physical address
> space of the CPU.  There is nothing fundamentally missing,
> it is just ending up to be a lot of code, and I'd rather
> keep it separate for now, esp since it doesn't work yet...
> 
> 3)  The code depends on a particular i5100 chip select
> to DIMM mainboard chip select mapping.  This mapping
> seems obvious to me in order to support dual and single
> ranked memory, but it is not unique and DIMM labels
> could be wrong on other mainboards.  There is no way
> to query this mapping that I know of.
> 
> 4)  The code requires that the i5100 is in 32GB mode.
> Only 4 ranks per controller, 2 ranks per DIMM are
> supported.  I do not have hardware (nor do I expect
> to have hardware anytime soon) for the 48GB (6 ranks
> per controller) mode.
> 
> 5)  The serial presence detect code should be broken
> out into a "real" i2c driver so that decode-dimms.pl
> can work.
> 
> Signed-off-by:	Arthur Jones <ajones@...erbed.com>
> Signed-off-by:	Doug Thompson <dougthompson@...ssion.com>

A single space after the colon is conventional.

>
> ...
>
> +static unsigned long i5100_ctl_page_to_phys(struct mem_ctl_info *mci,
> +					    unsigned long cntlr_addr)
> +{
> +	const struct i5100_priv *priv = mci->pvt_info;
> +
> +	if (cntlr_addr < priv->tolm)
> +		return cntlr_addr;
> +
> +	return (1ULL << 32) + (cntlr_addr - priv->tolm);

Strange to use 1ULL for an unsigned long.

And it's broken on 32-bit, but that's probably inapplicable tothis
driver.

> +}
> +
> +static const char *i5100_err_msg(unsigned err)
> +{
> +	const char *merrs[] = {
> +		"unknown", /* 0 */
> +		"uncorrectable data ECC on replay", /* 1 */
> +		"unknown", /* 2 */
> +		"unknown", /* 3 */
> +		"aliased uncorrectable demand data ECC", /* 4 */
> +		"aliased uncorrectable spare-copy data ECC", /* 5 */
> +		"aliased uncorrectable patrol data ECC", /* 6 */
> +		"unknown", /* 7 */
> +		"unknown", /* 8 */
> +		"unknown", /* 9 */
> +		"non-aliased uncorrectable demand data ECC", /* 10 */
> +		"non-aliased uncorrectable spare-copy data ECC", /* 11 */
> +		"non-aliased uncorrectable patrol data ECC", /* 12 */
> +		"unknown", /* 13 */
> +		"correctable demand data ECC", /* 14 */
> +		"correctable spare-copy data ECC", /* 15 */
> +		"correctable patrol data ECC", /* 16 */
> +		"unknown", /* 17 */
> +		"SPD protocol error", /* 18 */
> +		"unknown", /* 19 */
> +		"spare copy initiated", /* 20 */
> +		"spare copy completed", /* 21 */
> +	};

The compiler will need to assemble thisarray onthe stack each time this
funtion is called.  Should be static.

> +	unsigned i;
> +
> +	for (i = 0; i < ARRAY_SIZE(merrs); i++)
> +		if (1 << i & err)
> +			return merrs[i];
> +
> +	return "none";
> +}
> +
>
> ...
>
> +static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
> +			   u32 ferr, u32 nerr)
> +{
> +	struct i5100_priv *priv = mci->pvt_info;
> +	struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm;
> +	u32 dw;
> +	u32 dw2;
> +	unsigned syndrome = 0;
> +	unsigned ecc_loc = 0;
> +	unsigned merr;
> +	unsigned bank;
> +	unsigned rank;
> +	unsigned cas;
> +	unsigned ras;
> +
> +	pci_read_config_dword(pdev, I5100_VALIDLOG, &dw);
> +
> +	if (I5100_VALIDLOG_REDMEMVALID(dw)) {

I WONDER WHY THAT "FUNCTION" IS IN UPPER CASE?

A lower-cased inlined C function would be nicer..

> +		pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
> +		syndrome = I5100_REDMEMA_SYNDROME(dw2);
> +		pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
> +		ecc_loc = I5100_REDMEMB_ECC_LOCATOR(dw2);
> +	}
> +
> +	if (I5100_VALIDLOG_RECMEMVALID(dw)) {
> +		const char *msg;
> +
> +		pci_read_config_dword(pdev, I5100_RECMEMA, &dw2);
> +		merr = I5100_RECMEMA_MERR(dw2);
> +		bank = I5100_RECMEMA_BANK(dw2);
> +		rank = I5100_RECMEMA_RANK(dw2);
> +
> +		pci_read_config_dword(pdev, I5100_RECMEMB, &dw2);
> +		cas = I5100_RECMEMB_CAS(dw2);
> +		ras = I5100_RECMEMB_RAS(dw2);
> +
> +		/* FIXME:  not really sure if this is what merr is...
> +		 */
> +		if (!merr)
> +			msg = i5100_err_msg(ferr);
> +		else
> +			msg = i5100_err_msg(nerr);
> +
> +		i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
> +	}
> +
> +	if (I5100_VALIDLOG_NRECMEMVALID(dw)) {
> +		const char *msg;
> +
> +		pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2);
> +		merr = I5100_NRECMEMA_MERR(dw2);
> +		bank = I5100_NRECMEMA_BANK(dw2);
> +		rank = I5100_NRECMEMA_RANK(dw2);
> +
> +		pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2);
> +		cas = I5100_NRECMEMB_CAS(dw2);
> +		ras = I5100_NRECMEMB_RAS(dw2);
> +
> +		/* FIXME:  not really sure if this is what merr is...
> +		 */
> +		if (!merr)
> +			msg = i5100_err_msg(ferr);
> +		else
> +			msg = i5100_err_msg(nerr);
> +
> +		i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
> +	}
> +
> +	pci_write_config_dword(pdev, I5100_VALIDLOG, dw);
> +}
> +
>
> ...
>
> +static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
> +					    int csrow)
> +{
> +	struct i5100_priv *priv = mci->pvt_info;
> +	const unsigned ctlr_rank = i5100_csrow_to_rank(mci, csrow);
> +	const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow);
> +	unsigned addr_lines;
> +
> +	/* dimm present? */
> +	if (!priv->mtr[ctlr][ctlr_rank].present)
> +		return 0ULL;
> +
> +	addr_lines =
> +		I5100_DIMM_ADDR_LINES +
> +		priv->mtr[ctlr][ctlr_rank].numcol +
> +		priv->mtr[ctlr][ctlr_rank].numrow +
> +		priv->mtr[ctlr][ctlr_rank].numbank;
> +
> +	return (unsigned long)
> +		((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE);

OK, the ULL here might make sense.

> +}
> +
>
> ...
>
> +static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
> +{
> +	int i;
> +	unsigned long total_pages = 0UL;
> +	struct i5100_priv *priv = mci->pvt_info;
> +
> +	for (i = 0; i < mci->nr_csrows; i++) {
> +		const unsigned long npages = i5100_npages(mci, i);
> +		const unsigned cntlr = i5100_csrow_to_cntlr(mci, i);
> +		const unsigned rank = i5100_csrow_to_rank(mci, i);
> +
> +		if (!npages)
> +			continue;
> +
> +		/*
> +		 * FIXME: these two are totally bogus -- I don't see how to
> +		 * map them correctly to this structure...
> +		 */

?

> +		mci->csrows[i].first_page = total_pages;
> +		mci->csrows[i].last_page = total_pages + npages - 1;
> +		mci->csrows[i].page_mask = 0UL;
> +
> +		mci->csrows[i].nr_pages = npages;
> +		mci->csrows[i].grain = 32;
> +		mci->csrows[i].csrow_idx = i;
> +		mci->csrows[i].dtype =
> +			(priv->mtr[cntlr][rank].width == 4) ? DEV_X4 : DEV_X8;
> +		mci->csrows[i].ue_count = 0;
> +		mci->csrows[i].ce_count = 0;
> +		mci->csrows[i].mtype = MEM_RDDR2;
> +		mci->csrows[i].edac_mode = EDAC_SECDED;
> +		mci->csrows[i].mci = mci;
> +		mci->csrows[i].nr_channels = 1;
> +		mci->csrows[i].channels[0].chan_idx = 0;
> +		mci->csrows[i].channels[0].ce_count = 0;
> +		mci->csrows[i].channels[0].csrow = mci->csrows + i;
> +		snprintf(mci->csrows[i].channels[0].label,
> +			 sizeof(mci->csrows[i].channels[0].label),
> +			 "DIMM%u", i5100_rank_to_slot(mci, cntlr, rank));
> +
> +		total_pages += npages;
> +	}
> +}
> +
>
> ...
>
> +static int __devinit i5100_init_one(struct pci_dev *pdev,
> +				    const struct pci_device_id *id)
> +{
> +	int rc;
> +	struct mem_ctl_info *mci;
> +	struct i5100_priv *priv;
> +	struct pci_dev *ch0mm, *ch1mm;
> +	int ret = 0;
> +	u32 dw;
> +	int ranksperch;
> +
> +	if (PCI_FUNC(pdev->devfn) != 1)
> +		return -ENODEV;
> +
> +	rc = pci_enable_device(pdev);
> +	if (rc < 0) {
> +		ret = rc;
> +		goto bail;
> +	}
> +
> +	/* figure out how many ranks, from strapped state of 48GB_Mode input */
> +	pci_read_config_dword(pdev, I5100_MS, &dw);
> +	ranksperch = !!(dw & (1 << 8)) * 2 + 4;
> +
> +	if (ranksperch != 4) {
> +		/* FIXME: get 6 ranks / controller to work - need hw... */
> +		printk(KERN_INFO "i5100_edac: unsupported configuration.\n");
> +		ret = -ENODEV;
> +		goto bail;
> +	}
> +
> +	/* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
> +	ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
> +				    PCI_DEVICE_ID_INTEL_5100_21, 0);
> +	if (!ch0mm)
> +		return -ENODEV;
> +
> +	rc = pci_enable_device(ch0mm);
> +	if (rc < 0) {
> +		ret = rc;
> +		goto bail_ch0;
> +	}
> +
> +	/* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */
> +	ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
> +				    PCI_DEVICE_ID_INTEL_5100_22, 0);
> +	if (!ch1mm) {
> +		ret = -ENODEV;
> +		goto bail_ch0;
> +	}
> +
> +	rc = pci_enable_device(ch1mm);
> +	if (rc < 0) {
> +		ret = rc;
> +		goto bail_ch1;
> +	}
> +
> +	mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
> +	if (!mci) {
> +		ret = -ENOMEM;
> +		goto bail_ch1;

No pci_disable_device() needed?

> +	}
> +
> +	mci->dev = &pdev->dev;
> +
> +	priv = mci->pvt_info;
> +	priv->ranksperctlr = ranksperch;
> +	priv->mc = pdev;
> +	priv->ch0mm = ch0mm;
> +	priv->ch1mm = ch1mm;
> +
> +	i5100_init_dimm_layout(pdev, mci);
> +	i5100_init_interleaving(pdev, mci);
> +
> +	mci->mtype_cap = MEM_FLAG_FB_DDR2;
> +	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
> +	mci->edac_cap = EDAC_FLAG_SECDED;
> +	mci->mod_name = "i5100_edac.c";
> +	mci->mod_ver = "not versioned";
> +	mci->ctl_name = "i5100";
> +	mci->dev_name = pci_name(pdev);
> +	mci->ctl_page_to_phys = i5100_ctl_page_to_phys;
> +
> +	mci->edac_check = i5100_check_error;
> +
> +	i5100_init_csrows(mci);
> +
> +	/* this strange construction seems to be in every driver, dunno why */
> +	switch (edac_op_state) {
> +	case EDAC_OPSTATE_POLL:
> +	case EDAC_OPSTATE_NMI:
> +		break;
> +	default:
> +		edac_op_state = EDAC_OPSTATE_POLL;
> +		break;
> +	}
> +
> +	if (edac_mc_add_mc(mci)) {
> +		ret = -ENODEV;
> +		goto bail_mc;
> +	}
> +
> +	goto bail;
> +
> +bail_mc:
> +	edac_mc_free(mci);
> +
> +bail_ch1:
> +	pci_dev_put(ch1mm);
> +
> +bail_ch0:
> +	pci_dev_put(ch0mm);
> +
> +bail:
> +	return ret;
> +}
> +
>
> ...
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ