diff options
| -rw-r--r-- | CHANGELOG | 6 | ||||
| -rw-r--r-- | board/amcc/yellowstone/yellowstone.c | 4 | ||||
| -rw-r--r-- | board/amcc/yosemite/yosemite.c | 4 | ||||
| -rw-r--r-- | cpu/ppc4xx/sdram.c | 2 | ||||
| -rw-r--r-- | cpu/ppc4xx/spd_sdram.c | 4 | ||||
| -rw-r--r-- | doc/README.440-DDR-performance | 90 | 
6 files changed, 103 insertions, 7 deletions
| @@ -2,6 +2,12 @@  Changes since U-Boot 1.1.4:  ====================================================================== +* PPC440 DDR setup: Set SDRAM0_CFG0[PMU]=0 for best performance +  AMCC suggested to set the PMU bit to 0 for best performance on +  the PPC440 DDR controller. +  Please see doc/README.440-DDR-performance for details. +  Patch by Stefan Roese, 28 Jul 2006 +  * AMCC bamboo (440EP) U-Boot image reduced to 384kbyte    Please see doc/README.bamboo for details.    Patch by Stefan Roese, 27 Jul 2006 diff --git a/board/amcc/yellowstone/yellowstone.c b/board/amcc/yellowstone/yellowstone.c index 86d0db751..92dc9d4c0 100644 --- a/board/amcc/yellowstone/yellowstone.c +++ b/board/amcc/yellowstone/yellowstone.c @@ -313,13 +313,13 @@ void sdram_init(void)  	mtsdram(mem_tr0, 0x410a4012);	/* ?? */  	mtsdram(mem_rtr, 0x04080000);	/* ?? */  	mtsdram(mem_cfg1, 0x00000000);	/* Self-refresh exit, disable PM    */ -	mtsdram(mem_cfg0, 0x34000000);	/* Disable EEC */ +	mtsdram(mem_cfg0, 0x30000000);	/* Disable EEC */  	udelay(400);		/* Delay 200 usecs (min)            */  	/*--------------------------------------------------------------------  	 * Enable the controller, then wait for DCEN to complete  	 *------------------------------------------------------------------*/ -	mtsdram(mem_cfg0, 0x84000000);	/* Enable */ +	mtsdram(mem_cfg0, 0x80000000);	/* Enable */  	for (;;) {  		mfsdram(mem_mcsts, reg); diff --git a/board/amcc/yosemite/yosemite.c b/board/amcc/yosemite/yosemite.c index 674244155..7f2e71820 100644 --- a/board/amcc/yosemite/yosemite.c +++ b/board/amcc/yosemite/yosemite.c @@ -309,13 +309,13 @@ void sdram_init(void)  	mtsdram(mem_tr0, 0x410a4012);	/* ?? */  	mtsdram(mem_rtr, 0x04080000);	/* ?? 
*/  	mtsdram(mem_cfg1, 0x00000000);	/* Self-refresh exit, disable PM    */ -	mtsdram(mem_cfg0, 0x34000000);	/* Disable EEC */ +	mtsdram(mem_cfg0, 0x30000000);	/* Disable EEC */  	udelay(400);		/* Delay 200 usecs (min)            */  	/*--------------------------------------------------------------------  	 * Enable the controller, then wait for DCEN to complete  	 *------------------------------------------------------------------*/ -	mtsdram(mem_cfg0, 0x84000000);	/* Enable */ +	mtsdram(mem_cfg0, 0x80000000);	/* Enable */  	for (;;) {  		mfsdram(mem_mcsts, reg); diff --git a/cpu/ppc4xx/sdram.c b/cpu/ppc4xx/sdram.c index e31d59d80..faeea5c91 100644 --- a/cpu/ppc4xx/sdram.c +++ b/cpu/ppc4xx/sdram.c @@ -379,7 +379,7 @@ long int initdram(int board_type)  		/*  		 * Enable the controller, then wait for DCEN to complete  		 */ -		mtsdram(mem_cfg0, 0x86000000);	/* DCEN=1, PMUD=1, 64-bit	*/ +		mtsdram(mem_cfg0, 0x82000000);	/* DCEN=1, PMUD=0, 64-bit	*/  		udelay(10000);  		if (get_ram_size(0, mb0cf[i].size) == mb0cf[i].size) { diff --git a/cpu/ppc4xx/spd_sdram.c b/cpu/ppc4xx/spd_sdram.c index c0a6933b8..c24456bea 100644 --- a/cpu/ppc4xx/spd_sdram.c +++ b/cpu/ppc4xx/spd_sdram.c @@ -1007,9 +1007,9 @@ void program_cfg0(unsigned long* dimm_populated,  	}  	/* -	 * program Page Management Unit +	 * program Page Management Unit (0 == enabled)  	 */ -	cfg0 |= SDRAM_CFG0_PMUD; +	cfg0 &= ~SDRAM_CFG0_PMUD;  	/*  	 * program Memory Controller Options 0 diff --git a/doc/README.440-DDR-performance b/doc/README.440-DDR-performance new file mode 100644 index 000000000..17bc74764 --- /dev/null +++ b/doc/README.440-DDR-performance @@ -0,0 +1,90 @@ +AMCC suggested to set the PMU bit to 0 for best performance on the +PPC440 DDR controller. The 440er common DDR setup files (sdram.c & +spd_sdram.c) are changed accordingly. So all 440er boards using +these setup routines will automatically receive this performance +increase. 
+ +Please see below some benchmarks done by AMCC to demonstrate these +performance changes: + + +---------------------------------------- +SDRAM0_CFG0[PMU] = 1 (U-boot default for Bamboo, Yosemite and Yellowstone) +---------------------------------------- +Stream benchmark results +------------------------------------------------------------- +This system uses 8 bytes per DOUBLE PRECISION word. +------------------------------------------------------------- +Array size = 2000000, Offset = 0 +Total memory required = 45.8 MB. +Each test is run 10 times, but only +the *best* time for each is used. +------------------------------------------------------------- +Your clock granularity/precision appears to be 1 microseconds. +Each test below will take on the order of 112345 microseconds. +   (= 112345 clock ticks) +Increase the size of the arrays if this shows that you are not getting +at least 20 clock ticks per test. +------------------------------------------------------------- +WARNING -- The above is only a rough guideline. +For best results, please be sure you know the precision of your system +timer. 
+------------------------------------------------------------- +Function      Rate (MB/s)   RMS time     Min time     Max time +Copy:         256.7683       0.1248       0.1246       0.1250 +Scale:        246.0157       0.1302       0.1301       0.1302 +Add:          255.0316       0.1883       0.1882       0.1885 +Triad:        253.1245       0.1897       0.1896       0.1899 + + +TTCP Benchmark Results +ttcp-t: socket +ttcp-t: connect +ttcp-t: buflen=8192, nbuf=2048, align=16384/0, port=5000  tcp  -> +localhost +ttcp-t: 16777216 bytes in 0.28 real seconds = 454.29 Mbit/sec +++ +ttcp-t: 2048 I/O calls, msec/call = 0.14, calls/sec = 7268.57 +ttcp-t: 0.0user 0.1sys 0:00real 60% 0i+0d 0maxrss 0+2pf 3+1506csw + +---------------------------------------- +SDRAM0_CFG0[PMU] = 0 (Suggested modification) +Setting PMU = 0 provides a noticeable performance improvement *2% to +5% improvement in memory performance. +*Improves the Mbit/sec for TTCP benchmark by almost 76%. +---------------------------------------- +Stream benchmark results +------------------------------------------------------------- +This system uses 8 bytes per DOUBLE PRECISION word. +------------------------------------------------------------- +Array size = 2000000, Offset = 0 +Total memory required = 45.8 MB. +Each test is run 10 times, but only +the *best* time for each is used. +------------------------------------------------------------- +Your clock granularity/precision appears to be 1 microseconds. +Each test below will take on the order of 120066 microseconds. +   (= 120066 clock ticks) +Increase the size of the arrays if this shows that you are not getting +at least 20 clock ticks per test. +------------------------------------------------------------- +WARNING -- The above is only a rough guideline. +For best results, please be sure you know the precision of your system +timer. 
+------------------------------------------------------------- +Function      Rate (MB/s)   RMS time     Min time     Max time +Copy:         262.5167       0.1221       0.1219       0.1223 +Scale:        258.4856       0.1238       0.1238       0.1240 +Add:          262.5404       0.1829       0.1828       0.1831 +Triad:        266.8594       0.1800       0.1799       0.1802 + +TTCP Benchmark Results +ttcp-t: socket +ttcp-t: connect +ttcp-t: buflen=8192, nbuf=2048, align=16384/0, port=5000  tcp  -> +localhost +ttcp-t: 16777216 bytes in 0.16 real seconds = 804.06 Mbit/sec +++ +ttcp-t: 2048 I/O calls, msec/call = 0.08, calls/sec = 12864.89 +ttcp-t: 0.0user 0.0sys 0:00real 46% 0i+0d 0maxrss 0+2pf 120+1csw + + +2006-07-28, Stefan Roese <sr@denx.de> |