diff options
Diffstat (limited to 'arch/x86/kernel/setup.c')
| -rw-r--r-- | arch/x86/kernel/setup.c | 310 | 
1 files changed, 199 insertions, 111 deletions
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8b24289cc10..56f7fcfe7fa 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -82,7 +82,6 @@  #include <asm/timer.h>  #include <asm/i8259.h>  #include <asm/sections.h> -#include <asm/dmi.h>  #include <asm/io_apic.h>  #include <asm/ist.h>  #include <asm/setup_arch.h> @@ -108,17 +107,16 @@  #include <asm/topology.h>  #include <asm/apicdef.h>  #include <asm/amd_nb.h> -#ifdef CONFIG_X86_64 -#include <asm/numa_64.h> -#endif  #include <asm/mce.h>  #include <asm/alternative.h>  #include <asm/prom.h>  /* - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. - * The direct mapping extends to max_pfn_mapped, so that we can directly access - * apertures, ACPI and other tables without having to play with fixmaps. + * max_low_pfn_mapped: highest direct mapped pfn under 4GB + * max_pfn_mapped:     highest direct mapped pfn over 4GB + * + * The direct mapping only covers E820_RAM regions, so the ranges and gaps are + * represented by pfn_mapped   */  unsigned long max_low_pfn_mapped;  unsigned long max_pfn_mapped; @@ -172,9 +170,13 @@ static struct resource bss_resource = {  #ifdef CONFIG_X86_32  /* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { +	.wp_works_ok = -1, +};  /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { +	.wp_works_ok = -1, +};  EXPORT_SYMBOL(boot_cpu_data);  unsigned int def_to_bigsmp; @@ -276,18 +278,7 @@ void * __init extend_brk(size_t size, size_t align)  	return ret;  } -#ifdef CONFIG_X86_64 -static void __init init_gbpages(void) -{ -	if (direct_gbpages && cpu_has_gbpages) -		printk(KERN_INFO "Using GB pages for direct mapping\n"); -	else -		direct_gbpages = 0; -} -#else -static inline void init_gbpages(void) -{ -} +#ifdef CONFIG_X86_32  static void __init cleanup_highmap(void)  {  } @@ -296,8 +287,8 @@ static void __init cleanup_highmap(void)  static void __init reserve_brk(void)  {  	if (_brk_end > _brk_start) -		memblock_reserve(__pa(_brk_start), -				 __pa(_brk_end) - __pa(_brk_start)); +		memblock_reserve(__pa_symbol(_brk_start), +				 _brk_end - _brk_start);  	/* Mark brk area as locked down and no longer taking any  	   new allocations */ @@ -306,27 +297,43 @@ static void __init reserve_brk(void)  #ifdef CONFIG_BLK_DEV_INITRD +static u64 __init get_ramdisk_image(void) +{ +	u64 ramdisk_image = boot_params.hdr.ramdisk_image; + +	ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32; + +	return ramdisk_image; +} +static u64 __init get_ramdisk_size(void) +{ +	u64 ramdisk_size = boot_params.hdr.ramdisk_size; + +	ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32; + +	return ramdisk_size; +} +  #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)  static void __init relocate_initrd(void)  {  	/* Assume only end is not page aligned */ -	u64 ramdisk_image = boot_params.hdr.ramdisk_image; -	u64 ramdisk_size  = boot_params.hdr.ramdisk_size; +	u64 ramdisk_image = get_ramdisk_image(); +	u64 ramdisk_size  = get_ramdisk_size();  	u64 area_size     = PAGE_ALIGN(ramdisk_size); -	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;  	u64 ramdisk_here;  	unsigned long slop, clen, mapaddr;  	char *p, *q; -	/* We need to move the initrd down into lowmem */ -	ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, -					 PAGE_SIZE); +	/* We need to move the initrd down into directly mapped mem */ +	ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), +						 area_size, PAGE_SIZE);  	if (!ramdisk_here)  		panic("Cannot find place for new RAMDISK of size %lld\n",  			 ramdisk_size); -	/* Note: this includes all the lowmem currently occupied by +	/* Note: this includes all the mem currently occupied by  	   the initrd, we rely on that fact to keep the data intact. */  	memblock_reserve(ramdisk_here, area_size);  	initrd_start = ramdisk_here + PAGE_OFFSET; @@ -336,17 +343,7 @@ static void __init relocate_initrd(void)  	q = (char *)initrd_start; -	/* Copy any lowmem portion of the initrd */ -	if (ramdisk_image < end_of_lowmem) { -		clen = end_of_lowmem - ramdisk_image; -		p = (char *)__va(ramdisk_image); -		memcpy(q, p, clen); -		q += clen; -		ramdisk_image += clen; -		ramdisk_size  -= clen; -	} - -	/* Copy the highmem portion of the initrd */ +	/* Copy the initrd */  	while (ramdisk_size) {  		slop = ramdisk_image & ~PAGE_MASK;  		clen = ramdisk_size; @@ -360,22 +357,35 @@ static void __init relocate_initrd(void)  		ramdisk_image += clen;  		ramdisk_size  -= clen;  	} -	/* high pages is not converted by early_res_to_bootmem */ -	ramdisk_image = boot_params.hdr.ramdisk_image; -	ramdisk_size  = boot_params.hdr.ramdisk_size; + +	ramdisk_image = get_ramdisk_image(); +	ramdisk_size  = get_ramdisk_size();  	printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"  		" [mem %#010llx-%#010llx]\n",  		ramdisk_image, ramdisk_image + ramdisk_size - 1,  		ramdisk_here, ramdisk_here + ramdisk_size - 1);  } +static void __init early_reserve_initrd(void) +{ +	/* Assume only end is not page aligned */ +	u64 ramdisk_image = get_ramdisk_image(); +	u64 ramdisk_size  = get_ramdisk_size(); +	u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size); + +	if (!boot_params.hdr.type_of_loader || +	    !ramdisk_image || !ramdisk_size) +		return;		/* No initrd provided by bootloader */ + +	memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); +}  static void __init reserve_initrd(void)  {  	/* Assume only end is not page aligned */ -	u64 ramdisk_image = boot_params.hdr.ramdisk_image; -	u64 ramdisk_size  = boot_params.hdr.ramdisk_size; +	u64 ramdisk_image = get_ramdisk_image(); +	u64 ramdisk_size  = get_ramdisk_size();  	u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size); -	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; +	u64 mapped_size;  	if (!boot_params.hdr.type_of_loader ||  	    !ramdisk_image || !ramdisk_size) @@ -383,22 +393,18 @@ static void __init reserve_initrd(void)  	initrd_start = 0; -	if (ramdisk_size >= (end_of_lowmem>>1)) { +	mapped_size = memblock_mem_size(max_pfn_mapped); +	if (ramdisk_size >= (mapped_size>>1))  		panic("initrd too large to handle, "  		       "disabling initrd (%lld needed, %lld available)\n", -		       ramdisk_size, end_of_lowmem>>1); -	} +		       ramdisk_size, mapped_size>>1);  	printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,  			ramdisk_end - 1); - -	if (ramdisk_end <= end_of_lowmem) { -		/* All in lowmem, easy case */ -		/* -		 * don't need to reserve again, already reserved early -		 * in i386_start_kernel -		 */ +	if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image), +				PFN_DOWN(ramdisk_end))) { +		/* All are mapped, easy case */  		initrd_start = ramdisk_image + PAGE_OFFSET;  		initrd_end = initrd_start + ramdisk_size;  		return; @@ -409,6 +415,9 @@ static void __init reserve_initrd(void)  	memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);  }  #else +static void __init early_reserve_initrd(void) +{ +}  static void __init reserve_initrd(void)  {  } @@ -419,8 +428,6 @@ static void __init parse_setup_data(void)  	struct setup_data *data;  	u64 pa_data; -	if (boot_params.hdr.version < 0x0209) -		return;  	pa_data = boot_params.hdr.setup_data;  	while (pa_data) {  		u32 data_len, map_len; @@ -456,8 +463,6 @@ static void __init e820_reserve_setup_data(void)  	u64 pa_data;  	int found = 0; -	if (boot_params.hdr.version < 0x0209) -		return;  	pa_data = boot_params.hdr.setup_data;  	while (pa_data) {  		data = early_memremap(pa_data, sizeof(*data)); @@ -481,8 +486,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)  	struct setup_data *data;  	u64 pa_data; -	if (boot_params.hdr.version < 0x0209) -		return;  	pa_data = boot_params.hdr.setup_data;  	while (pa_data) {  		data = early_memremap(pa_data, sizeof(*data)); @@ -501,42 +504,104 @@ static void __init memblock_x86_reserve_range_setup_data(void)  /*   * Keep the crash kernel below this limit.  On 32 bits earlier kernels   * would limit the kernel to the low 512 MiB due to mapping restrictions. - * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this - * limit once kexec-tools are fixed. + * On 64bit, old kexec-tools need to under 896MiB.   */  #ifdef CONFIG_X86_32 -# define CRASH_KERNEL_ADDR_MAX	(512 << 20) +# define CRASH_KERNEL_ADDR_LOW_MAX	(512 << 20) +# define CRASH_KERNEL_ADDR_HIGH_MAX	(512 << 20)  #else -# define CRASH_KERNEL_ADDR_MAX	(896 << 20) +# define CRASH_KERNEL_ADDR_LOW_MAX	(896UL<<20) +# define CRASH_KERNEL_ADDR_HIGH_MAX	MAXMEM +#endif + +static void __init reserve_crashkernel_low(void) +{ +#ifdef CONFIG_X86_64 +	const unsigned long long alignment = 16<<20;	/* 16M */ +	unsigned long long low_base = 0, low_size = 0; +	unsigned long total_low_mem; +	unsigned long long base; +	bool auto_set = false; +	int ret; + +	total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); +	/* crashkernel=Y,low */ +	ret = parse_crashkernel_low(boot_command_line, total_low_mem, +						&low_size, &base); +	if (ret != 0) { +		/* +		 * two parts from lib/swiotlb.c: +		 *	swiotlb size: user specified with swiotlb= or default. +		 *	swiotlb overflow buffer: now is hardcoded to 32k. +		 *		We round it to 8M for other buffers that +		 *		may need to stay low too. +		 */ +		low_size = swiotlb_size_or_default() + (8UL<<20); +		auto_set = true; +	} else { +		/* passed with crashkernel=0,low ? */ +		if (!low_size) +			return; +	} + +	low_base = memblock_find_in_range(low_size, (1ULL<<32), +					low_size, alignment); + +	if (!low_base) { +		if (!auto_set) +			pr_info("crashkernel low reservation failed - No suitable area found.\n"); + +		return; +	} + +	memblock_reserve(low_base, low_size); +	pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", +			(unsigned long)(low_size >> 20), +			(unsigned long)(low_base >> 20), +			(unsigned long)(total_low_mem >> 20)); +	crashk_low_res.start = low_base; +	crashk_low_res.end   = low_base + low_size - 1; +	insert_resource(&iomem_resource, &crashk_low_res);  #endif +}  static void __init reserve_crashkernel(void)  { +	const unsigned long long alignment = 16<<20;	/* 16M */  	unsigned long long total_mem;  	unsigned long long crash_size, crash_base; +	bool high = false;  	int ret;  	total_mem = memblock_phys_mem_size(); +	/* crashkernel=XM */  	ret = parse_crashkernel(boot_command_line, total_mem,  			&crash_size, &crash_base); -	if (ret != 0 || crash_size <= 0) -		return; +	if (ret != 0 || crash_size <= 0) { +		/* crashkernel=X,high */ +		ret = parse_crashkernel_high(boot_command_line, total_mem, +				&crash_size, &crash_base); +		if (ret != 0 || crash_size <= 0) +			return; +		high = true; +	}  	/* 0 means: find the address automatically */  	if (crash_base <= 0) { -		const unsigned long long alignment = 16<<20;	/* 16M */ -  		/*  		 *  kexec want bzImage is below CRASH_KERNEL_ADDR_MAX  		 */  		crash_base = memblock_find_in_range(alignment, -			       CRASH_KERNEL_ADDR_MAX, crash_size, alignment); +					high ? CRASH_KERNEL_ADDR_HIGH_MAX : +					       CRASH_KERNEL_ADDR_LOW_MAX, +					crash_size, alignment);  		if (!crash_base) {  			pr_info("crashkernel reservation failed - No suitable area found.\n");  			return;  		} +  	} else {  		unsigned long long start; @@ -558,6 +623,9 @@ static void __init reserve_crashkernel(void)  	crashk_res.start = crash_base;  	crashk_res.end   = crash_base + crash_size - 1;  	insert_resource(&iomem_resource, &crashk_res); + +	if (crash_base >= (1ULL<<32)) +		reserve_crashkernel_low();  }  #else  static void __init reserve_crashkernel(void) @@ -608,8 +676,6 @@ static __init void reserve_ibft_region(void)  		memblock_reserve(addr, size);  } -static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; -  static bool __init snb_gfx_workaround_needed(void)  {  #ifdef CONFIG_PCI @@ -698,8 +764,7 @@ static void __init trim_bios_range(void)  	 * since some BIOSes are known to corrupt low memory.  See the  	 * Kconfig help text for X86_RESERVE_LOW.  	 */ -	e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), -			  E820_RAM, E820_RESERVED); +	e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);  	/*  	 * special case: Some BIOSen report the PC BIOS @@ -711,6 +776,29 @@ static void __init trim_bios_range(void)  	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);  } +/* called before trim_bios_range() to spare extra sanitize */ +static void __init e820_add_kernel_range(void) +{ +	u64 start = __pa_symbol(_text); +	u64 size = __pa_symbol(_end) - start; + +	/* +	 * Complain if .text .data and .bss are not marked as E820_RAM and +	 * attempt to fix it by adding the range. We may have a confused BIOS, +	 * or the user may have used memmap=exactmap or memmap=xxM$yyM to +	 * exclude kernel range. If we really are running on top non-RAM, +	 * we will crash later anyways. +	 */ +	if (e820_all_mapped(start, start + size, E820_RAM)) +		return; + +	pr_warn(".text .data .bss are not marked as E820_RAM!\n"); +	e820_remove_range(start, size, E820_RAM, 0); +	e820_add_region(start, size, E820_RAM); +} + +static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; +  static int __init parse_reservelow(char *p)  {  	unsigned long long size; @@ -733,6 +821,11 @@ static int __init parse_reservelow(char *p)  early_param("reservelow", parse_reservelow); +static void __init trim_low_memory_range(void) +{ +	memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); +} +	  /*   * Determine if we were loaded by an EFI loader.  If so, then we have also been   * passed the efi memmap, systab, etc., so we should use these data structures @@ -748,6 +841,17 @@ early_param("reservelow", parse_reservelow);  void __init setup_arch(char **cmdline_p)  { +	memblock_reserve(__pa_symbol(_text), +			 (unsigned long)__bss_stop - (unsigned long)_text); + +	early_reserve_initrd(); + +	/* +	 * At this point everything still needed from the boot loader +	 * or BIOS or kernel text should be early reserved or marked not +	 * RAM in e820. All other memory is free game. +	 */ +  #ifdef CONFIG_X86_32  	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));  	visws_early_detect(); @@ -835,12 +939,12 @@ void __init setup_arch(char **cmdline_p)  	init_mm.end_data = (unsigned long) _edata;  	init_mm.brk = _brk_end; -	code_resource.start = virt_to_phys(_text); -	code_resource.end = virt_to_phys(_etext)-1; -	data_resource.start = virt_to_phys(_etext); -	data_resource.end = virt_to_phys(_edata)-1; -	bss_resource.start = virt_to_phys(&__bss_start); -	bss_resource.end = virt_to_phys(&__bss_stop)-1; +	code_resource.start = __pa_symbol(_text); +	code_resource.end = __pa_symbol(_etext)-1; +	data_resource.start = __pa_symbol(_etext); +	data_resource.end = __pa_symbol(_edata)-1; +	bss_resource.start = __pa_symbol(__bss_start); +	bss_resource.end = __pa_symbol(__bss_stop)-1;  #ifdef CONFIG_CMDLINE_BOOL  #ifdef CONFIG_CMDLINE_OVERRIDE @@ -892,6 +996,7 @@ void __init setup_arch(char **cmdline_p)  		efi_init();  	dmi_scan_machine(); +	dmi_set_dump_stack_arch_desc();  	/*  	 * VMware detection requires dmi to be available, so this @@ -906,6 +1011,7 @@ void __init setup_arch(char **cmdline_p)  	insert_resource(&iomem_resource, &data_resource);  	insert_resource(&iomem_resource, &bss_resource); +	e820_add_kernel_range();  	trim_bios_range();  #ifdef CONFIG_X86_32  	if (ppro_with_ram_bug()) { @@ -955,6 +1061,8 @@ void __init setup_arch(char **cmdline_p)  	reserve_ibft_region(); +	early_alloc_pgt_buf(); +  	/*  	 * Need to conclude brk, before memblock_x86_fill()  	 *  it could use memblock_find_in_range, could overlap with @@ -964,7 +1072,7 @@ void __init setup_arch(char **cmdline_p)  	cleanup_highmap(); -	memblock.current_limit = get_max_mapped(); +	memblock.current_limit = ISA_END_ADDRESS;  	memblock_x86_fill();  	/* @@ -981,41 +1089,22 @@ void __init setup_arch(char **cmdline_p)  	setup_bios_corruption_check();  #endif +#ifdef CONFIG_X86_32  	printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",  			(max_pfn_mapped<<PAGE_SHIFT) - 1); +#endif -	setup_real_mode(); +	reserve_real_mode();  	trim_platform_memory_ranges(); +	trim_low_memory_range(); -	init_gbpages(); - -	/* max_pfn_mapped is updated here */ -	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); -	max_pfn_mapped = max_low_pfn_mapped; - -#ifdef CONFIG_X86_64 -	if (max_pfn > max_low_pfn) { -		int i; -		unsigned long start, end; -		unsigned long start_pfn, end_pfn; +	init_mem_mapping(); -		for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, -							 NULL) { +	early_trap_pf_init(); -			end = PFN_PHYS(end_pfn); -			if (end <= (1UL<<32)) -				continue; - -			start = PFN_PHYS(start_pfn); -			max_pfn_mapped = init_memory_mapping( -						max((1UL<<32), start), end); -		} +	setup_real_mode(); -		/* can we preseve max_low_pfn ?*/ -		max_low_pfn = max_pfn; -	} -#endif  	memblock.current_limit = get_max_mapped();  	dma_contiguous_reserve(0); @@ -1135,8 +1224,7 @@ void __init setup_arch(char **cmdline_p)  	 * mismatched firmware/kernel archtectures since there is no  	 * support for runtime services.  	 */ -	if (efi_enabled(EFI_BOOT) && -	    IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) { +	if (efi_enabled(EFI_BOOT) && !efi_is_native()) {  		pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");  		efi_unmap_memmap();  	}  |