Diffstat (limited to 'arch/x86/kernel/tlb_uv.c')
-rw-r--r--	arch/x86/kernel/tlb_uv.c | 189
1 file changed, 128 insertions, 61 deletions
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index deb5ebb32c3..ed0c33761e6 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -25,6 +25,8 @@ static int			uv_bau_retry_limit __read_mostly;
 
 /* position of pnode (which is nasid>>1): */
 static int			uv_nshift __read_mostly;
+/* base pnode in this partition */
+static int			uv_partition_base_pnode __read_mostly;
 
 static unsigned long		uv_mmask __read_mostly;
 
@@ -32,6 +34,34 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
 static DEFINE_PER_CPU(struct bau_control, bau_control);
 
 /*
+ * Determine the first node on a blade.
+ */
+static int __init blade_to_first_node(int blade)
+{
+	int node, b;
+
+	for_each_online_node(node) {
+		b = uv_node_to_blade_id(node);
+		if (blade == b)
+			return node;
+	}
+	return -1; /* shouldn't happen */
+}
+
+/*
+ * Determine the apicid of the first cpu on a blade.
+ */
+static int __init blade_to_first_apicid(int blade)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu)
+		if (blade == uv_cpu_to_blade_id(cpu))
+			return per_cpu(x86_cpu_to_apicid, cpu);
+	return -1;
+}
+
+/*
  * Free a software acknowledge hardware resource by clearing its Pending
  * bit. This will return a reply to the sender.
  * If the message has timed out, a reply has already been sent by the
@@ -67,7 +97,7 @@ static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
 	msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
 	cpu = uv_blade_processor_id();
 	msg->number_of_cpus =
-	    uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+		uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
 	this_cpu_mask = 1UL << cpu;
 	if (msp->seen_by.bits & this_cpu_mask)
 		return;
@@ -215,14 +245,14 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
  * Returns @flush_mask if some remote flushing remains to be done. The
  * mask will have some bits still set.
  */
-const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
 					     struct bau_desc *bau_desc,
 					     struct cpumask *flush_mask)
 {
 	int completion_status = 0;
 	int right_shift;
 	int tries = 0;
-	int blade;
+	int pnode;
 	int bit;
 	unsigned long mmr_offset;
 	unsigned long index;
@@ -265,8 +295,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
 	 * use the IPI method of shootdown on them.
 	 */
 	for_each_cpu(bit, flush_mask) {
-		blade = uv_cpu_to_blade_id(bit);
-		if (blade == this_blade)
+		pnode = uv_cpu_to_pnode(bit);
+		if (pnode == this_pnode)
 			continue;
 		cpumask_clear_cpu(bit, flush_mask);
 	}
@@ -309,16 +339,16 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
 	int i;
 	int bit;
-	int blade;
+	int pnode;
 	int uv_cpu;
-	int this_blade;
+	int this_pnode;
 	int locals = 0;
 	struct bau_desc *bau_desc;
 
 	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 
 	uv_cpu = uv_blade_processor_id();
-	this_blade = uv_numa_blade_id();
+	this_pnode = uv_hub_info->pnode;
 	bau_desc = __get_cpu_var(bau_control).descriptor_base;
 	bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
 
@@ -326,13 +356,14 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 
 	i = 0;
 	for_each_cpu(bit, flush_mask) {
-		blade = uv_cpu_to_blade_id(bit);
-		BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
-		if (blade == this_blade) {
+		pnode = uv_cpu_to_pnode(bit);
+		BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
+		if (pnode == this_pnode) {
 			locals++;
 			continue;
 		}
-		bau_node_set(blade, &bau_desc->distribution);
+		bau_node_set(pnode - uv_partition_base_pnode,
+				&bau_desc->distribution);
 		i++;
 	}
 	if (i == 0) {
@@ -350,7 +381,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	bau_desc->payload.address = va;
 	bau_desc->payload.sending_cpu = cpu;
 
-	return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
+	return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
 }
 
 /*
@@ -418,24 +449,58 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
+/*
+ * uv_enable_timeouts
+ *
+ * Each target blade (i.e. blades that have cpu's) needs to have
+ * shootdown message timeouts enabled.  The timeout does not cause
+ * an interrupt, but causes an error message to be returned to
+ * the sender.
+ */
 static void uv_enable_timeouts(void)
 {
-	int i;
 	int blade;
-	int last_blade;
+	int nblades;
 	int pnode;
-	int cur_cpu = 0;
-	unsigned long apicid;
+	unsigned long mmr_image;
 
-	last_blade = -1;
-	for_each_online_node(i) {
-		blade = uv_node_to_blade_id(i);
-		if (blade == last_blade)
+	nblades = uv_num_possible_blades();
+
+	for (blade = 0; blade < nblades; blade++) {
+		if (!uv_blade_nr_possible_cpus(blade))
 			continue;
-		last_blade = blade;
-		apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+
 		pnode = uv_blade_to_pnode(blade);
-		cur_cpu += uv_blade_nr_possible_cpus(i);
+		mmr_image =
+		    uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
+		/*
+		 * Set the timeout period and then lock it in, in three
+		 * steps; captures and locks in the period.
+		 *
+		 * To program the period, the SOFT_ACK_MODE must be off.
+		 */
+		mmr_image &= ~((unsigned long)1 <<
+			       UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+		uv_write_global_mmr64
+		    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+		/*
+		 * Set the 4-bit period.
+		 */
+		mmr_image &= ~((unsigned long)0xf <<
+			UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+		mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
+			     UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+		uv_write_global_mmr64
+		    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+		/*
+		 * Subsequent reversals of the timebase bit (3) cause an
+		 * immediate timeout of one or all INTD resources as
+		 * indicated in bits 2:0 (7 causes all of them to timeout).
+		 */
+		mmr_image |= ((unsigned long)1 <<
+			      UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+		uv_write_global_mmr64
+		    (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
 	}
 }
 
@@ -482,8 +547,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 			   stat->requestee, stat->onetlb, stat->alltlb,
 			   stat->s_retry, stat->d_retry, stat->ptc_i);
 		seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
-			   uv_read_global_mmr64(uv_blade_to_pnode
-					(uv_cpu_to_blade_id(cpu)),
+			   uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
 					UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
 			   stat->sflush, stat->dflush,
 			   stat->retriesok, stat->nomsg,
@@ -617,16 +681,18 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
  * finish the initialization of the per-blade control structures
  */
 static void __init
-uv_table_bases_finish(int blade, int node, int cur_cpu,
+uv_table_bases_finish(int blade,
 		      struct bau_control *bau_tablesp,
 		      struct bau_desc *adp)
 {
 	struct bau_control *bcp;
-	int i;
+	int cpu;
 
-	for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
-		bcp = (struct bau_control *)&per_cpu(bau_control, i);
+	for_each_present_cpu(cpu) {
+		if (blade != uv_cpu_to_blade_id(cpu))
+			continue;
+		bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
 		bcp->bau_msg_head	= bau_tablesp->va_queue_first;
 		bcp->va_queue_first	= bau_tablesp->va_queue_first;
 		bcp->va_queue_last	= bau_tablesp->va_queue_last;
@@ -649,11 +715,10 @@ uv_activation_descriptor_init(int node, int pnode)
 	struct bau_desc *adp;
 	struct bau_desc *ad2;
 
-	adp = (struct bau_desc *)
-	    kmalloc_node(16384, GFP_KERNEL, node);
+	adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
 	BUG_ON(!adp);
 
-	pa = __pa((unsigned long)adp);
+	pa = uv_gpa(adp); /* need the real nasid*/
 	n = pa >> uv_nshift;
 	m = pa & uv_mmask;
 
@@ -667,8 +732,12 @@ uv_activation_descriptor_init(int node, int pnode)
 	for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
 		memset(ad2, 0, sizeof(struct bau_desc));
 		ad2->header.sw_ack_flag = 1;
-		ad2->header.base_dest_nodeid =
-		    uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+		/*
+		 * base_dest_nodeid is the first node in the partition, so
+		 * the bit map will indicate partition-relative node numbers.
+		 * note that base_dest_nodeid is actually a nasid.
+		 */
+		ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
 		ad2->header.command = UV_NET_ENDPOINT_INTD;
 		ad2->header.int_both = 1;
 		/*
@@ -686,6 +755,8 @@ static struct bau_payload_queue_entry * __init
 uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
 {
 	struct bau_payload_queue_entry *pqp;
+	unsigned long pa;
+	int pn;
 	char *cp;
 
 	pqp = (struct bau_payload_queue_entry *) kmalloc_node(
@@ -696,10 +767,14 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
 	cp = (char *)pqp + 31;
 	pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
 	bau_tablesp->va_queue_first = pqp;
+	/*
+	 * need the pnode of where the memory was really allocated
+	 */
+	pa = uv_gpa(pqp);
+	pn = pa >> uv_nshift;
 	uv_write_global_mmr64(pnode,
 			      UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
-			      ((unsigned long)pnode <<
-			       UV_PAYLOADQ_PNODE_SHIFT) |
+			      ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
 			      uv_physnodeaddr(pqp));
 	uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
 			      uv_physnodeaddr(pqp));
@@ -715,8 +790,9 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
 /*
  * Initialization of each UV blade's structures
  */
-static int __init uv_init_blade(int blade, int node, int cur_cpu)
+static int __init uv_init_blade(int blade)
 {
+	int node;
 	int pnode;
 	unsigned long pa;
 	unsigned long apicid;
@@ -724,16 +800,17 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
 	struct bau_payload_queue_entry *pqp;
 	struct bau_control *bau_tablesp;
 
+	node = blade_to_first_node(blade);
 	bau_tablesp = uv_table_bases_init(blade, node);
 	pnode = uv_blade_to_pnode(blade);
 	adp = uv_activation_descriptor_init(node, pnode);
 	pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
-	uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
+	uv_table_bases_finish(blade, bau_tablesp, adp);
 	/*
 	 * the below initialization can't be in firmware because the
 	 * messaging IRQ will be determined by the OS
 	 */
-	apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+	apicid = blade_to_first_apicid(blade);
 	pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
 	if ((pa & 0xff) != UV_BAU_MESSAGE) {
 		uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -748,9 +825,7 @@
 static int __init uv_bau_init(void)
 {
 	int blade;
-	int node;
 	int nblades;
-	int last_blade;
 	int cur_cpu;
 
 	if (!is_uv_system())
@@ -763,29 +838,21 @@ static int __init uv_bau_init(void)
 	uv_bau_retry_limit = 1;
 	uv_nshift = uv_hub_info->n_val;
 	uv_mmask = (1UL << uv_hub_info->n_val) - 1;
-	nblades = 0;
-	last_blade = -1;
-	cur_cpu = 0;
-	for_each_online_node(node) {
-		blade = uv_node_to_blade_id(node);
-		if (blade == last_blade)
-			continue;
-		last_blade = blade;
-		nblades++;
-	}
+	nblades = uv_num_possible_blades();
+
 	uv_bau_table_bases = (struct bau_control **)
 	    kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
 	BUG_ON(!uv_bau_table_bases);
 
-	last_blade = -1;
-	for_each_online_node(node) {
-		blade = uv_node_to_blade_id(node);
-		if (blade == last_blade)
-			continue;
-		last_blade = blade;
-		uv_init_blade(blade, node, cur_cpu);
-		cur_cpu += uv_blade_nr_possible_cpus(blade);
-	}
+	uv_partition_base_pnode = 0x7fffffff;
+	for (blade = 0; blade < nblades; blade++)
+		if (uv_blade_nr_possible_cpus(blade) &&
+			(uv_blade_to_pnode(blade) < uv_partition_base_pnode))
+			uv_partition_base_pnode = uv_blade_to_pnode(blade);
+	for (blade = 0; blade < nblades; blade++)
+		if (uv_blade_nr_possible_cpus(blade))
+			uv_init_blade(blade);
+
 	alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
 	uv_enable_timeouts();
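The core of the change is that the BAU distribution bitmap and base_dest_nodeid are now expressed relative to the partition's base pnode rather than as absolute blade numbers. The following is a standalone sketch, not part of the patch and not kernel code: it mimics the minimum-pnode search added to uv_bau_init() and the pnode-to-nasid shift noted in the descriptor comment (nasid = pnode << 1). The per-blade tables blade_pnode and blade_ncpus are made up purely for illustration.

/*
 * Standalone sketch (not kernel code): partition-relative distribution
 * bits as used by the patch above.  Assumes nasid == pnode << 1 and that
 * the partition base pnode is the smallest pnode over blades with cpus.
 */
#include <stdio.h>

/* hypothetical per-blade data for the example */
static const int blade_pnode[] = { 8, 9, 12, 13 };   /* pnode of each blade */
static const int blade_ncpus[] = { 16, 16, 0, 16 };  /* 0 = no cpus on blade */
#define NBLADES 4

int main(void)
{
	int blade, base_pnode = 0x7fffffff;

	/* same minimum search the patch adds to uv_bau_init() */
	for (blade = 0; blade < NBLADES; blade++)
		if (blade_ncpus[blade] && blade_pnode[blade] < base_pnode)
			base_pnode = blade_pnode[blade];

	/* base_dest_nodeid is a nasid: pnode << 1 */
	printf("base pnode %d, base_dest_nodeid (nasid) %d\n",
	       base_pnode, base_pnode << 1);

	/* the bit set in the distribution map is partition-relative */
	for (blade = 0; blade < NBLADES; blade++)
		if (blade_ncpus[blade])
			printf("blade %d: pnode %d -> distribution bit %d\n",
			       blade, blade_pnode[blade],
			       blade_pnode[blade] - base_pnode);
	return 0;
}

With the sample tables above, the base pnode is 8, so base_dest_nodeid would be nasid 16, blade 3 (pnode 13) sets distribution bit 5 rather than bit 13, and blade 2 is skipped because it has no cpus.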