diff options
| author | Dimitri Sivanich <sivanich@sgi.com> | 2008-04-30 00:53:35 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-30 08:29:41 -0700 | 
| commit | cbacdd9572285c86848dd323dc764abb3681ddbc (patch) | |
| tree | d0568712f7371598cd5d7b43693b04f99f9265ef /drivers/char/mmtimer.c | |
| parent | d17468c73e138e1108b279acf892dd35937d43ed (diff) | |
| download | olio-linux-3.10-cbacdd9572285c86848dd323dc764abb3681ddbc.tar.xz olio-linux-3.10-cbacdd9572285c86848dd323dc764abb3681ddbc.zip  | |
SGI Altix mmtimer: allow larger number of timers per node
The purpose of this patch to the SGI Altix specific mmtimer (posix timer)
driver is to allow a virtually infinite number of timers to be set per
node.
Timers will now be kept on a sorted per-node list and a single node-based
hardware comparator is used to trigger the next timer.
[akpm@linux-foundation.org: mark things static]
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/char/mmtimer.c')
| -rw-r--r-- | drivers/char/mmtimer.c | 400 | 
1 files changed, 244 insertions, 156 deletions
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index e60a74c66e3..d83db5d880e 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -74,9 +74,8 @@ static const struct file_operations mmtimer_fops = {   * We only have comparison registers RTC1-4 currently available per   * node.  RTC0 is used by SAL.   */ -#define NUM_COMPARATORS 3  /* Check for an RTC interrupt pending */ -static int inline mmtimer_int_pending(int comparator) +static int mmtimer_int_pending(int comparator)  {  	if (HUB_L((unsigned long *)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)) &  			SH_EVENT_OCCURRED_RTC1_INT_MASK << comparator) @@ -84,15 +83,16 @@ static int inline mmtimer_int_pending(int comparator)  	else  		return 0;  } +  /* Clear the RTC interrupt pending bit */ -static void inline mmtimer_clr_int_pending(int comparator) +static void mmtimer_clr_int_pending(int comparator)  {  	HUB_S((u64 *)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS),  		SH_EVENT_OCCURRED_RTC1_INT_MASK << comparator);  }  /* Setup timer on comparator RTC1 */ -static void inline mmtimer_setup_int_0(u64 expires) +static void mmtimer_setup_int_0(int cpu, u64 expires)  {  	u64 val; @@ -106,7 +106,7 @@ static void inline mmtimer_setup_int_0(u64 expires)  	mmtimer_clr_int_pending(0);  	val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC1_INT_CONFIG_IDX_SHFT) | -		((u64)cpu_physical_id(smp_processor_id()) << +		((u64)cpu_physical_id(cpu) <<  			SH_RTC1_INT_CONFIG_PID_SHFT);  	/* Set configuration */ @@ -122,7 +122,7 @@ static void inline mmtimer_setup_int_0(u64 expires)  }  /* Setup timer on comparator RTC2 */ -static void inline mmtimer_setup_int_1(u64 expires) +static void mmtimer_setup_int_1(int cpu, u64 expires)  {  	u64 val; @@ -133,7 +133,7 @@ static void inline mmtimer_setup_int_1(u64 expires)  	mmtimer_clr_int_pending(1);  	val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC2_INT_CONFIG_IDX_SHFT) | -		((u64)cpu_physical_id(smp_processor_id()) << +		((u64)cpu_physical_id(cpu) <<  			SH_RTC2_INT_CONFIG_PID_SHFT);  	HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC2_INT_CONFIG), val); @@ -144,7 +144,7 @@ static void inline mmtimer_setup_int_1(u64 expires)  }  /* Setup timer on comparator RTC3 */ -static void inline mmtimer_setup_int_2(u64 expires) +static void mmtimer_setup_int_2(int cpu, u64 expires)  {  	u64 val; @@ -155,7 +155,7 @@ static void inline mmtimer_setup_int_2(u64 expires)  	mmtimer_clr_int_pending(2);  	val = ((u64)SGI_MMTIMER_VECTOR << SH_RTC3_INT_CONFIG_IDX_SHFT) | -		((u64)cpu_physical_id(smp_processor_id()) << +		((u64)cpu_physical_id(cpu) <<  			SH_RTC3_INT_CONFIG_PID_SHFT);  	HUB_S((u64 *)LOCAL_MMR_ADDR(SH_RTC3_INT_CONFIG), val); @@ -170,22 +170,22 @@ static void inline mmtimer_setup_int_2(u64 expires)   * in order to insure that the setup succeeds in a deterministic time frame.   * It will check if the interrupt setup succeeded.   */ -static int inline mmtimer_setup(int comparator, unsigned long expires) +static int mmtimer_setup(int cpu, int comparator, unsigned long expires)  {  	switch (comparator) {  	case 0: -		mmtimer_setup_int_0(expires); +		mmtimer_setup_int_0(cpu, expires);  		break;  	case 1: -		mmtimer_setup_int_1(expires); +		mmtimer_setup_int_1(cpu, expires);  		break;  	case 2: -		mmtimer_setup_int_2(expires); +		mmtimer_setup_int_2(cpu, expires);  		break;  	}  	/* We might've missed our expiration time */ -	if (rtc_time() < expires) +	if (rtc_time() <= expires)  		return 1;  	/* @@ -195,7 +195,7 @@ static int inline mmtimer_setup(int comparator, unsigned long expires)  	return mmtimer_int_pending(comparator);  } -static int inline mmtimer_disable_int(long nasid, int comparator) +static int mmtimer_disable_int(long nasid, int comparator)  {  	switch (comparator) {  	case 0: @@ -216,18 +216,124 @@ static int inline mmtimer_disable_int(long nasid, int comparator)  	return 0;  } -#define TIMER_OFF 0xbadcabLL +#define COMPARATOR	1		/* The comparator to use */ -/* There is one of these for each comparator */ -typedef struct mmtimer { -	spinlock_t lock ____cacheline_aligned; +#define TIMER_OFF	0xbadcabLL	/* Timer is not setup */ +#define TIMER_SET	0		/* Comparator is set for this timer */ + +/* There is one of these for each timer */ +struct mmtimer { +	struct rb_node list;  	struct k_itimer *timer; -	int i;  	int cpu; +}; + +struct mmtimer_node { +	spinlock_t lock ____cacheline_aligned; +	struct rb_root timer_head; +	struct rb_node *next;  	struct tasklet_struct tasklet; -} mmtimer_t; +}; +static struct mmtimer_node *timers; + + +/* + * Add a new mmtimer struct to the node's mmtimer list. + * This function assumes the struct mmtimer_node is locked. + */ +static void mmtimer_add_list(struct mmtimer *n) +{ +	int nodeid = n->timer->it.mmtimer.node; +	unsigned long expires = n->timer->it.mmtimer.expires; +	struct rb_node **link = &timers[nodeid].timer_head.rb_node; +	struct rb_node *parent = NULL; +	struct mmtimer *x; + +	/* +	 * Find the right place in the rbtree: +	 */ +	while (*link) { +		parent = *link; +		x = rb_entry(parent, struct mmtimer, list); + +		if (expires < x->timer->it.mmtimer.expires) +			link = &(*link)->rb_left; +		else +			link = &(*link)->rb_right; +	} + +	/* +	 * Insert the timer to the rbtree and check whether it +	 * replaces the first pending timer +	 */ +	rb_link_node(&n->list, parent, link); +	rb_insert_color(&n->list, &timers[nodeid].timer_head); + +	if (!timers[nodeid].next || expires < rb_entry(timers[nodeid].next, +			struct mmtimer, list)->timer->it.mmtimer.expires) +		timers[nodeid].next = &n->list; +} + +/* + * Set the comparator for the next timer. + * This function assumes the struct mmtimer_node is locked. + */ +static void mmtimer_set_next_timer(int nodeid) +{ +	struct mmtimer_node *n = &timers[nodeid]; +	struct mmtimer *x; +	struct k_itimer *t; +	int o; + +restart: +	if (n->next == NULL) +		return; -static mmtimer_t ** timers; +	x = rb_entry(n->next, struct mmtimer, list); +	t = x->timer; +	if (!t->it.mmtimer.incr) { +		/* Not an interval timer */ +		if (!mmtimer_setup(x->cpu, COMPARATOR, +					t->it.mmtimer.expires)) { +			/* Late setup, fire now */ +			tasklet_schedule(&n->tasklet); +		} +		return; +	} + +	/* Interval timer */ +	o = 0; +	while (!mmtimer_setup(x->cpu, COMPARATOR, t->it.mmtimer.expires)) { +		unsigned long e, e1; +		struct rb_node *next; +		t->it.mmtimer.expires += t->it.mmtimer.incr << o; +		t->it_overrun += 1 << o; +		o++; +		if (o > 20) { +			printk(KERN_ALERT "mmtimer: cannot reschedule timer\n"); +			t->it.mmtimer.clock = TIMER_OFF; +			n->next = rb_next(&x->list); +			rb_erase(&x->list, &n->timer_head); +			kfree(x); +			goto restart; +		} + +		e = t->it.mmtimer.expires; +		next = rb_next(&x->list); + +		if (next == NULL) +			continue; + +		e1 = rb_entry(next, struct mmtimer, list)-> +			timer->it.mmtimer.expires; +		if (e > e1) { +			n->next = next; +			rb_erase(&x->list, &n->timer_head); +			mmtimer_add_list(x); +			goto restart; +		} +	} +}  /**   * mmtimer_ioctl - ioctl interface for /dev/mmtimer @@ -390,35 +496,6 @@ static int sgi_clock_set(clockid_t clockid, struct timespec *tp)  	return 0;  } -/* - * Schedule the next periodic interrupt. This function will attempt - * to schedule a periodic interrupt later if necessary. If the scheduling - * of an interrupt fails then the time to skip is lengthened - * exponentially in order to ensure that the next interrupt - * can be properly scheduled.. - */ -static int inline reschedule_periodic_timer(mmtimer_t *x) -{ -	int n; -	struct k_itimer *t = x->timer; - -	t->it.mmtimer.clock = x->i; -	t->it_overrun--; - -	n = 0; -	do { - -		t->it.mmtimer.expires += t->it.mmtimer.incr << n; -		t->it_overrun += 1 << n; -		n++; -		if (n > 20) -			return 1; - -	} while (!mmtimer_setup(x->i, t->it.mmtimer.expires)); - -	return 0; -} -  /**   * mmtimer_interrupt - timer interrupt handler   * @irq: irq received @@ -435,71 +512,75 @@ static int inline reschedule_periodic_timer(mmtimer_t *x)  static irqreturn_t  mmtimer_interrupt(int irq, void *dev_id)  { -	int i;  	unsigned long expires = 0;  	int result = IRQ_NONE;  	unsigned indx = cpu_to_node(smp_processor_id()); +	struct mmtimer *base; -	/* -	 * Do this once for each comparison register -	 */ -	for (i = 0; i < NUM_COMPARATORS; i++) { -		mmtimer_t *base = timers[indx] + i; -		/* Make sure this doesn't get reused before tasklet_sched */ -		spin_lock(&base->lock); -		if (base->cpu == smp_processor_id()) { -			if (base->timer) -				expires = base->timer->it.mmtimer.expires; -			/* expires test won't work with shared irqs */ -			if ((mmtimer_int_pending(i) > 0) || -				(expires && (expires < rtc_time()))) { -				mmtimer_clr_int_pending(i); -				tasklet_schedule(&base->tasklet); -				result = IRQ_HANDLED; -			} +	spin_lock(&timers[indx].lock); +	base = rb_entry(timers[indx].next, struct mmtimer, list); +	if (base == NULL) { +		spin_unlock(&timers[indx].lock); +		return result; +	} + +	if (base->cpu == smp_processor_id()) { +		if (base->timer) +			expires = base->timer->it.mmtimer.expires; +		/* expires test won't work with shared irqs */ +		if ((mmtimer_int_pending(COMPARATOR) > 0) || +			(expires && (expires <= rtc_time()))) { +			mmtimer_clr_int_pending(COMPARATOR); +			tasklet_schedule(&timers[indx].tasklet); +			result = IRQ_HANDLED;  		} -		spin_unlock(&base->lock); -		expires = 0;  	} +	spin_unlock(&timers[indx].lock);  	return result;  } -void mmtimer_tasklet(unsigned long data) { -	mmtimer_t *x = (mmtimer_t *)data; -	struct k_itimer *t = x->timer; +static void mmtimer_tasklet(unsigned long data) +{ +	int nodeid = data; +	struct mmtimer_node *mn = &timers[nodeid]; +	struct mmtimer *x = rb_entry(mn->next, struct mmtimer, list); +	struct k_itimer *t;  	unsigned long flags; -	if (t == NULL) -		return; -  	/* Send signal and deal with periodic signals */ -	spin_lock_irqsave(&t->it_lock, flags); -	spin_lock(&x->lock); -	/* If timer was deleted between interrupt and here, leave */ -	if (t != x->timer) +	spin_lock_irqsave(&mn->lock, flags); +	if (!mn->next)  		goto out; -	t->it_overrun = 0; -	if (posix_timer_event(t, 0) != 0) { +	x = rb_entry(mn->next, struct mmtimer, list); +	t = x->timer; + +	if (t->it.mmtimer.clock == TIMER_OFF) +		goto out; + +	t->it_overrun = 0; -		// printk(KERN_WARNING "mmtimer: cannot deliver signal.\n"); +	mn->next = rb_next(&x->list); +	rb_erase(&x->list, &mn->timer_head); +	if (posix_timer_event(t, 0) != 0)  		t->it_overrun++; -	} +  	if(t->it.mmtimer.incr) { -		/* Periodic timer */ -		if (reschedule_periodic_timer(x)) { -			printk(KERN_WARNING "mmtimer: unable to reschedule\n"); -			x->timer = NULL; -		} +		t->it.mmtimer.expires += t->it.mmtimer.incr; +		mmtimer_add_list(x);  	} else {  		/* Ensure we don't false trigger in mmtimer_interrupt */ +		t->it.mmtimer.clock = TIMER_OFF;  		t->it.mmtimer.expires = 0; +		kfree(x);  	} +	/* Set comparator for next timer, if there is one */ +	mmtimer_set_next_timer(nodeid); +  	t->it_overrun_last = t->it_overrun;  out: -	spin_unlock(&x->lock); -	spin_unlock_irqrestore(&t->it_lock, flags); +	spin_unlock_irqrestore(&mn->lock, flags);  }  static int sgi_timer_create(struct k_itimer *timer) @@ -516,19 +597,50 @@ static int sgi_timer_create(struct k_itimer *timer)   */  static int sgi_timer_del(struct k_itimer *timr)  { -	int i = timr->it.mmtimer.clock;  	cnodeid_t nodeid = timr->it.mmtimer.node; -	mmtimer_t *t = timers[nodeid] + i;  	unsigned long irqflags; -	if (i != TIMER_OFF) { -		spin_lock_irqsave(&t->lock, irqflags); -		mmtimer_disable_int(cnodeid_to_nasid(nodeid),i); -		t->timer = NULL; +	spin_lock_irqsave(&timers[nodeid].lock, irqflags); +	if (timr->it.mmtimer.clock != TIMER_OFF) { +		unsigned long expires = timr->it.mmtimer.expires; +		struct rb_node *n = timers[nodeid].timer_head.rb_node; +		struct mmtimer *uninitialized_var(t); +		int r = 0; +  		timr->it.mmtimer.clock = TIMER_OFF;  		timr->it.mmtimer.expires = 0; -		spin_unlock_irqrestore(&t->lock, irqflags); + +		while (n) { +			t = rb_entry(n, struct mmtimer, list); +			if (t->timer == timr) +				break; + +			if (expires < t->timer->it.mmtimer.expires) +				n = n->rb_left; +			else +				n = n->rb_right; +		} + +		if (!n) { +			spin_unlock_irqrestore(&timers[nodeid].lock, irqflags); +			return 0; +		} + +		if (timers[nodeid].next == n) { +			timers[nodeid].next = rb_next(n); +			r = 1; +		} + +		rb_erase(n, &timers[nodeid].timer_head); +		kfree(t); + +		if (r) { +			mmtimer_disable_int(cnodeid_to_nasid(nodeid), +				COMPARATOR); +			mmtimer_set_next_timer(nodeid); +		}  	} +	spin_unlock_irqrestore(&timers[nodeid].lock, irqflags);  	return 0;  } @@ -557,12 +669,11 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,  	struct itimerspec * new_setting,  	struct itimerspec * old_setting)  { - -	int i;  	unsigned long when, period, irqflags;  	int err = 0;  	cnodeid_t nodeid; -	mmtimer_t *base; +	struct mmtimer *base; +	struct rb_node *n;  	if (old_setting)  		sgi_timer_get(timr, old_setting); @@ -575,6 +686,10 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,  		/* Clear timer */  		return 0; +	base = kmalloc(sizeof(struct mmtimer), GFP_KERNEL); +	if (base == NULL) +		return -ENOMEM; +  	if (flags & TIMER_ABSTIME) {  		struct timespec n;  		unsigned long now; @@ -604,47 +719,38 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,  	preempt_disable();  	nodeid =  cpu_to_node(smp_processor_id()); -retry: -	/* Don't use an allocated timer, or a deleted one that's pending */ -	for(i = 0; i< NUM_COMPARATORS; i++) { -		base = timers[nodeid] + i; -		if (!base->timer && !base->tasklet.state) { -			break; -		} -	} -	if (i == NUM_COMPARATORS) { -		preempt_enable(); -		return -EBUSY; -	} - -	spin_lock_irqsave(&base->lock, irqflags); +	/* Lock the node timer structure */ +	spin_lock_irqsave(&timers[nodeid].lock, irqflags); -	if (base->timer || base->tasklet.state != 0) { -		spin_unlock_irqrestore(&base->lock, irqflags); -		goto retry; -	}  	base->timer = timr;  	base->cpu = smp_processor_id(); -	timr->it.mmtimer.clock = i; +	timr->it.mmtimer.clock = TIMER_SET;  	timr->it.mmtimer.node = nodeid;  	timr->it.mmtimer.incr = period;  	timr->it.mmtimer.expires = when; -	if (period == 0) { -		if (!mmtimer_setup(i, when)) { -			mmtimer_disable_int(-1, i); -			posix_timer_event(timr, 0); -			timr->it.mmtimer.expires = 0; -		} -	} else { -		timr->it.mmtimer.expires -= period; -		if (reschedule_periodic_timer(base)) -			err = -EINVAL; +	n = timers[nodeid].next; + +	/* Add the new struct mmtimer to node's timer list */ +	mmtimer_add_list(base); + +	if (timers[nodeid].next == n) { +		/* No need to reprogram comparator for now */ +		spin_unlock_irqrestore(&timers[nodeid].lock, irqflags); +		preempt_enable(); +		return err;  	} -	spin_unlock_irqrestore(&base->lock, irqflags); +	/* We need to reprogram the comparator */ +	if (n) +		mmtimer_disable_int(cnodeid_to_nasid(nodeid), COMPARATOR); + +	mmtimer_set_next_timer(nodeid); + +	/* Unlock the node timer structure */ +	spin_unlock_irqrestore(&timers[nodeid].lock, irqflags);  	preempt_enable(); @@ -669,7 +775,6 @@ static struct k_clock sgi_clock = {   */  static int __init mmtimer_init(void)  { -	unsigned i;  	cnodeid_t node, maxn = -1;  	if (!ia64_platform_is("sn2")) @@ -706,31 +811,18 @@ static int __init mmtimer_init(void)  	maxn++;  	/* Allocate list of node ptrs to mmtimer_t's */ -	timers = kzalloc(sizeof(mmtimer_t *)*maxn, GFP_KERNEL); +	timers = kzalloc(sizeof(struct mmtimer_node)*maxn, GFP_KERNEL);  	if (timers == NULL) {  		printk(KERN_ERR "%s: failed to allocate memory for device\n",  				MMTIMER_NAME);  		goto out3;  	} -	/* Allocate mmtimer_t's for each online node */ +	/* Initialize struct mmtimer's for each online node */  	for_each_online_node(node) { -		timers[node] = kmalloc_node(sizeof(mmtimer_t)*NUM_COMPARATORS, GFP_KERNEL, node); -		if (timers[node] == NULL) { -			printk(KERN_ERR "%s: failed to allocate memory for device\n", -				MMTIMER_NAME); -			goto out4; -		} -		for (i=0; i< NUM_COMPARATORS; i++) { -			mmtimer_t * base = timers[node] + i; - -			spin_lock_init(&base->lock); -			base->timer = NULL; -			base->cpu = 0; -			base->i = i; -			tasklet_init(&base->tasklet, mmtimer_tasklet, -				(unsigned long) (base)); -		} +		spin_lock_init(&timers[node].lock); +		tasklet_init(&timers[node].tasklet, mmtimer_tasklet, +			(unsigned long) node);  	}  	sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second; @@ -741,11 +833,8 @@ static int __init mmtimer_init(void)  	return 0; -out4: -	for_each_online_node(node) { -		kfree(timers[node]); -	}  out3: +	kfree(timers);  	misc_deregister(&mmtimer_miscdev);  out2:  	free_irq(SGI_MMTIMER_VECTOR, NULL); @@ -754,4 +843,3 @@ out1:  }  module_init(mmtimer_init); -  |