Diffstat (limited to 'kernel')
-rw-r--r--  kernel/hrtimer.c           |  67
-rw-r--r--  kernel/posix-cpu-timers.c  | 218
-rw-r--r--  kernel/time.c              |  11
-rw-r--r--  kernel/time/ntp.c          |   2
-rw-r--r--  kernel/time/timekeeping.c  |  35
-rw-r--r--  kernel/timer.c             | 137
6 files changed, 235 insertions(+), 235 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0086628b6e9..b9b134b3508 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)  }  /** - * schedule_hrtimeout_range - sleep until timeout + * schedule_hrtimeout_range_clock - sleep until timeout   * @expires:	timeout value (ktime_t)   * @delta:	slack in expires timeout (ktime_t)   * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL - * - * Make the current task sleep until the given expiry time has - * elapsed. The routine will return immediately unless - * the current task state has been set (see set_current_state()). - * - * The @delta argument gives the kernel the freedom to schedule the - * actual wakeup to a time that is both power and performance friendly. - * The kernel give the normal best effort behavior for "@expires+@delta", - * but may decide to fire the timer earlier, but no earlier than @expires. - * - * You can set the task state as follows - - * - * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to - * pass before the routine returns. - * - * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. - * - * The current task state is guaranteed to be TASK_RUNNING when this - * routine returns. - * - * Returns 0 when the timer has expired otherwise -EINTR + * @clock:	timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME   */ -int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, -			       const enum hrtimer_mode mode) +int __sched +schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, +			       const enum hrtimer_mode mode, int clock)  {  	struct hrtimer_sleeper t; @@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,  		return -EINTR;  	} -	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); +	hrtimer_init_on_stack(&t.timer, clock, mode);  	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);  	hrtimer_init_sleeper(&t, current); @@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,  	return !t.task ? 0 : -EINTR;  } + +/** + * schedule_hrtimeout_range - sleep until timeout + * @expires:	timeout value (ktime_t) + * @delta:	slack in expires timeout (ktime_t) + * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * The @delta argument gives the kernel the freedom to schedule the + * actual wakeup to a time that is both power and performance friendly. + * The kernel give the normal best effort behavior for "@expires+@delta", + * but may decide to fire the timer earlier, but no earlier than @expires. + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. 
+ * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, +				     const enum hrtimer_mode mode) +{ +	return schedule_hrtimeout_range_clock(expires, delta, mode, +					      CLOCK_MONOTONIC); +}  EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);  /** diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bc7704b3a44..799f360d147 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -11,19 +11,18 @@  #include <trace/events/timer.h>  /* - * Called after updating RLIMIT_CPU to set timer expiration if necessary. + * Called after updating RLIMIT_CPU to run cpu timer and update + * tsk->signal->cputime_expires expiration cache if necessary. Needs + * siglock protection since other code may update expiration cache as + * well.   */  void update_rlimit_cpu(unsigned long rlim_new)  {  	cputime_t cputime = secs_to_cputime(rlim_new); -	struct signal_struct *const sig = current->signal; -	if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || -	    cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { -		spin_lock_irq(&current->sighand->siglock); -		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); -		spin_unlock_irq(&current->sighand->siglock); -	} +	spin_lock_irq(&current->sighand->siglock); +	set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); +	spin_unlock_irq(&current->sighand->siglock);  }  static int check_clock(const clockid_t which_clock) @@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)  	       cputime_gt(expires, new_exp);  } -static inline int expires_le(cputime_t expires, cputime_t new_exp) -{ -	return !cputime_eq(expires, cputime_zero) && -	       cputime_le(expires, new_exp); -}  /*   * Insert the timer on the appropriate list before any timers that   * expire later.  This must be called with the tasklist_lock held - * for reading, and interrupts disabled. + * for reading, interrupts disabled and p->sighand->siglock taken.   */ -static void arm_timer(struct k_itimer *timer, union cpu_time_count now) +static void arm_timer(struct k_itimer *timer)  {  	struct task_struct *p = timer->it.cpu.task;  	struct list_head *head, *listpos; +	struct task_cputime *cputime_expires;  	struct cpu_timer_list *const nt = &timer->it.cpu;  	struct cpu_timer_list *next; -	unsigned long i; -	head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? -		p->cpu_timers : p->signal->cpu_timers); +	if (CPUCLOCK_PERTHREAD(timer->it_clock)) { +		head = p->cpu_timers; +		cputime_expires = &p->cputime_expires; +	} else { +		head = p->signal->cpu_timers; +		cputime_expires = &p->signal->cputime_expires; +	}  	head += CPUCLOCK_WHICH(timer->it_clock); -	BUG_ON(!irqs_disabled()); -	spin_lock(&p->sighand->siglock); -  	listpos = head; -	if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { -		list_for_each_entry(next, head, entry) { -			if (next->expires.sched > nt->expires.sched) -				break; -			listpos = &next->entry; -		} -	} else { -		list_for_each_entry(next, head, entry) { -			if (cputime_gt(next->expires.cpu, nt->expires.cpu)) -				break; -			listpos = &next->entry; -		} +	list_for_each_entry(next, head, entry) { +		if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) +			break; +		listpos = &next->entry;  	}  	list_add(&nt->entry, listpos);  	if (listpos == head) { +		union cpu_time_count *exp = &nt->expires; +  		/* -		 * We are the new earliest-expiring timer. 
-		 * If we are a thread timer, there can always -		 * be a process timer telling us to stop earlier. +		 * We are the new earliest-expiring POSIX 1.b timer, hence +		 * need to update expiration cache. Take into account that +		 * for process timers we share expiration cache with itimers +		 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.  		 */ -		if (CPUCLOCK_PERTHREAD(timer->it_clock)) { -			union cpu_time_count *exp = &nt->expires; - -			switch (CPUCLOCK_WHICH(timer->it_clock)) { -			default: -				BUG(); -			case CPUCLOCK_PROF: -				if (expires_gt(p->cputime_expires.prof_exp, -					       exp->cpu)) -					p->cputime_expires.prof_exp = exp->cpu; -				break; -			case CPUCLOCK_VIRT: -				if (expires_gt(p->cputime_expires.virt_exp, -					       exp->cpu)) -					p->cputime_expires.virt_exp = exp->cpu; -				break; -			case CPUCLOCK_SCHED: -				if (p->cputime_expires.sched_exp == 0 || -				    p->cputime_expires.sched_exp > exp->sched) -					p->cputime_expires.sched_exp = -								exp->sched; -				break; -			} -		} else { -			struct signal_struct *const sig = p->signal; -			union cpu_time_count *exp = &timer->it.cpu.expires; - -			/* -			 * For a process timer, set the cached expiration time. -			 */ -			switch (CPUCLOCK_WHICH(timer->it_clock)) { -			default: -				BUG(); -			case CPUCLOCK_VIRT: -				if (expires_le(sig->it[CPUCLOCK_VIRT].expires, -					       exp->cpu)) -					break; -				sig->cputime_expires.virt_exp = exp->cpu; -				break; -			case CPUCLOCK_PROF: -				if (expires_le(sig->it[CPUCLOCK_PROF].expires, -					       exp->cpu)) -					break; -				i = sig->rlim[RLIMIT_CPU].rlim_cur; -				if (i != RLIM_INFINITY && -				    i <= cputime_to_secs(exp->cpu)) -					break; -				sig->cputime_expires.prof_exp = exp->cpu; -				break; -			case CPUCLOCK_SCHED: -				sig->cputime_expires.sched_exp = exp->sched; -				break; -			} +		switch (CPUCLOCK_WHICH(timer->it_clock)) { +		case CPUCLOCK_PROF: +			if (expires_gt(cputime_expires->prof_exp, exp->cpu)) +				cputime_expires->prof_exp = exp->cpu; +			break; +		case CPUCLOCK_VIRT: +			if (expires_gt(cputime_expires->virt_exp, exp->cpu)) +				cputime_expires->virt_exp = exp->cpu; +			break; +		case CPUCLOCK_SCHED: +			if (cputime_expires->sched_exp == 0 || +			    cputime_expires->sched_exp > exp->sched) +				cputime_expires->sched_exp = exp->sched; +			break;  		}  	} - -	spin_unlock(&p->sighand->siglock);  }  /* @@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)   */  static void cpu_timer_fire(struct k_itimer *timer)  { -	if (unlikely(timer->sigq == NULL)) { +	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { +		/* +		 * User don't want any signal. +		 */ +		timer->it.cpu.expires.sched = 0; +	} else if (unlikely(timer->sigq == NULL)) {  		/*  		 * This a special case for clock_nanosleep,  		 * not a normal timer from sys_timer_create. 
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,  			struct itimerspec *new, struct itimerspec *old)  {  	struct task_struct *p = timer->it.cpu.task; -	union cpu_time_count old_expires, new_expires, val; +	union cpu_time_count old_expires, new_expires, old_incr, val;  	int ret;  	if (unlikely(p == NULL)) { @@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,  	BUG_ON(!irqs_disabled());  	ret = 0; +	old_incr = timer->it.cpu.incr;  	spin_lock(&p->sighand->siglock);  	old_expires = timer->it.cpu.expires;  	if (unlikely(timer->it.cpu.firing)) { @@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,  		ret = TIMER_RETRY;  	} else  		list_del_init(&timer->it.cpu.entry); -	spin_unlock(&p->sighand->siglock);  	/*  	 * We need to sample the current value to convert the new @@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,  		 * disable this firing since we are already reporting  		 * it as an overrun (thanks to bump_cpu_timer above).  		 */ +		spin_unlock(&p->sighand->siglock);  		read_unlock(&tasklist_lock);  		goto out;  	} @@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,  	 */  	timer->it.cpu.expires = new_expires;  	if (new_expires.sched != 0 && -	    (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&  	    cpu_time_before(timer->it_clock, val, new_expires)) { -		arm_timer(timer, val); +		arm_timer(timer);  	} +	spin_unlock(&p->sighand->siglock);  	read_unlock(&tasklist_lock);  	/* @@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,  	timer->it_overrun = -1;  	if (new_expires.sched != 0 && -	    (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&  	    !cpu_time_before(timer->it_clock, val, new_expires)) {  		/*  		 * The designated time already passed, so we notify @@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,   out:  	if (old) {  		sample_to_timespec(timer->it_clock, -				   timer->it.cpu.incr, &old->it_interval); +				   old_incr, &old->it_interval);  	}  	return ret;  } @@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)  		read_unlock(&tasklist_lock);  	} -	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { -		if (timer->it.cpu.incr.sched == 0 && -		    cpu_time_before(timer->it_clock, -				    timer->it.cpu.expires, now)) { -			/* -			 * Do-nothing timer expired and has no reload, -			 * so it's as if it was never set. -			 */ -			timer->it.cpu.expires.sched = 0; -			itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; -			return; -		} -		/* -		 * Account for any expirations and reloads that should -		 * have happened. -		 */ -		bump_cpu_timer(timer, now); -	} -  	if (unlikely(clear_dead)) {  		/*  		 * We've noticed that the thread is dead, but @@ -1270,6 +1206,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)  			goto out;  		}  		read_lock(&tasklist_lock); /* arm_timer needs it.  */ +		spin_lock(&p->sighand->siglock);  	} else {  		read_lock(&tasklist_lock);  		if (unlikely(p->signal == NULL)) { @@ -1290,6 +1227,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)  			clear_dead_task(timer, now);  			goto out_unlock;  		} +		spin_lock(&p->sighand->siglock);  		cpu_timer_sample_group(timer->it_clock, p, &now);  		bump_cpu_timer(timer, now);  		/* Leave the tasklist_lock locked for the call below.  
*/ @@ -1298,7 +1236,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)  	/*  	 * Now re-arm for the new expiry time.  	 */ -	arm_timer(timer, now); +	BUG_ON(!irqs_disabled()); +	arm_timer(timer); +	spin_unlock(&p->sighand->siglock);  out_unlock:  	read_unlock(&tasklist_lock); @@ -1390,7 +1330,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)  			return 1;  	} -	return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; +	return 0;  }  /* @@ -1456,21 +1396,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)  }  /* - * Set one of the process-wide special case CPU timers. + * Set one of the process-wide special case CPU timers or RLIMIT_CPU.   * The tsk->sighand->siglock must be held by the caller. - * The *newval argument is relative and we update it to be absolute, *oldval - * is absolute and we update it to be relative.   */  void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,  			   cputime_t *newval, cputime_t *oldval)  {  	union cpu_time_count now; -	struct list_head *head;  	BUG_ON(clock_idx == CPUCLOCK_SCHED);  	cpu_timer_sample_group(clock_idx, tsk, &now);  	if (oldval) { +		/* +		 * We are setting itimer. The *oldval is absolute and we update +		 * it to be relative, *newval argument is relative and we update +		 * it to be absolute. +		 */  		if (!cputime_eq(*oldval, cputime_zero)) {  			if (cputime_le(*oldval, now.cpu)) {  				/* Just about to fire. */ @@ -1483,33 +1425,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,  		if (cputime_eq(*newval, cputime_zero))  			return;  		*newval = cputime_add(*newval, now.cpu); - -		/* -		 * If the RLIMIT_CPU timer will expire before the -		 * ITIMER_PROF timer, we have nothing else to do. -		 */ -		if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur -		    < cputime_to_secs(*newval)) -			return;  	}  	/* -	 * Check whether there are any process timers already set to fire -	 * before this one.  If so, we don't have anything more to do. +	 * Update expiration cache if we are the earliest timer, or eventually +	 * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.  	 
*/ -	head = &tsk->signal->cpu_timers[clock_idx]; -	if (list_empty(head) || -	    cputime_ge(list_first_entry(head, -				  struct cpu_timer_list, entry)->expires.cpu, -		       *newval)) { -		switch (clock_idx) { -		case CPUCLOCK_PROF: +	switch (clock_idx) { +	case CPUCLOCK_PROF: +		if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))  			tsk->signal->cputime_expires.prof_exp = *newval; -			break; -		case CPUCLOCK_VIRT: +		break; +	case CPUCLOCK_VIRT: +		if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))  			tsk->signal->cputime_expires.virt_exp = *newval; -			break; -		} +		break;  	}  } diff --git a/kernel/time.c b/kernel/time.c index 656dccfe1cb..50612faa9ba 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,   */  static inline void warp_clock(void)  { -	write_seqlock_irq(&xtime_lock); -	wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; -	xtime.tv_sec += sys_tz.tz_minuteswest * 60; -	update_xtime_cache(0); -	write_sequnlock_irq(&xtime_lock); -	clock_was_set(); +	struct timespec delta, adjust; +	delta.tv_sec = sys_tz.tz_minuteswest * 60; +	delta.tv_nsec = 0; +	adjust = timespec_add_safe(current_kernel_time(), delta); +	do_settimeofday(&adjust);  }  /* diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7c0f180d6e9..c63116863a8 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -69,7 +69,7 @@ static s64			time_freq;  /* time at last adjustment (secs):					*/  static long			time_reftime; -long				time_adjust; +static long			time_adjust;  /* constant (boot-param configurable) NTP tick adjustment (upscaled)	*/  static s64			ntp_tick_adj; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 39f6177fafa..caf8d4d4f5c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -165,13 +165,6 @@ struct timespec raw_time;  /* flag for if timekeeping is suspended */  int __read_mostly timekeeping_suspended; -static struct timespec xtime_cache __attribute__ ((aligned (16))); -void update_xtime_cache(u64 nsec) -{ -	xtime_cache = xtime; -	timespec_add_ns(&xtime_cache, nsec); -} -  /* must hold xtime_lock */  void timekeeping_leap_insert(int leapsecond)  { @@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)  	xtime = *tv; -	update_xtime_cache(0); -  	timekeeper.ntp_error = 0;  	ntp_clear(); @@ -559,7 +550,6 @@ void __init timekeeping_init(void)  	}  	set_normalized_timespec(&wall_to_monotonic,  				-boot.tv_sec, -boot.tv_nsec); -	update_xtime_cache(0);  	total_sleep_time.tv_sec = 0;  	total_sleep_time.tv_nsec = 0;  	write_sequnlock_irqrestore(&xtime_lock, flags); @@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)  		wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);  		total_sleep_time = timespec_add_safe(total_sleep_time, ts);  	} -	update_xtime_cache(0);  	/* re-base the last cycle value */  	timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);  	timekeeper.ntp_error = 0; @@ -788,7 +777,6 @@ void update_wall_time(void)  {  	struct clocksource *clock;  	cycle_t offset; -	u64 nsecs;  	int shift = 0, maxshift;  	/* Make sure we're fully resumed: */ @@ -847,7 +835,9 @@ void update_wall_time(void)  		timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;  	} -	/* store full nanoseconds into xtime after rounding it up and + +	/* +	 * Store full nanoseconds into xtime after rounding it up and  	 * add the remainder to the error difference.  	 
*/  	xtime.tv_nsec =	((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; @@ -855,8 +845,15 @@ void update_wall_time(void)  	timekeeper.ntp_error +=	timekeeper.xtime_nsec <<  				timekeeper.ntp_error_shift; -	nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); -	update_xtime_cache(nsecs); +	/* +	 * Finally, make sure that after the rounding +	 * xtime.tv_nsec isn't larger then NSEC_PER_SEC +	 */ +	if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) { +		xtime.tv_nsec -= NSEC_PER_SEC; +		xtime.tv_sec++; +		second_overflow(); +	}  	/* check to see if there is a new clocksource to use */  	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); @@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);  unsigned long get_seconds(void)  { -	return xtime_cache.tv_sec; +	return xtime.tv_sec;  }  EXPORT_SYMBOL(get_seconds);  struct timespec __current_kernel_time(void)  { -	return xtime_cache; +	return xtime;  }  struct timespec current_kernel_time(void) @@ -913,7 +910,7 @@ struct timespec current_kernel_time(void)  	do {  		seq = read_seqbegin(&xtime_lock); -		now = xtime_cache; +		now = xtime;  	} while (read_seqretry(&xtime_lock, seq));  	return now; @@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void)  	do {  		seq = read_seqbegin(&xtime_lock); -		now = xtime_cache; +		now = xtime;  		mono = wall_to_monotonic;  	} while (read_seqretry(&xtime_lock, seq)); diff --git a/kernel/timer.c b/kernel/timer.c index aeb6a54f277..9199f3c5221 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)  }  EXPORT_SYMBOL_GPL(round_jiffies_up_relative); +/** + * set_timer_slack - set the allowed slack for a timer + * @slack_hz: the amount of time (in jiffies) allowed for rounding + * + * Set the amount of time, in jiffies, that a certain timer has + * in terms of slack. By setting this value, the timer subsystem + * will schedule the actual timer somewhere between + * the time mod_timer() asks for, and that time plus the slack. + * + * By setting the slack to -1, a percentage of the delay is used + * instead. 
+ */ +void set_timer_slack(struct timer_list *timer, int slack_hz) +{ +	timer->slack = slack_hz; +} +EXPORT_SYMBOL_GPL(set_timer_slack); +  static inline void set_running_timer(struct tvec_base *base,  					struct timer_list *timer) @@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer,  {  	timer->entry.next = NULL;  	timer->base = __raw_get_cpu_var(tvec_bases); +	timer->slack = -1;  #ifdef CONFIG_TIMER_STATS  	timer->start_site = NULL;  	timer->start_pid = -1; @@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)  }  EXPORT_SYMBOL(mod_timer_pending); +/* + * Decide where to put the timer while taking the slack into account + * + * Algorithm: + *   1) calculate the maximum (absolute) time + *   2) calculate the highest bit where the expires and new max are different + *   3) use this bit to make a mask + *   4) use the bitmask to round down the maximum time, so that all last + *      bits are zeros + */ +static inline +unsigned long apply_slack(struct timer_list *timer, unsigned long expires) +{ +	unsigned long expires_limit, mask; +	int bit; + +	expires_limit = expires + timer->slack; + +	if (timer->slack < 0) /* auto slack: use 0.4% */ +		expires_limit = expires + (expires - jiffies)/256; + +	mask = expires ^ expires_limit; + +	if (mask == 0) +		return expires; + +	bit = find_last_bit(&mask, BITS_PER_LONG); + +	mask = (1 << bit) - 1; + +	expires_limit = expires_limit & ~(mask); + +	return expires_limit; +} +  /**   * mod_timer - modify a timer's timeout   * @timer: the timer to be modified @@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)  	if (timer_pending(timer) && timer->expires == expires)  		return 1; +	expires = apply_slack(timer, expires); +  	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);  }  EXPORT_SYMBOL(mod_timer); @@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)  	return index;  } +static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), +			  unsigned long data) +{ +	int preempt_count = preempt_count(); + +#ifdef CONFIG_LOCKDEP +	/* +	 * It is permissible to free the timer from inside the +	 * function that is called from it, this we need to take into +	 * account for lockdep too. To avoid bogus "held lock freed" +	 * warnings as well as problems when looking into +	 * timer->lockdep_map, make a copy and use that here. +	 */ +	struct lockdep_map lockdep_map = timer->lockdep_map; +#endif +	/* +	 * Couple the lock chain with the lock chain at +	 * del_timer_sync() by acquiring the lock_map around the fn() +	 * call here and in del_timer_sync(). +	 */ +	lock_map_acquire(&lockdep_map); + +	trace_timer_expire_entry(timer); +	fn(data); +	trace_timer_expire_exit(timer); + +	lock_map_release(&lockdep_map); + +	if (preempt_count != preempt_count()) { +		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", +			  fn, preempt_count, preempt_count()); +		/* +		 * Restore the preempt count. That gives us a decent +		 * chance to survive and extract information. If the +		 * callback kept a lock held, bad luck, but not worse +		 * than the BUG() we had. 
+		 */ +		preempt_count() = preempt_count; +	} +} +  #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)  /** @@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)  			detach_timer(timer, 1);  			spin_unlock_irq(&base->lock); -			{ -				int preempt_count = preempt_count(); - -#ifdef CONFIG_LOCKDEP -				/* -				 * It is permissible to free the timer from -				 * inside the function that is called from -				 * it, this we need to take into account for -				 * lockdep too. To avoid bogus "held lock -				 * freed" warnings as well as problems when -				 * looking into timer->lockdep_map, make a -				 * copy and use that here. -				 */ -				struct lockdep_map lockdep_map = -					timer->lockdep_map; -#endif -				/* -				 * Couple the lock chain with the lock chain at -				 * del_timer_sync() by acquiring the lock_map -				 * around the fn() call here and in -				 * del_timer_sync(). -				 */ -				lock_map_acquire(&lockdep_map); - -				trace_timer_expire_entry(timer); -				fn(data); -				trace_timer_expire_exit(timer); - -				lock_map_release(&lockdep_map); - -				if (preempt_count != preempt_count()) { -					printk(KERN_ERR "huh, entered %p " -					       "with preempt_count %08x, exited" -					       " with %08x?\n", -					       fn, preempt_count, -					       preempt_count()); -					BUG(); -				} -			} +			call_timer_fn(timer, fn, data);  			spin_lock_irq(&base->lock);  		}  	}  |
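
The hrtimer.c change splits the old schedule_hrtimeout_range() into a clock-aware schedule_hrtimeout_range_clock() and a thin CLOCK_MONOTONIC wrapper, leaving the caller-visible semantics in the kerneldoc untouched. As a reminder of those semantics, here is a minimal kernel-style sketch of a caller sleeping with slack (it assumes kernel/module context rather than a standalone program; the 10 ms timeout and 1 ms slack are illustrative values, not taken from this patch):

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/* Sleep for ~10 ms, letting the kernel place the actual wakeup anywhere
 * in [timeout, timeout + 1 ms] so it can be batched with other timers. */
static int example_sleep_with_slack(void)
{
	ktime_t timeout = ktime_set(0, 10 * NSEC_PER_MSEC);

	set_current_state(TASK_INTERRUPTIBLE);
	/* returns 0 on expiry, -EINTR if a signal ended the sleep early */
	return schedule_hrtimeout_range(&timeout, NSEC_PER_MSEC,
					HRTIMER_MODE_REL);
}

With the new schedule_hrtimeout_range_clock() variant, the same call pattern can be pointed at CLOCK_REALTIME instead of CLOCK_MONOTONIC by passing the clock id explicitly.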
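On the posix-cpu-timers.c side, arm_timer() and set_process_cpu_timer() now update the shared cputime_expires cache with one rule, encoded by expires_gt(): a value of zero means nothing is cached, and a new expiration only replaces the cached one if it is earlier. A standalone sketch of that rule with plain integers, purely illustrative:

#include <stdio.h>

/* Same rule as the expires_gt()-guarded updates in the patch:
 * 0 means "no expiry cached"; otherwise keep the earliest value. */
static void cache_expiry(unsigned long long *cached, unsigned long long new_exp)
{
	if (*cached == 0 || *cached > new_exp)
		*cached = new_exp;
}

int main(void)
{
	unsigned long long prof_exp = 0;		/* empty cache */

	cache_expiry(&prof_exp, 500);	/* first timer arms the cache */
	cache_expiry(&prof_exp, 800);	/* later expiry: cache unchanged */
	cache_expiry(&prof_exp, 200);	/* earlier expiry: cache moves up */

	printf("prof_exp = %llu\n", prof_exp);	/* prints 200 */
	return 0;
}

Because update_rlimit_cpu() now folds RLIMIT_CPU into this same cache unconditionally, fastpath_timer_check() no longer needs to consult the rlimit separately, which is why its RLIMIT_CPU test collapses to "return 0".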
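The time.c hunk replaces warp_clock()'s direct manipulation of xtime and wall_to_monotonic with an ordinary do_settimeofday() of the current time shifted by the timezone offset. A userspace sketch of just that arithmetic, with tz_minuteswest = 300 (UTC-5) chosen purely as an example:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec now, adjust;
	int tz_minuteswest = 300;	/* illustrative: 300 minutes west of GMT */

	clock_gettime(CLOCK_REALTIME, &now);
	adjust = now;
	/* the kernel uses timespec_add_safe(), which also guards overflow */
	adjust.tv_sec += tz_minuteswest * 60;

	printf("now=%lld warped=%lld\n",
	       (long long)now.tv_sec, (long long)adjust.tv_sec);
	/* in-kernel, &adjust would then be handed to do_settimeofday() */
	return 0;
}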
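In timer.c the patch adds a per-timer slack field, a set_timer_slack() setter, and slack-aware rounding in mod_timer(). A minimal kernel-style sketch of a driver opting into a generous slack for a housekeeping timer; the timer name, the HZ of slack and the 10-second period are illustrative, and the callback signature is the unsigned-long form used by timers of this era:

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list housekeeping_timer;

static void housekeeping_fn(unsigned long data)
{
	/* periodic cleanup where the exact firing time is unimportant */
}

static void housekeeping_arm(void)
{
	init_timer(&housekeeping_timer);
	housekeeping_timer.function = housekeeping_fn;
	housekeeping_timer.data = 0;
	/* allow the wakeup to slip by up to one second (HZ jiffies) */
	set_timer_slack(&housekeeping_timer, HZ);
	mod_timer(&housekeeping_timer, jiffies + 10 * HZ);
}

Timers that never call set_timer_slack() keep the default of -1, which selects the automatic 0.4% slack computed in apply_slack().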
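The rounding done by apply_slack() picks a value inside [expires, expires + slack] whose low bits are zero, so timers with nearby deadlines collapse onto the same jiffy and share a wakeup. The sketch below re-creates that bit manipulation in ordinary userspace C so the effect can be printed and inspected; __builtin_clzl stands in for the kernel's find_last_bit(), and only the non-negative-slack path is modelled (the slack == -1 auto case uses (expires - jiffies)/256 instead):

#include <stdio.h>

static unsigned long slack_round(unsigned long expires, unsigned long slack)
{
	unsigned long limit = expires + slack;	/* latest acceptable expiry */
	unsigned long mask = expires ^ limit;	/* bits where the two differ */
	int bit;

	if (mask == 0)
		return expires;

	/* index of the highest differing bit (find_last_bit() in-kernel) */
	bit = (int)(sizeof(unsigned long) * 8) - 1 - __builtin_clzl(mask);

	/* clear everything below that bit in the upper bound */
	return limit & ~((1UL << bit) - 1);
}

int main(void)
{
	unsigned long expires = 1000123, slack = 100;	/* jiffies, illustrative */

	printf("expires=%lu slack=%lu rounded=%lu\n",
	       expires, slack, slack_round(expires, slack));
	/* prints rounded=1000192, which lies inside [1000123, 1000223] */
	return 0;
}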
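Finally, the body of __run_timers() moves into call_timer_fn(), which still checks that the callback did not leak a preempt count, but now warns once and restores the count instead of calling BUG(). A standalone sketch of that "check around a callback, then repair" pattern, using an ordinary integer in place of the real preempt counter:

#include <stdio.h>

static int fake_preempt_count;

static void buggy_callback(void)
{
	fake_preempt_count++;	/* simulates forgetting to drop a lock */
}

static void call_checked(void (*fn)(void))
{
	int before = fake_preempt_count;

	fn();
	if (before != fake_preempt_count) {
		fprintf(stderr, "callback leaked count: %d -> %d, restoring\n",
			before, fake_preempt_count);
		/* like call_timer_fn(): restore and keep going rather than die */
		fake_preempt_count = before;
	}
}

int main(void)
{
	call_checked(buggy_callback);
	return 0;
}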