Diffstat (limited to 'kernel/sched/cputime.c')
-rw-r--r--   kernel/sched/cputime.c   314
1 file changed, 271 insertions, 43 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 293b202fcf7..9857329ed28 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -3,6 +3,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kernel_stat.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking.h>
 #include "sched.h"
@@ -163,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);

 	/* Account for user time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }

 /*
@@ -213,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);

 	/* Account for system time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }

 /*
@@ -295,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
+	cputime_t utime, stime;
 	struct task_struct *t;

 	times->utime = sig->utime;
@@ -308,16 +310,15 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 	t = tsk;
 	do {
-		times->utime += t->utime;
-		times->stime += t->stime;
+		task_cputime(t, &utime, &stime);
+		times->utime += utime;
+		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
 	rcu_read_unlock();
 }

-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 /*
  * Account a tick to a process and cpustat
@@ -382,11 +383,12 @@ static void irqtime_account_idle_ticks(int ticks)
 		irqtime_account_process_tick(current, 0, rq);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static void irqtime_account_idle_ticks(int ticks) {}
-static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+static inline void irqtime_account_idle_ticks(int ticks) {}
+static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 						struct rq *rq) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */

+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
@@ -397,6 +399,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();

+	if (vtime_accounting_enabled())
+		return;
+
 	if (sched_clock_irqtime) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
@@ -438,8 +443,7 @@ void account_idle_ticks(unsigned long ticks)
 	account_idle_time(jiffies_to_cputime(ticks));
 }
-
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

 /*
  * Use precise platform statistics if available:
@@ -461,25 +465,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	*st = cputime.stime;
 }

-void vtime_account_system_irqsafe(struct task_struct *tsk)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	vtime_account_system(tsk);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
-
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
+	if (!vtime_accounting_enabled())
+		return;
+
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
 		vtime_account_system(prev);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	vtime_account_user(prev);
+#endif
 	arch_vtime_task_switch(prev);
 }
 #endif
@@ -493,27 +492,40 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	if (in_interrupt() || !is_idle_task(tsk))
-		vtime_account_system(tsk);
-	else
-		vtime_account_idle(tsk);
+	if (!vtime_accounting_enabled())
+		return;
+
+	if (!in_interrupt()) {
+		/*
+		 * If we interrupted user, context_tracking_in_user()
+		 * is 1 because context tracking doesn't hook
+		 * on irq entry/exit. This way we know if
+		 * we need to flush user time on kernel entry.
+		 */
+		if (context_tracking_in_user()) {
+			vtime_account_user(tsk);
+			return;
+		}
+
+		if (is_idle_task(tsk)) {
+			vtime_account_idle(tsk);
+			return;
+		}
+	}
+	vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */

-#else
-
-#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
-#endif
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */

-static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
 {
 	u64 temp = (__force u64) rtime;

-	temp *= (__force u64) utime;
+	temp *= (__force u64) stime;

 	if (sizeof(cputime_t) == 4)
 		temp = div_u64(temp, (__force u32) total);
@@ -531,10 +543,10 @@ static void cputime_adjust(struct task_cputime *curr,
 			   struct cputime *prev,
 			   cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime, total;
+	cputime_t rtime, stime, total;

-	utime = curr->utime;
-	total = utime + curr->stime;
+	stime = curr->stime;
+	total = stime + curr->utime;

 	/*
 	 * Tick based cputime accounting depend on random scheduling
@@ -549,17 +561,17 @@ static void cputime_adjust(struct task_cputime *curr,
 	rtime = nsecs_to_cputime(curr->sum_exec_runtime);

 	if (total)
-		utime = scale_utime(utime, rtime, total);
+		stime = scale_stime(stime, rtime, total);
 	else
-		utime = rtime;
+		stime = rtime;

 	/*
 	 * If the tick based count grows faster than the scheduler one,
 	 * the result of the scaling may go backward.
 	 * Let's enforce monotonicity.
 	 */
-	prev->utime = max(prev->utime, utime);
-	prev->stime = max(prev->stime, rtime - prev->utime);
+	prev->stime = max(prev->stime, stime);
+	prev->utime = max(prev->utime, rtime - prev->stime);

 	*ut = prev->utime;
 	*st = prev->stime;
@@ -568,11 +580,10 @@ static void cputime_adjust(struct task_cputime *curr,
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime = {
-		.utime = p->utime,
-		.stime = p->stime,
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};

+	task_cputime(p, &cputime.utime, &cputime.stime);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }

@@ -586,4 +597,221 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static unsigned long long vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long clock;
+
+	clock = sched_clock();
+	if (clock < tsk->vtime_snap)
+		return 0;
+
+	return clock - tsk->vtime_snap;
+}
+
+static cputime_t get_vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long delta = vtime_delta(tsk);
+
+	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
+	tsk->vtime_snap += delta;
+
+	/* CHECKME: always safe to convert nsecs to cputime? */
+	return nsecs_to_cputime(delta);
+}
+
+static void __vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	if (context_tracking_in_user())
+		tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_user(struct task_struct *tsk)
+{
+	cputime_t delta_cpu;
+
+	if (!vtime_accounting_enabled())
+		return;
+
+	delta_cpu = get_vtime_delta(tsk);
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_SYS;
+	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_user_enter(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_enter(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags |= PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_exit(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags &= ~PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_idle_time(delta_cpu);
+}
+
+bool vtime_accounting_enabled(void)
+{
+	return context_tracking_active();
+}
+
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	write_seqlock(&prev->vtime_seqlock);
+	prev->vtime_snap_whence = VTIME_SLEEPING;
+	write_sequnlock(&prev->vtime_seqlock);
+
+	write_seqlock(&current->vtime_seqlock);
+	current->vtime_snap_whence = VTIME_SYS;
+	current->vtime_snap = sched_clock();
+	write_sequnlock(&current->vtime_seqlock);
+}
+
+void vtime_init_idle(struct task_struct *t)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&t->vtime_seqlock, flags);
+	t->vtime_snap_whence = VTIME_SYS;
+	t->vtime_snap = sched_clock();
+	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
+}
+
+cputime_t task_gtime(struct task_struct *t)
+{
+	unsigned int seq;
+	cputime_t gtime;
+
+	do {
+		seq = read_seqbegin(&t->vtime_seqlock);
+
+		gtime = t->gtime;
+		if (t->flags & PF_VCPU)
+			gtime += vtime_delta(t);
+
+	} while (read_seqretry(&t->vtime_seqlock, seq));
+
+	return gtime;
+}
+
+/*
+ * Fetch cputime raw values from fields of task_struct and
+ * add up the pending nohz execution time since the last
+ * cputime snapshot.
+ */
+static void
+fetch_task_cputime(struct task_struct *t,
+		   cputime_t *u_dst, cputime_t *s_dst,
+		   cputime_t *u_src, cputime_t *s_src,
+		   cputime_t *udelta, cputime_t *sdelta)
+{
+	unsigned int seq;
+	unsigned long long delta;
+
+	do {
+		*udelta = 0;
+		*sdelta = 0;
+
+		seq = read_seqbegin(&t->vtime_seqlock);
+
+		if (u_dst)
+			*u_dst = *u_src;
+		if (s_dst)
+			*s_dst = *s_src;
+
+		/* Task is sleeping, nothing to add */
+		if (t->vtime_snap_whence == VTIME_SLEEPING ||
+		    is_idle_task(t))
+			continue;
+
+		delta = vtime_delta(t);
+
+		/*
+		 * Task runs either in user or kernel space, add pending nohz time to
+		 * the right place.
+		 */
+		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
+			*udelta = delta;
+		} else {
+			if (t->vtime_snap_whence == VTIME_SYS)
+				*sdelta = delta;
+		}
+	} while (read_seqretry(&t->vtime_seqlock, seq));
+}
+
+void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utime, stime, &t->utime,
+			   &t->stime, &udelta, &sdelta);
+	if (utime)
+		*utime += udelta;
+	if (stime)
+		*stime += sdelta;
+}
+
+void task_cputime_scaled(struct task_struct *t,
+			 cputime_t *utimescaled, cputime_t *stimescaled)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utimescaled, stimescaled,
+			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
+	if (utimescaled)
+		*utimescaled += cputime_to_scaled(udelta);
+	if (stimescaled)
+		*stimescaled += cputime_to_scaled(sdelta);
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
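A note on the scale_stime()/cputime_adjust() hunks above: the precise scheduler runtime rtime is split between stime and utime in proportion to the tick-based samples, and the prev->stime/prev->utime maximums keep the values reported to userspace from going backward between reads. Below is a minimal userspace sketch of that arithmetic, not kernel code: the demo_* names are hypothetical, cputime_t is modeled as a plain 64-bit integer, and the kernel's div_u64() path for 4-byte cputime_t is omitted.

/* Userspace sketch of the stime scaling done in cputime_adjust(). */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* Mirrors scale_stime(): stime * rtime / total, computed in 64 bits. */
static u64 demo_scale_stime(u64 stime, u64 rtime, u64 total)
{
	u64 temp = rtime;

	temp *= stime;
	return temp / total;
}

int main(void)
{
	u64 stime = 30, utime = 70;		/* tick-based samples */
	u64 total = stime + utime;
	u64 rtime = 90;				/* precise sum_exec_runtime */
	u64 prev_stime = 0, prev_utime = 0;	/* last values handed out */

	stime = total ? demo_scale_stime(stime, rtime, total) : rtime;

	/* Monotonicity enforcement, as in cputime_adjust(). */
	if (stime > prev_stime)
		prev_stime = stime;
	if (rtime - prev_stime > prev_utime)
		prev_utime = rtime - prev_stime;

	/* Prints: stime=27 utime=63 (27 + 63 == rtime) */
	printf("stime=%llu utime=%llu\n",
	       (unsigned long long)prev_stime,
	       (unsigned long long)prev_utime);
	return 0;
}

Here rtime is split 27/63, matching the 30/70 tick ratio while summing exactly to the scheduler's runtime. The new task_cputime()/task_gtime() readers above rely on a different mechanism for safety: a seqlock-protected snapshot (vtime_snap, vtime_snap_whence), re-reading under read_seqbegin()/read_seqretry() until the writer side has not moved, so pending nohz time can be added to a consistent base.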