diff options
Diffstat (limited to 'lib/vsprintf.c')
| -rw-r--r-- | lib/vsprintf.c | 297 | 
1 files changed, 201 insertions, 96 deletions
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index abbabec9720..c3f36d415bd 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -112,105 +112,198 @@ int skip_atoi(const char **s)  /* Decimal conversion is by far the most typical, and is used   * for /proc and /sys data. This directly impacts e.g. top performance   * with many processes running. We optimize it for speed - * using code from - * http://www.cs.uiowa.edu/~jones/bcd/decimal.html - * (with permission from the author, Douglas W. Jones). */ + * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html> + * (with permission from the author, Douglas W. Jones). + */ -/* Formats correctly any integer in [0,99999]. - * Outputs from one to five digits depending on input. - * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ +#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 +/* Formats correctly any integer in [0, 999999999] */  static noinline_for_stack -char *put_dec_trunc(char *buf, unsigned q) +char *put_dec_full9(char *buf, unsigned q)  { -	unsigned d3, d2, d1, d0; -	d1 = (q>>4) & 0xf; -	d2 = (q>>8) & 0xf; -	d3 = (q>>12); +	unsigned r; -	d0 = 6*(d3 + d2 + d1) + (q & 0xf); -	q = (d0 * 0xcd) >> 11; -	d0 = d0 - 10*q; -	*buf++ = d0 + '0'; /* least significant digit */ -	d1 = q + 9*d3 + 5*d2 + d1; -	if (d1 != 0) { -		q = (d1 * 0xcd) >> 11; -		d1 = d1 - 10*q; -		*buf++ = d1 + '0'; /* next digit */ +	/* +	 * Possible ways to approx. divide by 10 +	 * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) +	 * (x * 0xcccd) >> 19     x <      81920 (x < 262149 when 64-bit mul) +	 * (x * 0x6667) >> 18     x <      43699 +	 * (x * 0x3334) >> 17     x <      16389 +	 * (x * 0x199a) >> 16     x <      16389 +	 * (x * 0x0ccd) >> 15     x <      16389 +	 * (x * 0x0667) >> 14     x <       2739 +	 * (x * 0x0334) >> 13     x <       1029 +	 * (x * 0x019a) >> 12     x <       1029 +	 * (x * 0x00cd) >> 11     x <       1029 shorter code than * 0x67 (on i386) +	 * (x * 0x0067) >> 10     x <        179 +	 * (x * 0x0034) >>  9     x <         69 same +	 * (x * 0x001a) >>  8     x <         69 same +	 * (x * 0x000d) >>  7     x <         69 same, shortest code (on i386) +	 * (x * 0x0007) >>  6     x <         19 +	 * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html> +	 */ +	r      = (q * (uint64_t)0x1999999a) >> 32; +	*buf++ = (q - 10 * r) + '0'; /* 1 */ +	q      = (r * (uint64_t)0x1999999a) >> 32; +	*buf++ = (r - 10 * q) + '0'; /* 2 */ +	r      = (q * (uint64_t)0x1999999a) >> 32; +	*buf++ = (q - 10 * r) + '0'; /* 3 */ +	q      = (r * (uint64_t)0x1999999a) >> 32; +	*buf++ = (r - 10 * q) + '0'; /* 4 */ +	r      = (q * (uint64_t)0x1999999a) >> 32; +	*buf++ = (q - 10 * r) + '0'; /* 5 */ +	/* Now value is under 10000, can avoid 64-bit multiply */ +	q      = (r * 0x199a) >> 16; +	*buf++ = (r - 10 * q)  + '0'; /* 6 */ +	r      = (q * 0xcd) >> 11; +	*buf++ = (q - 10 * r)  + '0'; /* 7 */ +	q      = (r * 0xcd) >> 11; +	*buf++ = (r - 10 * q) + '0'; /* 8 */ +	*buf++ = q + '0'; /* 9 */ +	return buf; +} +#endif -		d2 = q + 2*d2; -		if ((d2 != 0) || (d3 != 0)) { -			q = (d2 * 0xd) >> 7; -			d2 = d2 - 10*q; -			*buf++ = d2 + '0'; /* next digit */ +/* Similar to above but do not pad with zeros. + * Code can be easily arranged to print 9 digits too, but our callers + * always call put_dec_full9() instead when the number has 9 decimal digits. + */ +static noinline_for_stack +char *put_dec_trunc8(char *buf, unsigned r) +{ +	unsigned q; -			d3 = q + 4*d3; -			if (d3 != 0) { -				q = (d3 * 0xcd) >> 11; -				d3 = d3 - 10*q; -				*buf++ = d3 + '0';  /* next digit */ -				if (q != 0) -					*buf++ = q + '0'; /* most sign. digit */ -			} -		} +	/* Copy of previous function's body with added early returns */ +	q      = (r * (uint64_t)0x1999999a) >> 32; +	*buf++ = (r - 10 * q) + '0'; /* 2 */ +	if (q == 0) +		return buf; +	r      = (q * (uint64_t)0x1999999a) >> 32; +	*buf++ = (q - 10 * r) + '0'; /* 3 */ +	if (r == 0) +		return buf; +	q      = (r * (uint64_t)0x1999999a) >> 32; +	*buf++ = (r - 10 * q) + '0'; /* 4 */ +	if (q == 0) +		return buf; +	r      = (q * (uint64_t)0x1999999a) >> 32; +	*buf++ = (q - 10 * r) + '0'; /* 5 */ +	if (r == 0) +		return buf; +	q      = (r * 0x199a) >> 16; +	*buf++ = (r - 10 * q)  + '0'; /* 6 */ +	if (q == 0) +		return buf; +	r      = (q * 0xcd) >> 11; +	*buf++ = (q - 10 * r)  + '0'; /* 7 */ +	if (r == 0) +		return buf; +	q      = (r * 0xcd) >> 11; +	*buf++ = (r - 10 * q) + '0'; /* 8 */ +	if (q == 0) +		return buf; +	*buf++ = q + '0'; /* 9 */ +	return buf; +} + +/* There are two algorithms to print larger numbers. + * One is generic: divide by 1000000000 and repeatedly print + * groups of (up to) 9 digits. It's conceptually simple, + * but requires a (unsigned long long) / 1000000000 division. + * + * Second algorithm splits 64-bit unsigned long long into 16-bit chunks, + * manipulates them cleverly and generates groups of 4 decimal digits. + * It so happens that it does NOT require long long division. + * + * If long is > 32 bits, division of 64-bit values is relatively easy, + * and we will use the first algorithm. + * If long long is > 64 bits (strange architecture with VERY large long long), + * second algorithm can't be used, and we again use the first one. + * + * Else (if long is 32 bits and long long is 64 bits) we use second one. + */ + +#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 + +/* First algorithm: generic */ + +static +char *put_dec(char *buf, unsigned long long n) +{ +	if (n >= 100*1000*1000) { +		while (n >= 1000*1000*1000) +			buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); +		if (n >= 100*1000*1000) +			return put_dec_full9(buf, n);  	} +	return put_dec_trunc8(buf, n); +} + +#else + +/* Second algorithm: valid only for 64-bit long longs */ +static noinline_for_stack +char *put_dec_full4(char *buf, unsigned q) +{ +	unsigned r; +	r      = (q * 0xcccd) >> 19; +	*buf++ = (q - 10 * r) + '0'; +	q      = (r * 0x199a) >> 16; +	*buf++ = (r - 10 * q)  + '0'; +	r      = (q * 0xcd) >> 11; +	*buf++ = (q - 10 * r)  + '0'; +	*buf++ = r + '0';  	return buf;  } -/* Same with if's removed. Always emits five digits */ -static noinline_for_stack -char *put_dec_full(char *buf, unsigned q) + +/* Based on code by Douglas W. Jones found at + * <http://www.cs.uiowa.edu/~jones/bcd/decimal.html#sixtyfour> + * (with permission from the author). + * Performs no 64-bit division and hence should be fast on 32-bit machines. + */ +static +char *put_dec(char *buf, unsigned long long n)  { -	/* BTW, if q is in [0,9999], 8-bit ints will be enough, */ -	/* but anyway, gcc produces better code with full-sized ints */ -	unsigned d3, d2, d1, d0; -	d1 = (q>>4) & 0xf; -	d2 = (q>>8) & 0xf; -	d3 = (q>>12); +	uint32_t d3, d2, d1, q, h; -	/* -	 * Possible ways to approx. divide by 10 -	 * gcc -O2 replaces multiply with shifts and adds -	 * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386) -	 * (x * 0x67) >> 10:  1100111 -	 * (x * 0x34) >> 9:    110100 - same -	 * (x * 0x1a) >> 8:     11010 - same -	 * (x * 0x0d) >> 7:      1101 - same, shortest code (on i386) -	 */ -	d0 = 6*(d3 + d2 + d1) + (q & 0xf); -	q = (d0 * 0xcd) >> 11; -	d0 = d0 - 10*q; -	*buf++ = d0 + '0'; -	d1 = q + 9*d3 + 5*d2 + d1; -		q = (d1 * 0xcd) >> 11; -		d1 = d1 - 10*q; -		*buf++ = d1 + '0'; +	if (n < 100*1000*1000) +		return put_dec_trunc8(buf, n); + +	d1  = ((uint32_t)n >> 16); /* implicit "& 0xffff" */ +	h   = (n >> 32); +	d2  = (h      ) & 0xffff; +	d3  = (h >> 16); /* implicit "& 0xffff" */ + +	q   = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); -		d2 = q + 2*d2; -			q = (d2 * 0xd) >> 7; -			d2 = d2 - 10*q; -			*buf++ = d2 + '0'; +	buf = put_dec_full4(buf, q % 10000); +	q   = q / 10000; -			d3 = q + 4*d3; -				q = (d3 * 0xcd) >> 11; /* - shorter code */ -				/* q = (d3 * 0x67) >> 10; - would also work */ -				d3 = d3 - 10*q; -				*buf++ = d3 + '0'; -					*buf++ = q + '0'; +	d1  = q + 7671 * d3 + 9496 * d2 + 6 * d1; +	buf = put_dec_full4(buf, d1 % 10000); +	q   = d1 / 10000; + +	d2  = q + 4749 * d3 + 42 * d2; +	buf = put_dec_full4(buf, d2 % 10000); +	q   = d2 / 10000; + +	d3  = q + 281 * d3; +	if (!d3) +		goto done; +	buf = put_dec_full4(buf, d3 % 10000); +	q   = d3 / 10000; +	if (!q) +		goto done; +	buf = put_dec_full4(buf, q); + done: +	while (buf[-1] == '0') +		--buf;  	return buf;  } -/* No inlining helps gcc to use registers better */ -static noinline_for_stack -char *put_dec(char *buf, unsigned long long num) -{ -	while (1) { -		unsigned rem; -		if (num < 100000) -			return put_dec_trunc(buf, num); -		rem = do_div(num, 100000); -		buf = put_dec_full(buf, rem); -	} -} + +#endif  /*   * Convert passed number to decimal string. @@ -220,16 +313,22 @@ char *put_dec(char *buf, unsigned long long num)   */  int num_to_str(char *buf, int size, unsigned long long num)  { -	char tmp[21];		/* Enough for 2^64 in decimal */ +	char tmp[sizeof(num) * 3];  	int idx, len; -	len = put_dec(tmp, num) - tmp; +	/* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ +	if (num <= 9) { +		tmp[0] = '0' + num; +		len = 1; +	} else { +		len = put_dec(tmp, num) - tmp; +	}  	if (len > size)  		return 0;  	for (idx = 0; idx < len; ++idx)  		buf[idx] = tmp[len - idx - 1]; -	return  len; +	return len;  }  #define ZEROPAD	1		/* pad with zero */ @@ -284,6 +383,7 @@ char *number(char *buf, char *end, unsigned long long num,  	char locase;  	int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);  	int i; +	bool is_zero = num == 0LL;  	/* locase = 0 or 0x20. ORing digits or letters with 'locase'  	 * produces same digits or (maybe lowercased) letters */ @@ -305,15 +405,16 @@ char *number(char *buf, char *end, unsigned long long num,  		}  	}  	if (need_pfx) { -		spec.field_width--;  		if (spec.base == 16) +			spec.field_width -= 2; +		else if (!is_zero)  			spec.field_width--;  	}  	/* generate full string in tmp[], in reverse order */  	i = 0; -	if (num == 0) -		tmp[i++] = '0'; +	if (num < spec.base) +		tmp[i++] = digits[num] | locase;  	/* Generic code, for any base:  	else do {  		tmp[i++] = (digits[do_div(num,base)] | locase); @@ -353,9 +454,11 @@ char *number(char *buf, char *end, unsigned long long num,  	}  	/* "0x" / "0" prefix */  	if (need_pfx) { -		if (buf < end) -			*buf = '0'; -		++buf; +		if (spec.base == 16 || !is_zero) { +			if (buf < end) +				*buf = '0'; +			++buf; +		}  		if (spec.base == 16) {  			if (buf < end)  				*buf = ('X' | locase); @@ -436,7 +539,7 @@ char *symbol_string(char *buf, char *end, void *ptr,  	else if (ext != 'f' && ext != 's')  		sprint_symbol(sym, value);  	else -		kallsyms_lookup(value, NULL, NULL, NULL, sym); +		sprint_symbol_no_offset(sym, value);  	return string(buf, end, sym, spec);  #else @@ -607,7 +710,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)  	}  	for (i = 0; i < 4; i++) {  		char temp[3];	/* hold each IP quad in reverse order */ -		int digits = put_dec_trunc(temp, addr[index]) - temp; +		int digits = put_dec_trunc8(temp, addr[index]) - temp;  		if (leading_zeros) {  			if (digits < 3)  				*p++ = '0'; @@ -866,13 +969,15 @@ static noinline_for_stack  char *pointer(const char *fmt, char *buf, char *end, void *ptr,  	      struct printf_spec spec)  { +	int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0); +  	if (!ptr && *fmt != 'K') {  		/*  		 * Print (null) with the same width as a pointer so it makes  		 * tabular output look nice.  		 */  		if (spec.field_width == -1) -			spec.field_width = 2 * sizeof(void *); +			spec.field_width = default_width;  		return string(buf, end, "(null)", spec);  	} @@ -927,7 +1032,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,  		 */  		if (in_irq() || in_serving_softirq() || in_nmi()) {  			if (spec.field_width == -1) -				spec.field_width = 2 * sizeof(void *); +				spec.field_width = default_width;  			return string(buf, end, "pK-error", spec);  		}  		if (!((kptr_restrict == 0) || @@ -944,7 +1049,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,  	}  	spec.flags |= SMALL;  	if (spec.field_width == -1) { -		spec.field_width = 2 * sizeof(void *); +		spec.field_width = default_width;  		spec.flags |= ZEROPAD;  	}  	spec.base = 16;  |