diff options
| author | Wolfgang Denk <wd@denx.de> | 2013-03-23 23:50:34 +0000 | 
|---|---|---|
| committer | Tom Rini <trini@ti.com> | 2013-05-01 16:24:01 -0400 | 
| commit | 855f18ea0e6ff83ec9ed5af74e82bc4bff30867d (patch) | |
| tree | 2f1e664ce52eafdf3f3099437d4825fb8b774549 | |
| parent | 103c94b10481cf8479c9d0356e5202bc9200a04f (diff) | |
| download | olio-uboot-2014.01-855f18ea0e6ff83ec9ed5af74e82bc4bff30867d.tar.xz olio-uboot-2014.01-855f18ea0e6ff83ec9ed5af74e82bc4bff30867d.zip | |
setexpr: add regex substring matching and substitution
Add "setexpr name gsub r s [t]" and "setexpr name sub r s [t]"
commands which implement substring matching for the regular
expression <r> in the string <t>, and substitution of the string <s>.
The result is assigned to the environment variable <name>.  If <t> is
not supplied, the previous value of <name> is used instead.  "gsub"
performs global substitution, while "sub" will replace only the first
substring.
Both commands are closely modeled after the gawk functions with the
same names.
Examples:
- Generate broadcast address by substituting the last two numbers of
  the IP address by "255.255":
  	=> print ipaddr
	ipaddr=192.168.1.104
	=> setexpr broadcast sub "(.*\\.).*\\..*" "\\1255.255" $ipaddr
	broadcast=192.168.255.255
- Depending on keyboard configuration (German vs. US keyboard) a
  barcode scanner may initialize the MAC address as C0:E5:4E:02:06:DC
  or as C0>E5>4E>02>06>DC.  Make sure we always have a correct value:
	=> print ethaddr
	ethaddr=C0>E5>4E>02>06>DC
	=> setexpr ethaddr gsub > :
	ethaddr=C0:E5:4E:02:06:DC
- Do the same, but substitute one step at a time in a loop until no
  futher matches:
	=> setenv ethaddr C0>E5>4E>02>06>DC
	=> while setexpr ethaddr sub > :
	> do
	> echo -----
	> done
	ethaddr=C0:E5>4E>02>06>DC
	-----
	ethaddr=C0:E5:4E>02>06>DC
	-----
	ethaddr=C0:E5:4E:02>06>DC
	-----
	ethaddr=C0:E5:4E:02:06>DC
	-----
	ethaddr=C0:E5:4E:02:06:DC
	-----
	C0:E5:4E:02:06:DC: No match
	=> print ethaddr
	ethaddr=C0:E5:4E:02:06:DC
etc.
To enable this feature, the CONFIG_REGEX option has to be defined in
the board config file.
Signed-off-by: Wolfgang Denk <wd@denx.de>
| -rw-r--r-- | common/cmd_setexpr.c | 280 | 
1 files changed, 277 insertions, 3 deletions
| diff --git a/common/cmd_setexpr.c b/common/cmd_setexpr.c index ccd87f4bd..93cb255b2 100644 --- a/common/cmd_setexpr.c +++ b/common/cmd_setexpr.c @@ -1,5 +1,6 @@  /*   * Copyright 2008 Freescale Semiconductor, Inc. + * Copyright 2013 Wolfgang Denk <wd@denx.de>   *   * See file CREDITS for list of people who contributed to this   * project. @@ -50,13 +51,263 @@ static ulong get_arg(char *s, int w)  	}  } +#ifdef CONFIG_REGEX + +#include <slre.h> + +#define SLRE_BUFSZ	16384 +#define SLRE_PATSZ	4096 + +/* + * memstr - Find the first substring in memory + * @s1: The string to be searched + * @s2: The string to search for + * + * Similar to and based on strstr(), + * but strings do not need to be NUL terminated. + */ +static char *memstr(const char *s1, int l1, const char *s2, int l2) +{ +	if (!l2) +		return (char *)s1; + +	while (l1 >= l2) { +		l1--; +		if (!memcmp(s1, s2, l2)) +			return (char *)s1; +		s1++; +	} +	return NULL; +} + +static char *substitute(char *string,	/* string buffer */ +			int *slen,	/* current string length */ +			int ssize,	/* string bufer size */ +			const char *old,/* old (replaced) string */ +			int olen,	/* length of old string */ +			const char *new,/* new (replacement) string */ +			int nlen)	/* length of new string */ +{ +	char *p = memstr(string, *slen, old, olen); + +	if (p == NULL) +		return NULL; + +	debug("## Match at pos %ld: match len %d, subst len %d\n", +		(long)(p - string), olen, nlen); + +	/* make sure replacement matches */ +	if (*slen + nlen - olen > ssize) { +		printf("## error: substitution buffer overflow\n"); +		return NULL; +	} + +	/* move tail if needed */ +	if (olen != nlen) { +		int tail, len; + +		len = (olen > nlen) ? olen : nlen; + +		tail = ssize - (p + len - string); + +		debug("## tail len %d\n", tail); + +		memmove(p + nlen, p + olen, tail); +	} + +	/* insert substitue */ +	memcpy(p, new, nlen); + +	*slen += nlen - olen; + +	return p + nlen; +} + +/* + * Perform regex operations on a environment variable + * + * Returns 0 if OK, 1 in case of errors. + */ +static int regex_sub(const char *name, +	const char *r, const char *s, const char *t, +	int global) +{ +	struct slre slre; +	char data[SLRE_BUFSZ]; +	char *datap = data; +	const char *value; +	int res, len, nlen, loop; + +	if (name == NULL) +		return 1; + +	if (slre_compile(&slre, r) == 0) { +		printf("Error compiling regex: %s\n", slre.err_str); +		return 1; +	} + +	if (t == NULL) { +		value = getenv(name); + +		if (value == NULL) { +			printf("## Error: variable \"%s\" not defined\n", name); +			return 1; +		} +		t = value; +	} + +	debug("REGEX on %s=%s\n", name, t); +	debug("REGEX=\"%s\", SUBST=\"%s\", GLOBAL=%d\n", +		r, s ? s : "<NULL>", global); + +	len = strlen(t); +	if (len + 1 > SLRE_BUFSZ) { +		printf("## error: subst buffer overflow: have %d, need %d\n", +			SLRE_BUFSZ, len + 1); +		return 1; +	} + +	strcpy(data, t); + +	if (s == NULL) +		nlen = 0; +	else +		nlen = strlen(s); + +	for (loop = 0;; loop++) { +		struct cap caps[slre.num_caps + 2]; +		char nbuf[SLRE_PATSZ]; +		const char *old; +		char *np; +		int i, olen; + +		(void) memset(caps, 0, sizeof(caps)); + +		res = slre_match(&slre, datap, len, caps); + +		debug("Result: %d\n", res); + +		for (i = 0; i < slre.num_caps; i++) { +			if (caps[i].len > 0) { +				debug("Substring %d: [%.*s]\n", i, +					caps[i].len, caps[i].ptr); +			} +		} + +		if (res == 0) { +			if (loop == 0) { +				printf("%s: No match\n", t); +				return 1; +			} else { +				break; +			} +		} + +		debug("## MATCH ## %s\n", data); + +		if (s == NULL) { +			printf("%s=%s\n", name, t); +			return 1; +		} + +		old = caps[0].ptr; +		olen = caps[0].len; + +		if (nlen + 1 >= SLRE_PATSZ) { +			printf("## error: pattern buffer overflow: have %d, need %d\n", +				SLRE_BUFSZ, nlen + 1); +			return 1; +		} +		strcpy(nbuf, s); + +		debug("## SUBST(1) ## %s\n", nbuf); + +		/* +		 * Handle back references +		 * +		 * Support for \0 ... \9, where \0 is the +		 * whole matched pattern (similar to &). +		 * +		 * Implementation is a bit simpleminded as +		 * backrefs are substituted sequentially, one +		 * by one.  This will lead to somewhat +		 * unexpected results if the replacement +		 * strings contain any \N strings then then +		 * may get substitued, too.  We accept this +		 * restriction for the sake of simplicity. +		 */ +		for (i = 0; i < 10; ++i) { +			char backref[2] = { +				'\\', +				'0', +			}; + +			if (caps[i].len == 0) +				break; + +			backref[1] += i; + +			debug("## BACKREF %d: replace \"%.*s\" by \"%.*s\" in \"%s\"\n", +				i, +				2, backref, +				caps[i].len, caps[i].ptr, +				nbuf); + +			for (np = nbuf;;) { +				char *p = memstr(np, nlen, backref, 2); + +				if (p == NULL) +					break; + +				np = substitute(np, &nlen, +					SLRE_PATSZ, +					backref, 2, +					caps[i].ptr, caps[i].len); + +				if (np == NULL) +					return 1; +			} +		} +		debug("## SUBST(2) ## %s\n", nbuf); + +		datap = substitute(datap, &len, SLRE_BUFSZ, +				old, olen, +				nbuf, nlen); + +		if (datap == NULL) +			return 1; + +		debug("## REMAINDER: %s\n", datap); + +		debug("## RESULT: %s\n", data); + +		if (!global) +			break; +	} +	debug("## FINAL (now setenv()) :  %s\n", data); + +	printf("%s=%s\n", name, data); + +	return setenv(name, data); +} +#endif +  static int do_setexpr(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])  {  	ulong a, b;  	ulong value;  	int w; -	if (argc < 3) +	/* +	 * We take 3, 5, or 6 arguments: +	 * 3 : setexpr name value +	 * 5 : setexpr name val1 op val2 +	 *     setexpr name [g]sub r s +	 * 6 : setexpr name [g]sub r s t +	 */ + +	/* > 6 already tested by max command args */ +	if ((argc < 3) || (argc == 4))  		return CMD_RET_USAGE;  	w = cmd_get_data_size(argv[0], 4); @@ -69,6 +320,19 @@ static int do_setexpr(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])  		return 0;  	} +	/* 5 or 6 args (6 args only with [g]sub) */ +#ifdef CONFIG_REGEX +	/* +	 * rexep handling: "setexpr name [g]sub r s [t]" +	 * with 5 args, "t" will be NULL +	 */ +	if (strcmp(argv[2], "gsub") == 0) +		return regex_sub(argv[1], argv[3], argv[4], argv[5], 1); + +	if (strcmp(argv[2], "sub") == 0) +		return regex_sub(argv[1], argv[3], argv[4], argv[5], 0); +#endif +  	/* standard operators: "setexpr name val1 op val2" */  	if (argc != 5)  		return CMD_RET_USAGE; @@ -114,13 +378,23 @@ static int do_setexpr(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[])  }  U_BOOT_CMD( -	setexpr, 5, 0, do_setexpr, +	setexpr, 6, 0, do_setexpr,  	"set environment variable as the result of eval expression",  	"[.b, .w, .l] name [*]value1 <op> [*]value2\n"  	"    - set environment variable 'name' to the result of the evaluated\n" -	"      express specified by <op>.  <op> can be &, |, ^, +, -, *, /, %\n" +	"      expression specified by <op>.  <op> can be &, |, ^, +, -, *, /, %\n"  	"      size argument is only meaningful if value1 and/or value2 are\n"  	"      memory addresses (*)\n"  	"setexpr[.b, .w, .l] name [*]value\n"  	"    - load a value into a variable" +#ifdef CONFIG_REGEX +	"\n" +	"setexpr name gsub r s [t]\n" +	"    - For each substring matching the regular expression <r> in the\n" +	"      string <t>, substitute the string <s>.  The result is\n" +	"      assigned to <name>.  If <t> is not supplied, use the old\n" +	"      value of <name>\n" +	"setexpr name sub r s [t]\n" +	"    - Just like gsub(), but replace only the first matching substring" +#endif  ); |