diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/Makefile | 1 | ||||
| -rw-r--r-- | lib/slre.c | 724 | 
2 files changed, 725 insertions, 0 deletions
| diff --git a/lib/Makefile b/lib/Makefile index e901cc7ca..a3131d873 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -72,6 +72,7 @@ COBJS-y += crc32.o  COBJS-y += ctype.o  COBJS-y += div64.o  COBJS-y += linux_string.o +COBJS-$(CONFIG_REGEX) += slre.o  COBJS-y += string.o  COBJS-y += time.o  COBJS-$(CONFIG_BOOTP_PXE) += uuid.o diff --git a/lib/slre.c b/lib/slre.c new file mode 100644 index 000000000..8cdd192ea --- /dev/null +++ b/lib/slre.c @@ -0,0 +1,724 @@ +/* + * Copyright (c) 2004-2005 Sergey Lyubka <valenok@gmail.com> + * All rights reserved + * + * "THE BEER-WARE LICENSE" (Revision 42): + * Sergey Lyubka wrote this file.  As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. + */ + +/* + * Downloaded Sat Nov  5 17:43:06 CET 2011 at + * http://slre.sourceforge.net/1.0/slre.c + */ + +#ifdef SLRE_TEST +#include <stdio.h> +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#else +#include <common.h> +#include <linux/ctype.h> +#endif /* SLRE_TEST */ + +#include <errno.h> + +#include <slre.h> + +enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL, +	STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT}; + +#ifdef SLRE_TEST +static struct { +	const char	*name; +	int		narg; +	const char	*flags; +} opcodes[] = { +	{"END",		0, ""},		/* End of code block or program	*/ +	{"BRANCH",	2, "oo"},	/* Alternative operator, "|"	*/ +	{"ANY",		0, ""},		/* Match any character, "."	*/ +	{"EXACT",	2, "d"},	/* Match exact string		*/ +	{"ANYOF",	2, "D"},	/* Match any from set, "[]"	*/ +	{"ANYBUT",	2, "D"},	/* Match any but from set, "[^]"*/ +	{"OPEN ",	1, "i"},	/* Capture start, "("		*/ +	{"CLOSE",	1, "i"},	/* Capture end, ")"		*/ +	{"BOL",		0, ""},		/* Beginning of string, "^"	*/ +	{"EOL",		0, ""},		/* End of string, "$"		*/ +	{"STAR",	1, "o"},	/* Match zero or more times "*"	*/ +	{"PLUS",	1, "o"},	/* Match one or more times, "+"	*/ +	{"STARQ",	1, "o"},	/* Non-greedy STAR,  "*?"	*/ +	{"PLUSQ",	1, "o"},	/* Non-greedy PLUS, "+?"	*/ +	{"QUEST",	1, "o"},	/* Match zero or one time, "?"	*/ +	{"SPACE",	0, ""},		/* Match whitespace, "\s"	*/ +	{"NONSPACE",	0, ""},		/* Match non-space, "\S"	*/ +	{"DIGIT",	0, ""}		/* Match digit, "\d"		*/ +}; +#endif /* SLRE_TEST */ + +/* + * Commands and operands are all unsigned char (1 byte long). All code offsets + * are relative to current address, and positive (always point forward). Data + * offsets are absolute. Commands with operands: + * + * BRANCH offset1 offset2 + *	Try to match the code block that follows the BRANCH instruction + *	(code block ends with END). If no match, try to match code block that + *	starts at offset1. If either of these match, jump to offset2. + * + * EXACT data_offset data_length + *	Try to match exact string. String is recorded in data section from + *	data_offset, and has length data_length. + * + * OPEN capture_number + * CLOSE capture_number + *	If the user have passed 'struct cap' array for captures, OPEN + *	records the beginning of the matched substring (cap->ptr), CLOSE + *	sets the length (cap->len) for respective capture_number. + * + * STAR code_offset + * PLUS code_offset + * QUEST code_offset + *	*, +, ?, respectively. Try to gobble as much as possible from the + *	matched buffer, until code block that follows these instructions + *	matches. When the longest possible string is matched, + *	jump to code_offset + * + * STARQ, PLUSQ are non-greedy versions of STAR and PLUS. + */ + +static const char *meta_chars = "|.^$*+?()[\\"; + +#ifdef SLRE_TEST + +static void +print_character_set(FILE *fp, const unsigned char *p, int len) +{ +	int	i; + +	for (i = 0; i < len; i++) { +		if (i > 0) +			(void) fputc(',', fp); +		if (p[i] == 0) { +			i++; +			if (p[i] == 0) +				(void) fprintf(fp, "\\x%02x", p[i]); +			else +				(void) fprintf(fp, "%s", opcodes[p[i]].name); +		} else if (isprint(p[i])) { +			(void) fputc(p[i], fp); +		} else { +			(void) fprintf(fp, "\\x%02x", p[i]); +		} +	} +} + +void +slre_dump(const struct slre *r, FILE *fp) +{ +	int	i, j, ch, op, pc; + +	for (pc = 0; pc < r->code_size; pc++) { + +		op = r->code[pc]; +		(void) fprintf(fp, "%3d %s ", pc, opcodes[op].name); + +		for (i = 0; opcodes[op].flags[i] != '\0'; i++) +			switch (opcodes[op].flags[i]) { +			case 'i': +				(void) fprintf(fp, "%d ", r->code[pc + 1]); +				pc++; +				break; +			case 'o': +				(void) fprintf(fp, "%d ", +				    pc + r->code[pc + 1] - i); +				pc++; +				break; +			case 'D': +				print_character_set(fp, r->data + +				    r->code[pc + 1], r->code[pc + 2]); +				pc += 2; +				break; +			case 'd': +				(void) fputc('"', fp); +				for (j = 0; j < r->code[pc + 2]; j++) { +					ch = r->data[r->code[pc + 1] + j]; +					if (isprint(ch)) { +						(void) fputc(ch, fp); +					} else { +						(void) fprintf(fp, +							"\\x%02x", ch); +					} +				} +				(void) fputc('"', fp); +				pc += 2; +				break; +			} + +		(void) fputc('\n', fp); +	} +} +#endif /* SLRE_TEST */ + +static void +set_jump_offset(struct slre *r, int pc, int offset) +{ +	assert(offset < r->code_size); + +	if (r->code_size - offset > 0xff) +		r->err_str = "Jump offset is too big"; +	else +		r->code[pc] = (unsigned char) (r->code_size - offset); +} + +static void +emit(struct slre *r, int code) +{ +	if (r->code_size >= (int) (sizeof(r->code) / sizeof(r->code[0]))) +		r->err_str = "RE is too long (code overflow)"; +	else +		r->code[r->code_size++] = (unsigned char) code; +} + +static void +store_char_in_data(struct slre *r, int ch) +{ +	if (r->data_size >= (int) sizeof(r->data)) +		r->err_str = "RE is too long (data overflow)"; +	else +		r->data[r->data_size++] = ch; +} + +static void +exact(struct slre *r, const char **re) +{ +	int	old_data_size = r->data_size; + +	while (**re != '\0' && (strchr(meta_chars, **re)) == NULL) +		store_char_in_data(r, *(*re)++); + +	emit(r, EXACT); +	emit(r, old_data_size); +	emit(r, r->data_size - old_data_size); +} + +static int +get_escape_char(const char **re) +{ +	int	res; + +	switch (*(*re)++) { +	case 'n': +		res = '\n'; +		break; +	case 'r': +		res = '\r'; +		break; +	case 't': +		res = '\t'; +		break; +	case '0': +		res = 0; +		break; +	case 'S': +		res = NONSPACE << 8; +		break; +	case 's': +		res = SPACE << 8; +		break; +	case 'd': +		res = DIGIT << 8; +		break; +	default: +		res = (*re)[-1]; +		break; +	} + +	return res; +} + +static void +anyof(struct slre *r, const char **re) +{ +	int	esc, old_data_size = r->data_size, op = ANYOF; + +	if (**re == '^') { +		op = ANYBUT; +		(*re)++; +	} + +	while (**re != '\0') + +		switch (*(*re)++) { +		case ']': +			emit(r, op); +			emit(r, old_data_size); +			emit(r, r->data_size - old_data_size); +			return; +			/* NOTREACHED */ +			break; +		case '\\': +			esc = get_escape_char(re); +			if ((esc & 0xff) == 0) { +				store_char_in_data(r, 0); +				store_char_in_data(r, esc >> 8); +			} else { +				store_char_in_data(r, esc); +			} +			break; +		default: +			store_char_in_data(r, (*re)[-1]); +			break; +		} + +	r->err_str = "No closing ']' bracket"; +} + +static void +relocate(struct slre *r, int begin, int shift) +{ +	emit(r, END); +	memmove(r->code + begin + shift, r->code + begin, r->code_size - begin); +	r->code_size += shift; +} + +static void +quantifier(struct slre *r, int prev, int op) +{ +	if (r->code[prev] == EXACT && r->code[prev + 2] > 1) { +		r->code[prev + 2]--; +		emit(r, EXACT); +		emit(r, r->code[prev + 1] + r->code[prev + 2]); +		emit(r, 1); +		prev = r->code_size - 3; +	} +	relocate(r, prev, 2); +	r->code[prev] = op; +	set_jump_offset(r, prev + 1, prev); +} + +static void +exact_one_char(struct slre *r, int ch) +{ +	emit(r, EXACT); +	emit(r, r->data_size); +	emit(r, 1); +	store_char_in_data(r, ch); +} + +static void +fixup_branch(struct slre *r, int fixup) +{ +	if (fixup > 0) { +		emit(r, END); +		set_jump_offset(r, fixup, fixup - 2); +	} +} + +static void +compile(struct slre *r, const char **re) +{ +	int	op, esc, branch_start, last_op, fixup, cap_no, level; + +	fixup = 0; +	level = r->num_caps; +	branch_start = last_op = r->code_size; + +	for (;;) +		switch (*(*re)++) { +		case '\0': +			(*re)--; +			return; +			/* NOTREACHED */ +			break; +		case '^': +			emit(r, BOL); +			break; +		case '$': +			emit(r, EOL); +			break; +		case '.': +			last_op = r->code_size; +			emit(r, ANY); +			break; +		case '[': +			last_op = r->code_size; +			anyof(r, re); +			break; +		case '\\': +			last_op = r->code_size; +			esc = get_escape_char(re); +			if (esc & 0xff00) +				emit(r, esc >> 8); +			else +				exact_one_char(r, esc); +			break; +		case '(': +			last_op = r->code_size; +			cap_no = ++r->num_caps; +			emit(r, OPEN); +			emit(r, cap_no); + +			compile(r, re); +			if (*(*re)++ != ')') { +				r->err_str = "No closing bracket"; +				return; +			} + +			emit(r, CLOSE); +			emit(r, cap_no); +			break; +		case ')': +			(*re)--; +			fixup_branch(r, fixup); +			if (level == 0) { +				r->err_str = "Unbalanced brackets"; +				return; +			} +			return; +			/* NOTREACHED */ +			break; +		case '+': +		case '*': +			op = (*re)[-1] == '*' ? STAR : PLUS; +			if (**re == '?') { +				(*re)++; +				op = op == STAR ? STARQ : PLUSQ; +			} +			quantifier(r, last_op, op); +			break; +		case '?': +			quantifier(r, last_op, QUEST); +			break; +		case '|': +			fixup_branch(r, fixup); +			relocate(r, branch_start, 3); +			r->code[branch_start] = BRANCH; +			set_jump_offset(r, branch_start + 1, branch_start); +			fixup = branch_start + 2; +			r->code[fixup] = 0xff; +			break; +		default: +			(*re)--; +			last_op = r->code_size; +			exact(r, re); +			break; +		} +} + +int +slre_compile(struct slre *r, const char *re) +{ +	r->err_str = NULL; +	r->code_size = r->data_size = r->num_caps = r->anchored = 0; + +	if (*re == '^') +		r->anchored++; + +	emit(r, OPEN);	/* This will capture what matches full RE */ +	emit(r, 0); + +	while (*re != '\0') +		compile(r, &re); + +	if (r->code[2] == BRANCH) +		fixup_branch(r, 4); + +	emit(r, CLOSE); +	emit(r, 0); +	emit(r, END); + +	return (r->err_str == NULL ? 1 : 0); +} + +static int match(const struct slre *, int, +		const char *, int, int *, struct cap *); + +static void +loop_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs) +{ +	int	saved_offset, matched_offset; + +	saved_offset = matched_offset = *ofs; + +	while (match(r, pc + 2, s, len, ofs, NULL)) { +		saved_offset = *ofs; +		if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL)) +			matched_offset = saved_offset; +		*ofs = saved_offset; +	} + +	*ofs = matched_offset; +} + +static void +loop_non_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs) +{ +	int	saved_offset = *ofs; + +	while (match(r, pc + 2, s, len, ofs, NULL)) { +		saved_offset = *ofs; +		if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL)) +			break; +	} + +	*ofs = saved_offset; +} + +static int +is_any_of(const unsigned char *p, int len, const char *s, int *ofs) +{ +	int	i, ch; + +	ch = s[*ofs]; + +	for (i = 0; i < len; i++) +		if (p[i] == ch) { +			(*ofs)++; +			return 1; +		} + +	return 0; +} + +static int +is_any_but(const unsigned char *p, int len, const char *s, int *ofs) +{ +	int	i, ch; + +	ch = s[*ofs]; + +	for (i = 0; i < len; i++) { +		if (p[i] == ch) +			return 0; +	} + +	(*ofs)++; +	return 1; +} + +static int +match(const struct slre *r, int pc, const char *s, int len, +		int *ofs, struct cap *caps) +{ +	int	n, saved_offset, res = 1; + +	while (res && r->code[pc] != END) { + +		assert(pc < r->code_size); +		assert(pc < (int) (sizeof(r->code) / sizeof(r->code[0]))); + +		switch (r->code[pc]) { +		case BRANCH: +			saved_offset = *ofs; +			res = match(r, pc + 3, s, len, ofs, caps); +			if (res == 0) { +				*ofs = saved_offset; +				res = match(r, pc + r->code[pc + 1], +				    s, len, ofs, caps); +			} +			pc += r->code[pc + 2]; +			break; +		case EXACT: +			res = 0; +			n = r->code[pc + 2];	/* String length */ +			if (n <= len - *ofs && !memcmp(s + *ofs, r->data + +			    r->code[pc + 1], n)) { +				(*ofs) += n; +				res = 1; +			} +			pc += 3; +			break; +		case QUEST: +			res = 1; +			saved_offset = *ofs; +			if (!match(r, pc + 2, s, len, ofs, caps)) +				*ofs = saved_offset; +			pc += r->code[pc + 1]; +			break; +		case STAR: +			res = 1; +			loop_greedy(r, pc, s, len, ofs); +			pc += r->code[pc + 1]; +			break; +		case STARQ: +			res = 1; +			loop_non_greedy(r, pc, s, len, ofs); +			pc += r->code[pc + 1]; +			break; +		case PLUS: +			res = match(r, pc + 2, s, len, ofs, caps); +			if (res == 0) +				break; + +			loop_greedy(r, pc, s, len, ofs); +			pc += r->code[pc + 1]; +			break; +		case PLUSQ: +			res = match(r, pc + 2, s, len, ofs, caps); +			if (res == 0) +				break; + +			loop_non_greedy(r, pc, s, len, ofs); +			pc += r->code[pc + 1]; +			break; +		case SPACE: +			res = 0; +			if (*ofs < len && isspace(((unsigned char *)s)[*ofs])) { +				(*ofs)++; +				res = 1; +			} +			pc++; +			break; +		case NONSPACE: +			res = 0; +			if (*ofs < len && +					!isspace(((unsigned char *)s)[*ofs])) { +				(*ofs)++; +				res = 1; +			} +			pc++; +			break; +		case DIGIT: +			res = 0; +			if (*ofs < len && isdigit(((unsigned char *)s)[*ofs])) { +				(*ofs)++; +				res = 1; +			} +			pc++; +			break; +		case ANY: +			res = 0; +			if (*ofs < len) { +				(*ofs)++; +				res = 1; +			} +			pc++; +			break; +		case ANYOF: +			res = 0; +			if (*ofs < len) +				res = is_any_of(r->data + r->code[pc + 1], +					r->code[pc + 2], s, ofs); +			pc += 3; +			break; +		case ANYBUT: +			res = 0; +			if (*ofs < len) +				res = is_any_but(r->data + r->code[pc + 1], +					r->code[pc + 2], s, ofs); +			pc += 3; +			break; +		case BOL: +			res = *ofs == 0 ? 1 : 0; +			pc++; +			break; +		case EOL: +			res = *ofs == len ? 1 : 0; +			pc++; +			break; +		case OPEN: +			if (caps != NULL) +				caps[r->code[pc + 1]].ptr = s + *ofs; +			pc += 2; +			break; +		case CLOSE: +			if (caps != NULL) +				caps[r->code[pc + 1]].len = (s + *ofs) - +				    caps[r->code[pc + 1]].ptr; +			pc += 2; +			break; +		case END: +			pc++; +			break; +		default: +			printf("unknown cmd (%d) at %d\n", r->code[pc], pc); +			assert(0); +			break; +		} +	} + +	return res; +} + +int +slre_match(const struct slre *r, const char *buf, int len, +		struct cap *caps) +{ +	int	i, ofs = 0, res = 0; + +	if (r->anchored) { +		res = match(r, 0, buf, len, &ofs, caps); +	} else { +		for (i = 0; i < len && res == 0; i++) { +			ofs = i; +			res = match(r, 0, buf, len, &ofs, caps); +		} +	} + +	return res; +} + +#ifdef SLRE_TEST +#define N_CAPS	5 + +int main(int argc, char *argv[]) +{ +	struct slre	slre; +	struct cap	caps[N_CAPS]; +	unsigned char	data[1 * 1024 * 1024]; +	FILE		*fp; +	int		i, res, len; + +	if (argc < 2) { +		fprintf(stderr, "Usage: %s 'slre' <file>\n", argv[0]); +		return 1; +	} + +	fp = fopen(argv[2], "rb"); +	if (fp == NULL) { +		fprintf(stderr, "Error: cannot open %s:%s\n", +			argv[2], strerror(errno)); +		return 1; +	} + +	if (!slre_compile(&slre, argv[1])) { +		fprintf(stderr, "Error compiling slre: %s\n", slre.err_str); +		return 1; +	} +	 +	slre_dump(&slre, stderr); + +	while (fgets(data, sizeof(data), fp) != NULL) { +		len = strlen(data); + +		if ((len > 0) && (data[len-1] == '\n')) { +			data[len-1] = '\0'; +			--len; +		} + +		printf("Data = \"%s\"\n", data); + +		(void) memset(caps, 0, sizeof(caps)); + +		res = 0; + +		res = slre_match(&slre, data, len, caps); +		printf("Result [%d]: %d\n", i, res); + +		for (i = 0; i < N_CAPS; i++) { +			if (caps[i].len > 0) { +				printf("Substring %d: len=%d  [%.*s]\n", i, +					caps[i].len, +					caps[i].len, caps[i].ptr); +			} +		} +		printf("----------------------------------------------------\n"); +	} +	(void) fclose(fp); + +	return 0; +} +#endif /* SLRE_TEST */ |