[apparmor] [patch 2/5] parser: remove length restriction in convert_aaregex_to_pcre usage

Wed Dec 11 03:29:43 UTC 2013

On Mon, Dec 09, 2013 at 12:37:11PM -0800, Steve Beattie wrote:
> This patch removes the string length limit in convert_aaregex_to_pcre()
> usage. One of the benefits to moving to C++ is the ability to use
> std::strings, which dynamically resize themselves. While it's a large
> patch, a non-trivial amount is due to needing to get a char * string
> back out via the c_str() method.
> 
> The unit tests are modified to include checks to ensure that
> convert_aaregex_to_pcre only appends to the passed pcre string,
> it never resets it.
> 
> As the test case with overlong alternations added in the previous
> patch now passes, the TODO status is removed from it.
> 
> (Note: there's a couple of FIXME comments related to converting typebuf
> to std::string that are added by this patch that are addressed in the
> next patch. I kept that conversion separate to try to reduce the size
> of this patch a little.)
> 
> Signed-off-by: Steve Beattie <steve at nxnw.org>

This review will have to happen in two stages; about half-way through I
realized I was going overlook something if I kept going. So, keeping in
mind that this is only the first half of the patch, and you're liable
enough to fix up things in future patches and I haven't gone looking for
those, feel free to ignore whatever you want. (Well, you're always free to
ignore me. :)

Anyway, the bulk of this is really great. It's easy to see how C++
started winning converts, some of these cleanups are removing some
crufty-feeling old stuff...

Some comments inline.

> ---
>  parser/parser_regex.c                             |  363 ++++++++++------------
>  parser/tst/simple_tests/file/ok_alternations_3.sd |    1 
>  2 files changed, 174 insertions(+), 190 deletions(-)
> 
> Index: b/parser/parser_regex.c
> ===================================================================
> --- a/parser/parser_regex.c
> +++ b/parser/parser_regex.c
> @@ -24,6 +24,8 @@
>  #include <apparmor.h>
>  #define _(s) gettext(s)
>  
> +#include <string>
> +
>  /* #define DEBUG */
>  
>  #include "parser.h"
> @@ -81,17 +83,11 @@ static void filter_slashes(char *path)
>  	*dptr = 0;
>  }
>  
> +/* converts the apparmor regex in aare and appends pcre regex output
> + * to pcre string */
>  static pattern_t convert_aaregex_to_pcre(const char *aare, int anchor,
> -					 char *pcre, size_t pcre_size,
> -					 int *first_re_pos)
> +					 std::string& pcre, int *first_re_pos)
>  {
> -#define STORE(_src, _dest, _len) \
> -	if ((const char*)_dest + _len > (pcre + pcre_size)){ \
> -		error = e_buffer_overflow; \
> -	} else { \
> -		memcpy(_dest, _src, _len); \
> -		_dest += _len; \
> -	}

This is the only place e_buffer_overflow is ever assigned; please feel
free to remove its enum and the associated check to see if the buffer
would have overflowed later on in this function.

>  #define update_re_pos(X) if (!(*first_re_pos)) { *first_re_pos = (X); }

This might be a good candidate for future cleanups; the only times that
callers seemed to care about the value passed back in first_re_pos the
callers were forced to check if the returned type was ePatternBasic and
re-compute this with strlen(). I haven't actually looked into what it does
but probably it ought to be hauled out of this function or entirely hauled
into this function. Anyway...

>  #define MAX_ALT_DEPTH 50
>  	*first_re_pos = 0;
> @@ -101,7 +97,6 @@ static pattern_t convert_aaregex_to_pcre
>  	enum error_type error;
>  
>  	const char *sptr;
> -	char *dptr;
>  	pattern_t ptype;
>  
>  	BOOL bEscape = 0;	/* flag to indicate escape */
> @@ -113,14 +108,13 @@ static pattern_t convert_aaregex_to_pcre
>  	ptype = ePatternBasic;	/* assume no regex */
>  
>  	sptr = aare;
> -	dptr = pcre;
>  
>  	if (dfaflags & DFA_DUMP_RULE_EXPR)
>  		fprintf(stderr, "aare: %s   ->   ", aare);
>  
>  	if (anchor)
>  		/* anchor beginning of regular expression */
> -		*dptr++ = '^';
> +		pcre.append("^");
>  
>  	while (error == e_no_error && *sptr) {
>  		switch (*sptr) {
> @@ -135,7 +129,7 @@ static pattern_t convert_aaregex_to_pcre
>  			 * this is done by stripping later
>  			 */
>  			if (bEscape) {
> -				STORE("\\\\", dptr, 2);
> +				pcre.append("\\\\");
>  			} else {
>  				bEscape = TRUE;
>  				++sptr;
> @@ -149,9 +143,9 @@ static pattern_t convert_aaregex_to_pcre
>  				 * end up using this regex buffer (i.e another
>  				 * non-escaped regex follows)
>  				 */
> -				STORE("\\*", dptr, 2);
> +				pcre.append("\\*");
>  			} else {
> -				if ((dptr > pcre) &&  *(dptr - 1) == '/') {
> +				if ((pcre.length() > 0) && pcre[pcre.length() - 1]  == '/') {
>  					#if 0
>  					// handle comment containing use
>  					// of C comment characters
> @@ -177,7 +171,7 @@ static pattern_t convert_aaregex_to_pcre
>  					while (*s == '*')
>  						s++;
>  					if (*s == '/' || !*s) {
> -						STORE("[^/\\x00]", dptr, 8);
> +						pcre.append("[^/\\x00]");
>  					}
>  				}
>  				if (*(sptr + 1) == '*') {
> @@ -195,12 +189,12 @@ static pattern_t convert_aaregex_to_pcre
>  						ptype = ePatternRegex;
>  					}
>  
> -					STORE("[^\\x00]*", dptr, 8);
> +					pcre.append("[^\\x00]*");
>  					sptr++;
>  				} else {
>  					update_re_pos(sptr - aare);
>  					ptype = ePatternRegex;
> -					STORE("[^/\\x00]*", dptr, 9);
> +					pcre.append("[^/\\x00]*");
>  				}	/* *(sptr+1) == '*' */
>  			}	/* bEscape */
>  
> @@ -212,48 +206,48 @@ static pattern_t convert_aaregex_to_pcre
>  				 * so no need to escape, just skip
>  				 * transform
>  				 */
> -				STORE(sptr, dptr, 1);
> +				pcre.append(1, *sptr);
>  			} else {
>  				update_re_pos(sptr - aare);
>  				ptype = ePatternRegex;
> -				STORE("[^/\\x00]", dptr, 8);
> +				pcre.append("[^/\\x00]");
>  			}
>  			break;
>  
>  		case '[':
>  			if (bEscape) {
>  				/* [ is a PCRE special character */
> -				STORE("\\[", dptr, 2);
> +				pcre.append("\\[");
>  			} else {
>  				update_re_pos(sptr - aare);
>  				incharclass = 1;
>  				ptype = ePatternRegex;
> -				STORE(sptr, dptr, 1);
> +				pcre.append(1, *sptr);
>  			}
>  			break;
>  
>  		case ']':
>  			if (bEscape) {
>  				/* ] is a PCRE special character */
> -				STORE("\\]", dptr, 2);
> +				pcre.append("\\]");
>  			} else {
>  				if (incharclass == 0) {
>  					error = e_parse_error;
>  					PERROR(_("%s: Regex grouping error: Invalid close ], no matching open [ detected\n"), progname);
>  				}
>  				incharclass = 0;
> -				STORE(sptr, dptr, 1);
> +				pcre.append(1, *sptr);
>  			}
>  			break;
>  
>  		case '{':
>  			if (bEscape) {
>  				/* { is a PCRE special character */
> -				STORE("\\{", dptr, 2);
> +				pcre.append("\\{");
>  			} else {
>  				if (incharclass) {
>  					/* don't expand inside [] */
> -					STORE("{", dptr, 1);
> +					pcre.append("{");
>  				} else {
>  					update_re_pos(sptr - aare);
>  					ingrouping++;
> @@ -264,7 +258,7 @@ static pattern_t convert_aaregex_to_pcre
>  					} else {
>  						grouping_count[ingrouping] = 0;
>  						ptype = ePatternRegex;
> -						STORE("(", dptr, 1);
> +						pcre.append("(");
>  					}
>  				}	/* incharclass */
>  			}
> @@ -273,11 +267,11 @@ static pattern_t convert_aaregex_to_pcre
>  		case '}':
>  			if (bEscape) {
>  				/* { is a PCRE special character */
> -				STORE("\\}", dptr, 2);
> +				pcre.append("\\}");
>  			} else {
>  				if (incharclass) {
>  					/* don't expand inside [] */
> -					STORE("}", dptr, 1);
> +					pcre.append("}");
>  				} else {
>  					if (grouping_count[ingrouping] == 0) {
>  						error = e_parse_error;
> @@ -290,7 +284,7 @@ static pattern_t convert_aaregex_to_pcre
>  						PERROR(_("%s: Regex grouping error: Invalid close }, no matching open { detected\n"), progname);
>  						ingrouping = 0;
>  					}
> -					STORE(")", dptr, 1);
> +					pcre.append(")");
>  				}	/* incharclass */
>  			}	/* bEscape */
>  
> @@ -302,20 +296,20 @@ static pattern_t convert_aaregex_to_pcre
>  					/* escape inside char class is a
>  					 * valid matching char for '\'
>  					 */
> -					STORE("\\,", dptr, 2);
> +					pcre.append("\\,");
>  				} else {
>  					/* ',' is not a PCRE regex character
>  					 * so no need to escape, just skip
>  					 * transform
>  					 */
> -					STORE(sptr, dptr, 1);
> +					pcre.append(1, *sptr);
>  				}
>  			} else {
>  				if (ingrouping && !incharclass) {
>  					grouping_count[ingrouping]++;
> -					STORE("|", dptr, 1);
> +					pcre.append("|");
>  				} else {
> -					STORE(sptr, dptr, 1);
> +					pcre.append(1, *sptr);
>  				}
>  			}
>  			break;
> @@ -325,10 +319,10 @@ static pattern_t convert_aaregex_to_pcre
>  		case '^':
>  		case '$':
>  			if (incharclass) {
> -				STORE(sptr, dptr, 1);
> +				pcre.append(1, *sptr);
>  			} else {
> -				STORE("\\", dptr, 1);
> -				STORE(sptr, dptr, 1);
> +				pcre.append("\\");
> +				pcre.append(1, *sptr);
>  			}
>  			break;
>  
> @@ -343,7 +337,7 @@ static pattern_t convert_aaregex_to_pcre
>  		case '|':
>  		case '(':
>  		case ')':
> -			STORE("\\", dptr, 1);
> +			pcre.append("\\");
>  			// fall through to default
>  
>  		default:
> @@ -353,7 +347,7 @@ static pattern_t convert_aaregex_to_pcre
>  				pwarn("Character %c was quoted unnecessarily, "
>  				      "dropped preceding quote ('\\') character\n", *sptr);
>  			}
> -			STORE(sptr, dptr, 1);
> +			pcre.append(1, *sptr);
>  			break;
>  		}	/* switch (*sptr) */
>  
> @@ -376,10 +370,7 @@ static pattern_t convert_aaregex_to_pcre
>  	}
>  	/* anchor end and terminate pattern string */
>  	if ((error == e_no_error) && anchor) {
> -		STORE("$" , dptr, 1);
> -	}
> -	if (error == e_no_error) {
> -		STORE("", dptr, 1);
> +		pcre.append("$");
>  	}
>  	/* check error  again, as above STORE may have set it */
>  	if (error != e_no_error) {
> @@ -400,7 +391,7 @@ out:
>  		ptype = ePatternInvalid;
>  
>  	if (dfaflags & DFA_DUMP_RULE_EXPR)
> -		fprintf(stderr, "%s\n", pcre);
> +		fprintf(stderr, "%s\n", pcre.c_str());

Sooner or later we'll get to replace all these with cerr << :)

>  
>  	return ptype;
>  }
> @@ -417,7 +408,7 @@ static const char *local_name(const char
>  
>  static int process_profile_name_xmatch(Profile *prof)
>  {
> -	char tbuf[PATH_MAX + 3];	/* +3 for ^, $ and \0 */
> +	std::string tbuf;
>  	pattern_t ptype;
>  	const char *name;
>  
> @@ -426,7 +417,7 @@ static int process_profile_name_xmatch(P
>  		name = prof->attachment;
>  	else
>  		name = local_name(prof->name);
> -	ptype = convert_aaregex_to_pcre(name, 0, tbuf, PATH_MAX + 3,
> +	ptype = convert_aaregex_to_pcre(name, 0, tbuf,
>  					&prof->xmatch_len);
>  	if (ptype == ePatternBasic)
>  		prof->xmatch_len = strlen(name);
> @@ -444,7 +435,7 @@ static int process_profile_name_xmatch(P
>  		aare_ruleset_t *rule = aare_new_ruleset(0);
>  		if (!rule)
>  			return FALSE;
> -		if (!aare_add_rule(rule, tbuf, 0, AA_MAY_EXEC, 0, dfaflags)) {
> +		if (!aare_add_rule(rule, tbuf.c_str(), 0, AA_MAY_EXEC, 0, dfaflags)) {
>  			aare_delete_ruleset(rule);
>  			return FALSE;
>  		}
> @@ -452,15 +443,15 @@ static int process_profile_name_xmatch(P
>  			struct alt_name *alt;
>  			list_for_each(prof->altnames, alt) {
>  				int len;
> +				tbuf.clear();
>  				ptype = convert_aaregex_to_pcre(alt->name, 0,
>  								tbuf,
> -								PATH_MAX + 3,
>  								&len);
>  				if (ptype == ePatternBasic)
>  					len = strlen(alt->name);
>  				if (len < prof->xmatch_len)
>  					prof->xmatch_len = len;
> -				if (!aare_add_rule(rule, tbuf, 0, AA_MAY_EXEC, 0, dfaflags)) {
> +				if (!aare_add_rule(rule, tbuf.c_str(), 0, AA_MAY_EXEC, 0, dfaflags)) {
>  					aare_delete_ruleset(rule);
>  					return FALSE;
>  				}
> @@ -478,7 +469,7 @@ static int process_profile_name_xmatch(P
>  
>  static int process_dfa_entry(aare_ruleset_t *dfarules, struct cod_entry *entry)
>  {
> -	char tbuf[PATH_MAX + 3];	/* +3 for ^, $ and \0 */
> +	std::string tbuf;
>  	pattern_t ptype;
>  	int pos;
>  
> @@ -488,7 +479,7 @@ static int process_dfa_entry(aare_rulese
>  
>  	if (entry->mode & ~AA_CHANGE_PROFILE)
>  		filter_slashes(entry->name);
> -	ptype = convert_aaregex_to_pcre(entry->name, 0, tbuf, PATH_MAX+3, &pos);
> +	ptype = convert_aaregex_to_pcre(entry->name, 0, tbuf, &pos);
>  	if (ptype == ePatternInvalid)
>  		return FALSE;
>  
> @@ -510,30 +501,30 @@ static int process_dfa_entry(aare_rulese
>  	 * entry of the pair
>  	 */
>  	if (entry->deny && (entry->mode & AA_LINK_BITS)) {
> -		if (!aare_add_rule(dfarules, tbuf, entry->deny,
> +		if (!aare_add_rule(dfarules, tbuf.c_str(), entry->deny,
>  				   entry->mode & ~AA_LINK_BITS,
>  				   entry->audit & ~AA_LINK_BITS, dfaflags))
>  			return FALSE;
>  	} else if (entry->mode & ~AA_CHANGE_PROFILE) {
> -		if (!aare_add_rule(dfarules, tbuf, entry->deny, entry->mode,
> +		if (!aare_add_rule(dfarules, tbuf.c_str(), entry->deny, entry->mode,
>  				   entry->audit, dfaflags))
>  			return FALSE;
>  	}
>  
>  	if (entry->mode & (AA_LINK_BITS)) {
>  		/* add the pair rule */
> -		char lbuf[PATH_MAX + 8];
> +		std::string lbuf;
>  		int perms = AA_LINK_BITS & entry->mode;
>  		const char *vec[2];
>  		int pos;
> -		vec[0] = tbuf;
> +		vec[0] = tbuf.c_str();
>  		if (entry->link_name) {
> -			ptype = convert_aaregex_to_pcre(entry->link_name, 0, lbuf, PATH_MAX + 8, &pos);
> +			ptype = convert_aaregex_to_pcre(entry->link_name, 0, lbuf, &pos);
>  			if (ptype == ePatternInvalid)
>  				return FALSE;
>  			if (entry->subset)
>  				perms |= LINK_TO_LINK_SUBSET(perms);
> -			vec[1] = lbuf;
> +			vec[1] = lbuf.c_str();
>  		} else {
>  			perms |= LINK_TO_LINK_SUBSET(perms);
>  			vec[1] = "/[^/].*";
> @@ -543,7 +534,7 @@ static int process_dfa_entry(aare_rulese
>  	}
>  	if (entry->mode & AA_CHANGE_PROFILE) {
>  		const char *vec[3];
> -		char lbuf[PATH_MAX + 8];
> +		std::string lbuf;
>  		int index = 1;
>  
>  		/* allow change_profile for all execs */
> @@ -551,10 +542,10 @@ static int process_dfa_entry(aare_rulese
>  
>  		if (entry->ns) {
>  			int pos;
> -			ptype = convert_aaregex_to_pcre(entry->ns, 0, lbuf, PATH_MAX + 8, &pos);
> -			vec[index++] = lbuf;
> +			ptype = convert_aaregex_to_pcre(entry->ns, 0, lbuf, &pos);
> +			vec[index++] = lbuf.c_str();
>  		}
> -		vec[index++] = tbuf;
> +		vec[index++] = tbuf.c_str();
>  
>  		/* regular change_profile rule */
>  		if (!aare_add_rule_vec(dfarules, 0, AA_CHANGE_PROFILE | AA_ONEXEC, 0, index - 1, &vec[1], dfaflags))
> @@ -639,7 +630,7 @@ out:
>  static int build_list_val_expr(char *buffer, int size, struct value_list *list)
>  {
>  	struct value_list *ent;
> -	char tmp[PATH_MAX + 3];
> +	std::string tmp;
>  	char *p;
>  	int len;
>  	pattern_t ptype;
> @@ -656,28 +647,28 @@ static int build_list_val_expr(char *buf
>  	if (p > buffer + size)
>  		goto fail;
>  
> -	ptype = convert_aaregex_to_pcre(list->value, 0, tmp, PATH_MAX+3, &pos);
> +	ptype = convert_aaregex_to_pcre(list->value, 0, tmp, &pos);
>  	if (ptype == ePatternInvalid)
>  		goto fail;
>  
> -	len = strlen(tmp);
> +	len = tmp.length();
>  	if (len > size - (p - buffer))
>  		goto fail;
> -	strcpy(p, tmp);
> +	strcpy(p, tmp.c_str());
>  	p += len;
>  
>  	list_for_each(list->next, ent) {
> -		ptype = convert_aaregex_to_pcre(ent->value, 0, tmp,
> -						PATH_MAX+3, &pos);
> +		tmp.clear();
> +		ptype = convert_aaregex_to_pcre(ent->value, 0, tmp, &pos);
>  		if (ptype == ePatternInvalid)
>  			goto fail;
>  
>  		strncpy(p, "|", size - (p - buffer));
>  		p++;
> -		len = strlen(tmp);
> +		len = tmp.length();
>  		if (len > size - (p - buffer))
>  			goto fail;
> -		strcpy(p, tmp);
> +		strcpy(p, tmp.c_str());
>  		p += len;
>  	}
>  	strncpy(p, ")", size - (p - buffer));
> @@ -690,22 +681,19 @@ fail:
>  	return FALSE;
>  }
>  
> -static int convert_entry(char *buffer, int size, char *entry)
> +static int convert_entry(std::string& buffer, char *entry)
>  {
>  	pattern_t ptype;
>  	int pos;
>  
>  	if (entry) {
> -		ptype = convert_aaregex_to_pcre(entry, 0, buffer, size, &pos);
> +		ptype = convert_aaregex_to_pcre(entry, 0, buffer, &pos);
>  		if (ptype == ePatternInvalid)
>  			return FALSE;
>  	} else {
> -		/* match any char except \000 0 or more times */
> -		if (size < 8)
> -			return FALSE;
> -
> -		strcpy(buffer, "[^\\000]*");
> +		buffer.append("[^\\000]*");
>  	}
> +
>  	return TRUE;
>  }
>  
> @@ -750,38 +738,24 @@ static int build_mnt_flags(char *buffer,
>  	return TRUE;
>  }
>  
> -static int build_mnt_opts(char *buffer, int size, struct value_list *opts)
> +static int build_mnt_opts(std::string& buffer, struct value_list *opts)
>  {
>  	struct value_list *ent;
> -	char tmp[PATH_MAX + 3];
> -	char *p;
> -	int len;
>  	pattern_t ptype;
>  	int pos;
>  
>  	if (!opts) {
> -		if (size < 8)
> -			return FALSE;
> -		strncpy(buffer, "[^\\000]*", size);
> +		buffer.append("[^\\000]*");
>  		return TRUE;
>  	}
>  
> -	p = buffer;
>  	list_for_each(opts, ent) {
> -		ptype = convert_aaregex_to_pcre(ent->value, 0, tmp,
> -						PATH_MAX+3, &pos);
> +		ptype = convert_aaregex_to_pcre(ent->value, 0, buffer, &pos);
>  		if (ptype == ePatternInvalid)
>  			goto fail;
>  
> -		len = strlen(tmp);
> -		if (len > size - (p - buffer))
> -			goto fail;
> -		strcpy(p, tmp);
> -		p += len;
> -		if (ent->next && size - (p - buffer) > 1) {
> -			*p++ = ',';
> -			*p = 0;
> -		}
> +		if (ent->next)
> +			buffer.append(",");
>  	}
>  
>  	return TRUE;

With only one remaining place where this function can fail, consider
changing 'goto fail' to return FALSE, ditch the label and other return.

Thanks!
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 490 bytes
Desc: Digital signature
URL: <https://lists.ubuntu.com/archives/apparmor/attachments/20131210/05a9d22d/attachment-0001.pgp>