[apparmor] [PATCH] parser - more regex unittests and fixes (was Re: [PATCH] [parsers] allow for nested alternations expressions)

John Johansen john.johansen at canonical.com
Thu Nov 7 00:38:09 UTC 2013


On 11/06/2013 04:19 PM, Steve Beattie wrote:
> On Mon, Nov 04, 2013 at 05:42:29PM -0800, Seth Arnold wrote:
>> I suspect there is more work to be done in this block of code; '[' may
>> need a corresponding if (incharclass == 1) test, unless this is supposed
>> to work: [[]  /* character class with a [ inside the class */
> 
err that is perfectly valid pcre, [ in a character class is only special if
its part of a posix character class.

http://perldoc.perl.org/perlrecharclass.html#Bracketed-Character-Classes
Special Characters Inside a Bracketed Character Class subsection of
Bracketed Character Classes

and ] has the exception that it isn't special if its the first character
within the character set to match (where a leading ^ isn't the first character
to match because it indicates inverting the class
  ie.
   []]
   [^]]
are valid

> Yes, good eye. Writing more unit tests for the function highlighted more
> issues, the following patch adds those test cases and fixes the
> following:
> 
>   - the '[' inside a character class that you pointed out
>   - expanding {,} inside a character class [] into (|)
>   - dropping the escape \ on a comma inside a character class (is not
>     dropping it the right behavior?
> 
> (This patch is on top of the previous unittests patch and the one just
> posted to fix the trailing slash issues.
> )
> Signed-off-by: Steve Beattie <steve at nxnw.org>
> ---
>  parser/parser_regex.c |  127 ++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 99 insertions(+), 28 deletions(-)
> 
> Index: b/parser/parser_regex.c
> ===================================================================
> --- a/parser/parser_regex.c
> +++ b/parser/parser_regex.c
> @@ -224,6 +224,10 @@ static pattern_t convert_aaregex_to_pcre
>  				/* [ is a PCRE special character */
>  				STORE("\\[", dptr, 2);
>  			} else {
> +				if (incharclass == 1) {
> +					error = e_parse_error;
> +					PERROR(_("%s: Regex grouping error: Invalid unquoted [ inside '[ ]'\n"), progname);
> +				}
>  				update_re_pos(sptr - aare);
>  				incharclass = 1;
>  				ptype = ePatternRegex;
> @@ -250,17 +254,22 @@ static pattern_t convert_aaregex_to_pcre
>  				/* { is a PCRE special character */
>  				STORE("\\{", dptr, 2);
>  			} else {
> -				update_re_pos(sptr - aare);
> -				ingrouping++;
> -				if (ingrouping >= MAX_ALT_DEPTH) {
> -					error = e_parse_error;
> -					PERROR(_("%s: Regex grouping error: Exceeded maximum nesting of {}\n"), progname);
> -
> +				if (incharclass) {
> +					/* don't expand inside [] */
> +					STORE("{", dptr, 1);
>  				} else {
> -					grouping_count[ingrouping] = 0;
> -					ptype = ePatternRegex;
> -					STORE("(", dptr, 1);
> -				}
> +					update_re_pos(sptr - aare);
> +					ingrouping++;
> +					if (ingrouping >= MAX_ALT_DEPTH) {
> +						error = e_parse_error;
> +						PERROR(_("%s: Regex grouping error: Exceeded maximum nesting of {}\n"), progname);
> +
> +					} else {
> +						grouping_count[ingrouping] = 0;
> +						ptype = ePatternRegex;
> +						STORE("(", dptr, 1);
> +					}
> +				}	/* incharclass */
>  			}
>  			break;
>  
> @@ -269,31 +278,43 @@ static pattern_t convert_aaregex_to_pcre
>  				/* { is a PCRE special character */
>  				STORE("\\}", dptr, 2);
>  			} else {
> -				if (grouping_count[ingrouping] == 0) {
> -					error = e_parse_error;
> -					PERROR(_("%s: Regex grouping error: Invalid number of items between {}\n"), progname);
> -
> -				}
> -				ingrouping--;
> -				if (ingrouping < 0) {
> -					error = e_parse_error;
> -					PERROR(_("%s: Regex grouping error: Invalid close }, no matching open { detected\n"), progname);
> -					ingrouping = 0;
> -				}
> -				STORE(")", dptr, 1);
> +				if (incharclass) {
> +					/* don't expand inside [] */
> +					STORE("}", dptr, 1);
> +				} else {
> +					if (grouping_count[ingrouping] == 0) {
> +						error = e_parse_error;
> +						PERROR(_("%s: Regex grouping error: Invalid number of items between {}\n"), progname);
> +
> +					}
> +					ingrouping--;
> +					if (ingrouping < 0) {
> +						error = e_parse_error;
> +						PERROR(_("%s: Regex grouping error: Invalid close }, no matching open { detected\n"), progname);
> +						ingrouping = 0;
> +					}
> +					STORE(")", dptr, 1);
> +				}	/* incharclass */
>  			}	/* bEscape */
>  
>  			break;
>  
>  		case ',':
>  			if (bEscape) {
> -				/* , is not a PCRE regex character
> -				 * so no need to escape, just skip
> -				 * transform
> -				 */
> -				STORE(sptr, dptr, 1);
> +				if (incharclass) {
> +					/* escape inside char class is a
> +					 * valid matching char for '\'
> +					 */
> +					STORE("\\,", dptr, 2);
> +				} else {
> +					/* ',' is not a PCRE regex character
> +					 * so no need to escape, just skip
> +					 * transform
> +					 */
> +					STORE(sptr, dptr, 1);
> +				}
>  			} else {
> -				if (ingrouping) {
> +				if (ingrouping && !incharclass) {
>  					grouping_count[ingrouping]++;
>  					STORE("|", dptr, 1);
>  				} else {
> @@ -1356,6 +1377,7 @@ static int test_aaregex_to_pcre(void)
>  	MY_REGEX_FAIL_TEST("blort]");
>  	MY_REGEX_FAIL_TEST("blo]rt");
>  	MY_REGEX_FAIL_TEST("]blort");
> +	MY_REGEX_TEST("b[lor]t", "b[lor]t", ePatternRegex);
>  
>  	/* simple alternation tests */
>  	MY_REGEX_TEST("{alpha,beta}", "(alpha|beta)", ePatternRegex);
> @@ -1374,6 +1396,55 @@ static int test_aaregex_to_pcre(void)
>  	MY_REGEX_TEST("{{alpha,alpha{blort,nested}}beta,beta}", "((alpha|alpha(blort|nested))beta|beta)", ePatternRegex);
>  	MY_REGEX_TEST("{{alpha,alpha{blort,nested}}beta,beta}", "((alpha|alpha(blort|nested))beta|beta)", ePatternRegex);
>  	MY_REGEX_TEST("{{a,b{c,d}}e,{f,{g,{h{i,j,k},l}m},n}o}", "((a|b(c|d))e|(f|(g|(h(i|j|k)|l)m)|n)o)", ePatternRegex);
> +	/* max nesting depth = 50 */
> +	MY_REGEX_TEST("{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a,b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b}b,blort}",
> +			"(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)b|blort)", ePatternRegex);
> +	MY_REGEX_FAIL_TEST("{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a,b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b}b,blort}");
> +
> +	/* simple single char */
> +	MY_REGEX_TEST("blor?t", "blor[^/\\x00]t", ePatternRegex);
> +
> +	/* simple globbing */
> +	MY_REGEX_TEST("/*", "/[^/\\x00][^/\\x00]*", ePatternRegex);
> +	MY_REGEX_TEST("/blort/*", "/blort/[^/\\x00][^/\\x00]*", ePatternRegex);
> +	MY_REGEX_TEST("/*/blort", "/[^/\\x00][^/\\x00]*/blort", ePatternRegex);
> +	MY_REGEX_TEST("/**", "/[^/\\x00][^\\x00]*", ePatternTailGlob);
> +	MY_REGEX_TEST("/blort/**", "/blort/[^/\\x00][^\\x00]*", ePatternTailGlob);
> +	MY_REGEX_TEST("/**/blort", "/[^/\\x00][^\\x00]*/blort", ePatternRegex);
> +
> +	/* more complicated quoting */
> +	MY_REGEX_FAIL_TEST("\\\\[");
> +	MY_REGEX_FAIL_TEST("\\\\]");
> +	MY_REGEX_TEST("\\\\?", "\\\\[^/\\x00]", ePatternRegex);
> +	MY_REGEX_FAIL_TEST("\\\\{");
> +	MY_REGEX_FAIL_TEST("\\\\}");
> +	MY_REGEX_TEST("\\\\,", "\\\\,", ePatternBasic);
> +	MY_REGEX_TEST("\\\\^", "\\\\\\^", ePatternBasic);
> +	MY_REGEX_TEST("\\\\$", "\\\\\\$", ePatternBasic);
> +	MY_REGEX_TEST("\\\\.", "\\\\\\.", ePatternBasic);
> +	MY_REGEX_TEST("\\\\+", "\\\\\\+", ePatternBasic);
> +	MY_REGEX_TEST("\\\\|", "\\\\\\|", ePatternBasic);
> +	MY_REGEX_TEST("\\\\(", "\\\\\\(", ePatternBasic);
> +	MY_REGEX_TEST("\\\\)", "\\\\\\)", ePatternBasic);
> +
> +	/* more complicated character class tests */
> +	/*   -- embedded alternations */
> +	MY_REGEX_TEST("b[\\lor]t", "b[\\lor]t", ePatternRegex);
> +	MY_REGEX_TEST("b[{a,b}]t", "b[{a,b}]t", ePatternRegex);
> +	MY_REGEX_TEST("b[\\{a,b\\}]t", "b[\\{a,b\\}]t", ePatternRegex);
> +	MY_REGEX_TEST("{alpha,b[{a,b}]t,gamma}", "(alpha|b[{a,b}]t|gamma)", ePatternRegex);
> +	MY_REGEX_TEST("{alpha,b[\\{a,b\\}]t,gamma}", "(alpha|b[\\{a,b\\}]t|gamma)", ePatternRegex);
> +	MY_REGEX_TEST("{alpha,b[\\{a\\,b\\}]t,gamma}", "(alpha|b[\\{a\\,b\\}]t|gamma)", ePatternRegex);
> +
> +	/*   -- embedded character class chars */
> +	MY_REGEX_TEST("b[lo\\]r]t", "b[lo\\]r]t", ePatternRegex);
> +	MY_REGEX_FAIL_TEST("b[lo]r]t");
> +	MY_REGEX_TEST("b[lo\\[r]t", "b[lo\\[r]t", ePatternRegex);
> +	MY_REGEX_FAIL_TEST("b[lo[r]t");
> +
> +	/*   -- embedded misc single chars match */
> +	MY_REGEX_TEST("b[^lor]t", "b[^lor]t", ePatternRegex);
> +	MY_REGEX_TEST("b[lor^]t", "b[lor^]t", ePatternRegex);
>  
>  	return rc;
>  }
> 
> 
> 




More information about the AppArmor mailing list