[apparmor] [PATCH] parser - more regex unittests and fixes (was Re: [PATCH] [parsers] allow for nested alternations expressions)
John Johansen
john.johansen at canonical.com
Thu Nov 7 00:38:09 UTC 2013
On 11/06/2013 04:19 PM, Steve Beattie wrote:
> On Mon, Nov 04, 2013 at 05:42:29PM -0800, Seth Arnold wrote:
>> I suspect there is more work to be done in this block of code; '[' may
>> need a corresponding if (incharclass == 1) test, unless this is supposed
>> to work: [[] /* character class with a [ inside the class */
>
err that is perfectly valid pcre, [ in a character class is only special if
its part of a posix character class.
http://perldoc.perl.org/perlrecharclass.html#Bracketed-Character-Classes
Special Characters Inside a Bracketed Character Class subsection of
Bracketed Character Classes
and ] has the exception that it isn't special if its the first character
within the character set to match (where a leading ^ isn't the first character
to match because it indicates inverting the class
ie.
[]]
[^]]
are valid
> Yes, good eye. Writing more unit tests for the function highlighted more
> issues, the following patch adds those test cases and fixes the
> following:
>
> - the '[' inside a character class that you pointed out
> - expanding {,} inside a character class [] into (|)
> - dropping the escape \ on a comma inside a character class (is not
> dropping it the right behavior?
>
> (This patch is on top of the previous unittests patch and the one just
> posted to fix the trailing slash issues.
> )
> Signed-off-by: Steve Beattie <steve at nxnw.org>
> ---
> parser/parser_regex.c | 127 ++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 99 insertions(+), 28 deletions(-)
>
> Index: b/parser/parser_regex.c
> ===================================================================
> --- a/parser/parser_regex.c
> +++ b/parser/parser_regex.c
> @@ -224,6 +224,10 @@ static pattern_t convert_aaregex_to_pcre
> /* [ is a PCRE special character */
> STORE("\\[", dptr, 2);
> } else {
> + if (incharclass == 1) {
> + error = e_parse_error;
> + PERROR(_("%s: Regex grouping error: Invalid unquoted [ inside '[ ]'\n"), progname);
> + }
> update_re_pos(sptr - aare);
> incharclass = 1;
> ptype = ePatternRegex;
> @@ -250,17 +254,22 @@ static pattern_t convert_aaregex_to_pcre
> /* { is a PCRE special character */
> STORE("\\{", dptr, 2);
> } else {
> - update_re_pos(sptr - aare);
> - ingrouping++;
> - if (ingrouping >= MAX_ALT_DEPTH) {
> - error = e_parse_error;
> - PERROR(_("%s: Regex grouping error: Exceeded maximum nesting of {}\n"), progname);
> -
> + if (incharclass) {
> + /* don't expand inside [] */
> + STORE("{", dptr, 1);
> } else {
> - grouping_count[ingrouping] = 0;
> - ptype = ePatternRegex;
> - STORE("(", dptr, 1);
> - }
> + update_re_pos(sptr - aare);
> + ingrouping++;
> + if (ingrouping >= MAX_ALT_DEPTH) {
> + error = e_parse_error;
> + PERROR(_("%s: Regex grouping error: Exceeded maximum nesting of {}\n"), progname);
> +
> + } else {
> + grouping_count[ingrouping] = 0;
> + ptype = ePatternRegex;
> + STORE("(", dptr, 1);
> + }
> + } /* incharclass */
> }
> break;
>
> @@ -269,31 +278,43 @@ static pattern_t convert_aaregex_to_pcre
> /* { is a PCRE special character */
> STORE("\\}", dptr, 2);
> } else {
> - if (grouping_count[ingrouping] == 0) {
> - error = e_parse_error;
> - PERROR(_("%s: Regex grouping error: Invalid number of items between {}\n"), progname);
> -
> - }
> - ingrouping--;
> - if (ingrouping < 0) {
> - error = e_parse_error;
> - PERROR(_("%s: Regex grouping error: Invalid close }, no matching open { detected\n"), progname);
> - ingrouping = 0;
> - }
> - STORE(")", dptr, 1);
> + if (incharclass) {
> + /* don't expand inside [] */
> + STORE("}", dptr, 1);
> + } else {
> + if (grouping_count[ingrouping] == 0) {
> + error = e_parse_error;
> + PERROR(_("%s: Regex grouping error: Invalid number of items between {}\n"), progname);
> +
> + }
> + ingrouping--;
> + if (ingrouping < 0) {
> + error = e_parse_error;
> + PERROR(_("%s: Regex grouping error: Invalid close }, no matching open { detected\n"), progname);
> + ingrouping = 0;
> + }
> + STORE(")", dptr, 1);
> + } /* incharclass */
> } /* bEscape */
>
> break;
>
> case ',':
> if (bEscape) {
> - /* , is not a PCRE regex character
> - * so no need to escape, just skip
> - * transform
> - */
> - STORE(sptr, dptr, 1);
> + if (incharclass) {
> + /* escape inside char class is a
> + * valid matching char for '\'
> + */
> + STORE("\\,", dptr, 2);
> + } else {
> + /* ',' is not a PCRE regex character
> + * so no need to escape, just skip
> + * transform
> + */
> + STORE(sptr, dptr, 1);
> + }
> } else {
> - if (ingrouping) {
> + if (ingrouping && !incharclass) {
> grouping_count[ingrouping]++;
> STORE("|", dptr, 1);
> } else {
> @@ -1356,6 +1377,7 @@ static int test_aaregex_to_pcre(void)
> MY_REGEX_FAIL_TEST("blort]");
> MY_REGEX_FAIL_TEST("blo]rt");
> MY_REGEX_FAIL_TEST("]blort");
> + MY_REGEX_TEST("b[lor]t", "b[lor]t", ePatternRegex);
>
> /* simple alternation tests */
> MY_REGEX_TEST("{alpha,beta}", "(alpha|beta)", ePatternRegex);
> @@ -1374,6 +1396,55 @@ static int test_aaregex_to_pcre(void)
> MY_REGEX_TEST("{{alpha,alpha{blort,nested}}beta,beta}", "((alpha|alpha(blort|nested))beta|beta)", ePatternRegex);
> MY_REGEX_TEST("{{alpha,alpha{blort,nested}}beta,beta}", "((alpha|alpha(blort|nested))beta|beta)", ePatternRegex);
> MY_REGEX_TEST("{{a,b{c,d}}e,{f,{g,{h{i,j,k},l}m},n}o}", "((a|b(c|d))e|(f|(g|(h(i|j|k)|l)m)|n)o)", ePatternRegex);
> + /* max nesting depth = 50 */
> + MY_REGEX_TEST("{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a,b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b}b,blort}",
> + "(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)|b)b|blort)", ePatternRegex);
> + MY_REGEX_FAIL_TEST("{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a{a,b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b},b}b,blort}");
> +
> + /* simple single char */
> + MY_REGEX_TEST("blor?t", "blor[^/\\x00]t", ePatternRegex);
> +
> + /* simple globbing */
> + MY_REGEX_TEST("/*", "/[^/\\x00][^/\\x00]*", ePatternRegex);
> + MY_REGEX_TEST("/blort/*", "/blort/[^/\\x00][^/\\x00]*", ePatternRegex);
> + MY_REGEX_TEST("/*/blort", "/[^/\\x00][^/\\x00]*/blort", ePatternRegex);
> + MY_REGEX_TEST("/**", "/[^/\\x00][^\\x00]*", ePatternTailGlob);
> + MY_REGEX_TEST("/blort/**", "/blort/[^/\\x00][^\\x00]*", ePatternTailGlob);
> + MY_REGEX_TEST("/**/blort", "/[^/\\x00][^\\x00]*/blort", ePatternRegex);
> +
> + /* more complicated quoting */
> + MY_REGEX_FAIL_TEST("\\\\[");
> + MY_REGEX_FAIL_TEST("\\\\]");
> + MY_REGEX_TEST("\\\\?", "\\\\[^/\\x00]", ePatternRegex);
> + MY_REGEX_FAIL_TEST("\\\\{");
> + MY_REGEX_FAIL_TEST("\\\\}");
> + MY_REGEX_TEST("\\\\,", "\\\\,", ePatternBasic);
> + MY_REGEX_TEST("\\\\^", "\\\\\\^", ePatternBasic);
> + MY_REGEX_TEST("\\\\$", "\\\\\\$", ePatternBasic);
> + MY_REGEX_TEST("\\\\.", "\\\\\\.", ePatternBasic);
> + MY_REGEX_TEST("\\\\+", "\\\\\\+", ePatternBasic);
> + MY_REGEX_TEST("\\\\|", "\\\\\\|", ePatternBasic);
> + MY_REGEX_TEST("\\\\(", "\\\\\\(", ePatternBasic);
> + MY_REGEX_TEST("\\\\)", "\\\\\\)", ePatternBasic);
> +
> + /* more complicated character class tests */
> + /* -- embedded alternations */
> + MY_REGEX_TEST("b[\\lor]t", "b[\\lor]t", ePatternRegex);
> + MY_REGEX_TEST("b[{a,b}]t", "b[{a,b}]t", ePatternRegex);
> + MY_REGEX_TEST("b[\\{a,b\\}]t", "b[\\{a,b\\}]t", ePatternRegex);
> + MY_REGEX_TEST("{alpha,b[{a,b}]t,gamma}", "(alpha|b[{a,b}]t|gamma)", ePatternRegex);
> + MY_REGEX_TEST("{alpha,b[\\{a,b\\}]t,gamma}", "(alpha|b[\\{a,b\\}]t|gamma)", ePatternRegex);
> + MY_REGEX_TEST("{alpha,b[\\{a\\,b\\}]t,gamma}", "(alpha|b[\\{a\\,b\\}]t|gamma)", ePatternRegex);
> +
> + /* -- embedded character class chars */
> + MY_REGEX_TEST("b[lo\\]r]t", "b[lo\\]r]t", ePatternRegex);
> + MY_REGEX_FAIL_TEST("b[lo]r]t");
> + MY_REGEX_TEST("b[lo\\[r]t", "b[lo\\[r]t", ePatternRegex);
> + MY_REGEX_FAIL_TEST("b[lo[r]t");
> +
> + /* -- embedded misc single chars match */
> + MY_REGEX_TEST("b[^lor]t", "b[^lor]t", ePatternRegex);
> + MY_REGEX_TEST("b[lor^]t", "b[lor^]t", ePatternRegex);
>
> return rc;
> }
>
>
>
More information about the AppArmor
mailing list