[apparmor] [PATCH 01/10] clean up the lexer

Wed Jul 24 07:33:13 UTC 2013

On Sun, Jul 21, 2013 at 10:32:44PM -0700, John Johansen wrote:
> - Make indenting consistent
> - Move common match + fn patterns into a single shared entry with multistate
>   headers
> - add names table to convert lexer state #s to state names used in the code
> - Create/use macros for common patterns of DUMP, DEBUG, return ...
>   this fixes a few places where preprocess or DEBUG output was not
>   available
> - update RE patterns for bugs eg. {WS} inside a character class [] does
>   not match whitespace
> 
> all told, despite adding code to provide better support to debug and
> preprocessing, the code is about 150 lines shorter, and has a few corner
> cases cleaned up.

Oh man, this is beautiful. Lovely. Thanks. :)

A few comments inline..

> Signed-off-by: John Johansen <john.johansen at canonical.com>
> ---
>  parser/parser_lex.l  | 661 +++++++++++++++++++--------------------------------
>  parser/parser_yacc.y |   1 +
>  2 files changed, 251 insertions(+), 411 deletions(-)
> 
> diff --git a/parser/parser_lex.l b/parser/parser_lex.l
> index 539e16a..1b14625 100644
> --- a/parser/parser_lex.l
> +++ b/parser/parser_lex.l
> @@ -46,7 +46,8 @@
>  #endif
>  /* #define DEBUG */
>  #ifdef DEBUG
> -#define PDEBUG(fmt, args...) printf("Lexer (state %d): " fmt, YY_START, ## args)
> +static int yy_top_state(void);
> +#define PDEBUG(fmt, args...) printf("Lexer (Line %d) (state %s): " fmt, current_lineno, state_names[YY_START], ## args)
>  #else
>  #define PDEBUG(fmt, args...)	/* Do nothing */
>  #endif
> @@ -54,8 +55,44 @@
>  
>  #define DUMP_PREPROCESS do { if (preprocess_only) ECHO; } while (0)
>  
> +#
> +#define RETURN_TOKEN(X) \
> +do { \
> +	DUMP_PREPROCESS; \
> +	PDEBUG("Matched: %s\n", yytext); \
> +	return (X); \
> +} while (0)
> +
> +#define POP_AND_RETURN(X) \
> +do { \
> +	DUMP_PREPROCESS; \
> +	PDEBUG(" (ret_to(%s)): Matched: %s\n", state_names[yy_top_state()], yytext); \
> +	yy_pop_state(); \
> +	return (X); \
> +} while (0)
> +
> +#define PUSH_AND_RETURN(X, Y) \
> +do { \
> +	DUMP_PREPROCESS; \
> +	PDEBUG(" (push(%s)): Matched: %s\n", state_names[(X)], yytext); \
> +	yy_push_state(X); \
> +	return (Y); \
> +} while (0)
> +
> +#define BEGIN_AND_RETURN(X, Y) \
> +do { \
> +	DUMP_PREPROCESS; \
> +	PDEBUG(" (begin(%s)): Matched: %s\n", state_names[(X)], yytext); \
> +	BEGIN(X); \
> +	return (Y); \
> +} while (0)
> +
> +
>  #define YY_NO_INPUT
>  
> +#define STATE_TABLE_ENT(X) [(X)] = #X
> +static const char *const state_names[];
> +
>  struct ignored_suffix_t {
>  	char * text;
>  	int len;
> @@ -199,8 +236,9 @@ POST_VAR_ID 	{POST_VAR_ID_CHARS}|(,{POST_VAR_ID_CHARS})
>  LIST_VALUE_ID_CHARS	[^ \t\n"!,]{-}[()]
>  LIST_VALUE_ID	{LIST_VALUE_ID_CHARS}+
>  ID_CHARS_NOEQ	[^ \t\n"!,]{-}[=]
> +LEADING_ID_CHARS_NOEQ [^ \t\n"!,]{-}[=()+&]
>  ID_NOEQ		{ID_CHARS_NOEQ}|(,{ID_CHARS_NOEQ})
> -IDS_NOEQ	{ID_NOEQ}+
> +IDS_NOEQ       {LEADING_ID_CHARS_NOEQ}{ID_NOEQ}*
>  ALLOWED_QUOTED_ID 	[^\0"]|\\\"
>  QUOTED_ID	\"{ALLOWED_QUOTED_ID}*\"
>  
> @@ -221,10 +259,12 @@ OPEN_PAREN 	\(
>  CLOSE_PAREN	\)
>  COMMA		\,
>  EQUALS		=
> +NOTEQUALS	!=

Not used...

>  ADD_ASSIGN	\+=
>  ARROW		->
>  LT_EQUAL	<=
>  
> +/* IF adding new state please update state_names table at eof */
>  %x SUB_ID
>  %x SUB_VALUE
>  %x EXTCOND_MODE
> @@ -247,483 +287,282 @@ LT_EQUAL	<=
>  	}
>  %}
>  
> -<INCLUDE>{
> -	{WS}+	{ /* Eat whitespace */ }
> -	\<([^\> \t\n]+)\>	{	/* <filename> */
> -		char *filename = strdup(yytext);
> -		filename[strlen(filename) - 1] = '\0';
> -		include_filename(filename + 1, 1);
> -		free(filename);
> -		yy_pop_state();
> -		}
> +<INITIAL,INCLUDE,LIST_VAL_MODE,EXTCOND_MODE,ASSIGN_MODE,NETWORK_MODE,CHANGE_PROFILE_MODE,RLIMIT_MODE,MOUNT_MODE>{
> +	{WS}+	{  DUMP_PREPROCESS; /* Ignoring whitespace */ }
> +}
>  
> -	\"([^\" \t\n]+)\"	{	/* "filename" */
> +<INCLUDE>{
> +	(\<([^\> \t\n]+)\>|\"([^\" \t\n]+)\")	{	/* <filename> */
>  		char *filename = strdup(yytext);
>  		filename[strlen(filename) - 1] = '\0';
> -		include_filename(filename + 1, 0);
> +		include_filename(filename + 1, *filename == '<');
>  		free(filename);
>  		yy_pop_state();
> -		}
> +	}
>  
> -	[^\<\>\"{WS}]+ {	/* filename */
> +	[^\<\>\" \t\n]+ {	/* filename */
>  		include_filename(yytext, 0);
>  		yy_pop_state();
> -		}
> +	}
>  }
>  
>  <<EOF>> {
>  	fclose(yyin);
>  	pop_include_stack();
>  	yypop_buffer_state();
> -	if ( !YY_CURRENT_BUFFER ) yyterminate();
> +	if ( !YY_CURRENT_BUFFER )
> +		yyterminate();
>  }
>  
>  <INITIAL,MOUNT_MODE>{
>  	{VARIABLE_NAME}/{WS}*=	{
> -				/* we match to the = in the lexer so that
> -				 * can switch scanner state.  By the time
> -				 * the parser see the = it may be to late
> -				 * as bison may have requested the next
> -				 * token from the scanner
> -				 */
> -				DUMP_PREPROCESS;
> -				PDEBUG("conditional %s=\n", yytext);
> -				yylval.id = processid(yytext, yyleng);
> -				yy_push_state(EXTCOND_MODE);
> -				return TOK_CONDID;
> -			}
> +		/* we match to the = in the lexer so that can switch scanner
> +		 * state.  By the time the parser see the = it may be to late
> +		 * as bison may have requested the next token from the scanner
> +		 */

While we're making drastic changes, some of these comment errors that
have grated on me for years are finally fair game :) "so that we can",
and "too late", please.

> +		DUMP_PREPROCESS;
> +		yylval.id = processid(yytext, yyleng);
> +		PUSH_AND_RETURN(EXTCOND_MODE, TOK_CONDID);

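PUSH_AND_RETURN() already includes the DUMP_PREPROCESS, so with the
explicit DUMP_PREPROCESS above the match would be echoed twice when
preprocess_only is set. Please drop the explicit one.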

> +	}
> +
>  	{VARIABLE_NAME}/{WS}+in{WS}*\(	{
> -				/* we match to 'in' in the lexer so that
> -				 * we can switch scanner state.  By the time
> -				 * the parser see the 'in' it may be to late
> -				 * as bison may have requested the next
> -				 * token from the scanner
> -				 */
> -				DUMP_PREPROCESS;
> -				PDEBUG("conditional %s=\n", yytext);
> -				yylval.id = processid(yytext, yyleng);
> -				yy_push_state(EXTCOND_MODE);
> -				return TOK_CONDID;
> -			}
> +		/* we match to 'in' in the lexer so that we can switch scanner
> +		 * state.  By the time the parser see the 'in' it may be to
> +		 * late as bison may have requested the next token from the
> +		 * scanner
> +		 */

Again, "too late".

> +		DUMP_PREPROCESS;
> +		yylval.id = processid(yytext, yyleng);
> +		PUSH_AND_RETURN(EXTCOND_MODE, TOK_CONDID);

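PUSH_AND_RETURN() already includes the DUMP_PREPROCESS, so with the
explicit DUMP_PREPROCESS above the match would be echoed twice when
preprocess_only is set. Please drop the explicit one.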

> +	}
>  }
>  
>  <SUB_ID>{
> -	({IDS}|{QUOTED_ID})	{
> -			  /* Ugh, this is a gross hack. I used to use
> -			   * {IDS} to match all TOK_IDs, but that would
> -			   * also match TOK_MODE + TOK_END_OF_RULE
> -			   * without any spaces in between (because it's
> -			   * a longer match). So now, when I want to
> -			   * match any random string, I go into a
> -			   * separate state. */
> -			DUMP_PREPROCESS;
> -			yylval.id =  processid(yytext, yyleng);
> -			PDEBUG("Found sub name: \"%s\"\n",  yylval.id);
> -			yy_pop_state();
> -			return TOK_ID;
> -		}
> -
> -	[^\n]	{
> -			DUMP_PREPROCESS;
> -			/* Something we didn't expect */
> -			yyerror(_("Found unexpected character: '%s'"), yytext);
> -		}
> +	({IDS}|{QUOTED_ID}) {
> +		/* Go into separate state to match generic ID strings */
> +		yylval.id =  processid(yytext, yyleng);
> +		POP_AND_RETURN(TOK_ID);
> +	}
>  }
>  
>  <SUB_VALUE>{
> -	({IDS}|{QUOTED_ID})	{
> -			  /* Ugh, this is a gross hack. I used to use
> -			   * {IDS} to match all TOK_IDs, but that would
> -			   * also match TOK_MODE + TOK_END_OF_RULE
> -			   * without any spaces in between (because it's
> -			   * a longer match). So now, when I want to
> -			   * match any random string, I go into a
> -			   * separate state. */
> -			DUMP_PREPROCESS;
> -			yylval.id =  processid(yytext, yyleng);
> -			PDEBUG("Found sub value: \"%s\"\n",  yylval.id);
> -			yy_pop_state();
> -			return TOK_VALUE;
> -		}
> -
> -	[^\n]	{
> -			DUMP_PREPROCESS;
> -			/* Something we didn't expect */
> -			yyerror(_("Found unexpected character: '%s'"), yytext);
> -		}
> +	({IDS}|{QUOTED_ID}) {
> +		/* Go into separate state to match generic VALUE strings */
> +		yylval.id =  processid(yytext, yyleng);
> +		POP_AND_RETURN(TOK_VALUE);
> +	}
>  }

Can <SUB_ID> and <SUB_VALUE> be combined here? Is the clarity increased
if they are combined?

>  
>  <LIST_VAL_MODE>{
> -	{CLOSE_PAREN}	{
> -			DUMP_PREPROCESS;
> -			PDEBUG("listval: )\n");
> -			yy_pop_state();
> -			return TOK_CLOSEPAREN;
> -			}
> -
> -	{WS}+		{ DUMP_PREPROCESS; /* Eat whitespace */ }
> +	{CLOSE_PAREN} { POP_AND_RETURN(TOK_CLOSEPAREN); }
>  
>  	{COMMA}	{
> -			DUMP_PREPROCESS;
> -			PDEBUG("listval: , \n");
> -			/* East comma, its an optional separator */
> -			}
> -
> -	({LIST_VALUE_ID}|{QUOTED_ID})	{
> -			DUMP_PREPROCESS;
> -			yylval.id = processid(yytext, yyleng);
> -			PDEBUG("listval: \"%s\"\n", yylval.id);
> -			return TOK_VALUE;
> -			}
> -
> -	[^\n]		{
> -			DUMP_PREPROCESS;
> -			/* Something we didn't expect */
> -			yyerror(_("Found unexpected character: '%s'"), yytext);
> -			}
> +		DUMP_PREPROCESS;
> +		PDEBUG("listval: , \n");
> +		/* Eat comma, its an optional separator */
> +	}
> +
> +	({LIST_VALUE_ID}|{QUOTED_ID}) {
> +		yylval.id = processid(yytext, yyleng);
> +		RETURN_TOKEN(TOK_VALUE);
> +	}
>  }
>  
>  <EXTCOND_MODE>{
> -	{WS}+		{ DUMP_PREPROCESS; /* Eat whitespace */ }
> -
> -	{EQUALS}{WS}*/[^(\n]{-}{WS}	{
> -			DUMP_PREPROCESS;
> -			BEGIN(SUB_VALUE);
> -			return TOK_EQUALS;
> -		}
> -
> -	{EQUALS}	{
> -			DUMP_PREPROCESS;
> -			return TOK_EQUALS;
> -		}
> -
> -	{OPEN_PAREN}	{
> -			DUMP_PREPROCESS;
> -			PDEBUG("extcond listv\n");
> -			/* Don't push state here as this is a transition
> -			 * start condition and we want to return to the start
> -			 * condition that invoked <EXTCOND_MODE> when
> -			 * LIST_VAL_ID is done
> -			 */
> -			BEGIN(LIST_VAL_MODE);
> -			return TOK_OPENPAREN;
> -		}
> +	{EQUALS}{WS}*/[^(\n]{-}{WS} { BEGIN_AND_RETURN(SUB_VALUE, TOK_EQUALS);}
>  
> -	in	{
> -			DUMP_PREPROCESS;
> -			return TOK_IN;
> -		}
> +	{EQUALS} { RETURN_TOKEN(TOK_EQUALS); }
>  
> -	[^\n]	{
> -			DUMP_PREPROCESS;
> -			/* Something we didn't expect */
> -			yyerror(_("Found unexpected character: '%s' %d"), yytext, *yytext);
> -		}
> +	/* Don't push state here as this is a transition start condition and
> +	 * we want to return to the start condition that invoked <EXTCOND_MODE>
> +	 * when LIST_VAL_ID is done
> +	 */
> +	{OPEN_PAREN} { BEGIN_AND_RETURN(LIST_VAL_MODE, TOK_OPENPAREN); }
>  
> +	in { RETURN_TOKEN(TOK_IN); }
>  }
>  
>  <ASSIGN_MODE>{
> -	{WS}+		{ DUMP_PREPROCESS; /* Eat whitespace */ }
> -
> -	({IDS}|{QUOTED_ID})		{
> -			DUMP_PREPROCESS;
> -			yylval.var_val = processid(yytext, yyleng);
> -			PDEBUG("Found assignment value: \"%s\"\n", yylval.var_val);
> -			return TOK_VALUE;
> -			}
> +	({IDS}|{QUOTED_ID}) {
> +		yylval.var_val = processid(yytext, yyleng);
> +		RETURN_TOKEN(TOK_VALUE);
> +	}
>  
> -	{END_OF_RULE}	{
> -			DUMP_PREPROCESS;
> -			yylval.id = strdup(yytext);
> -			yyerror(_("Variable declarations do not accept trailing commas"));
> -			}
> +	{END_OF_RULE} {
> +		yylval.id = strdup(yytext);
> +		DUMP_PREPROCESS;
> +		yyerror(_("Variable declarations do not accept trailing commas"));
> +	}

It wasn't introduced here, but I don't understand the strdup();
yyerror() is going to exit anyway.

>  
> -	\\\n		{ DUMP_PREPROCESS; current_lineno++ ; }
> +	\\\n	{ DUMP_PREPROCESS; current_lineno++ ; }
>  
> -	\r?\n		{
> -			DUMP_PREPROCESS;
> -			current_lineno++;
> -			yy_pop_state();
> -			}
> -	[^\n]		{
> -			DUMP_PREPROCESS;
> -			/* Something we didn't expect */
> -			yyerror(_("Found unexpected character: '%s'"), yytext);
> -			}
> +	\r?\n	{
> +		DUMP_PREPROCESS;
> +		current_lineno++;
> +		yy_pop_state();
> +	}
>  }
>  
>  <NETWORK_MODE>{
> -	{WS}+		{ DUMP_PREPROCESS; /* Eat whitespace */ }
> -
> -	{IDS}		{
> -			DUMP_PREPROCESS;
> -			yylval.id = strdup(yytext);
> -			return TOK_ID;
> -			}
> -	{END_OF_RULE}	{
> -			DUMP_PREPROCESS;
> -			yy_pop_state();
> -			return TOK_END_OF_RULE;
> -		}
> -	[^\n]		{
> -			DUMP_PREPROCESS;
> -			  /* Something we didn't expect */
> -			yylval.id = strdup(yytext);
> -			yyerror(_("(network_mode) Found unexpected character: '%s'"), yylval.id);
> -			}
> -
> -	\r?\n		{
> -			DUMP_PREPROCESS;
> -			current_lineno++;
> -			}
> +	{IDS} {
> +		yylval.id = strdup(yytext);
> +		RETURN_TOKEN(TOK_ID);
> +	}
>  }
>  
>  <CHANGE_PROFILE_MODE>{
> -	{ARROW}	        {
> -			DUMP_PREPROCESS;
> -			PDEBUG("Matched a change profile arrow\n");
> -			return TOK_ARROW;
> -			}
> -
> -	({IDS}|{QUOTED_ID})	{
> -			  /* Ugh, this is a gross hack. I used to use
> -			   * {IDS} to match all TOK_IDs, but that would
> -			   * also match TOK_MODE + TOK_END_OF_RULE
> -			   * without any spaces in between (because it's
> -			   * a longer match). So now, when I want to
> -			   * match any random string, I go into a
> -			   * separate state. */
> -			DUMP_PREPROCESS;
> -			yylval.id = processid(yytext, yyleng);
> -			PDEBUG("Found change profile name: \"%s\"\n", yylval.id);
> -			yy_pop_state();
> -			return TOK_ID;
> -		}
> +	{ARROW}		{ RETURN_TOKEN(TOK_ARROW); }
>  
> -	{WS}+			{  DUMP_PREPROCESS; /* Ignoring whitespace */ }
> -	[^\n]	{
> -			DUMP_PREPROCESS;
> -			/* Something we didn't expect */
> -			yyerror(_("Found unexpected character: '%s'"), yytext);
> -		}
> +	({IDS}|{QUOTED_ID}) {
> +		yylval.id = processid(yytext, yyleng);
> +		POP_AND_RETURN(TOK_ID);
> +	}
>  }
>  
>  <RLIMIT_MODE>{
> -	{WS}+		{ DUMP_PREPROCESS; /* Eat whitespace */ }
> -
> -
>  	-?{NUMBER}[[:alpha:]]*  {

Not introduced in this patch, but can we use a more-specific set of
chars here to give better error messages? Or would errors get worse? At
least the line number is easily available here...

> -			DUMP_PREPROCESS;
> -		        yylval.var_val = strdup(yytext);
> -		        return TOK_VALUE;
> -			}
> +	        yylval.var_val = strdup(yytext);
> +		RETURN_TOKEN(TOK_VALUE);
> +	}
>  
>  	{KEYWORD}	{
> -			DUMP_PREPROCESS;
> -		        yylval.id = strdup(yytext);
> -			if (strcmp(yytext, "infinity") == 0)
> -				return TOK_VALUE;
> -			return TOK_ID;
> -			}
> +	        yylval.id = strdup(yytext);
> +		if (strcmp(yytext, "infinity") == 0)
> +			RETURN_TOKEN(TOK_VALUE);
> +		RETURN_TOKEN(TOK_ID);
> +	}
>  
> -	{LT_EQUAL}	{ DUMP_PREPROCESS; return TOK_LE; }
> +	{LT_EQUAL}	{ RETURN_TOKEN(TOK_LE); }
> +}
>  
> -	{END_OF_RULE}	{
> -			DUMP_PREPROCESS;
> -			yy_pop_state();
> -			return TOK_END_OF_RULE;
> -			}
> +<MOUNT_MODE>{
> +	{ARROW}		{ RETURN_TOKEN(TOK_ARROW); }
>  
> -	\\\n		{
> -			DUMP_PREPROCESS;
> -			current_lineno++;
> -			yy_pop_state();
> -			}
> +	({IDS_NOEQ}|{PATHNAME}|{QUOTED_ID}) {
> +		yylval.id = processid(yytext, yyleng);
> +		RETURN_TOKEN(TOK_ID);
> +	}
> +}
>  
> -	\r?\n		{
> -			DUMP_PREPROCESS;
> -			current_lineno++;
> -			yy_pop_state();
> -			}
> +#include/.*\r?\n	{

Hunh, I don't think I knew that "# include" wouldn't include a file. Now I
do know. :)

> +	DUMP_PREPROCESS;
> +	PDEBUG("Matched: %s\n", yytext);
> +	yy_push_state(INCLUDE);
>  }
>  
> -<MOUNT_MODE>{
> -	{WS}+		{  DUMP_PREPROCESS; /* Ignoring whitespace */ }
> +#.*\r?\n	{ /* normal comment */
> +	DUMP_PREPROCESS;
> +	PDEBUG("comment(%d): %s\n", current_lineno, yytext);
> +	current_lineno++;
> +}
>  
> -	{ARROW}		{
> -			DUMP_PREPROCESS;
> -			PDEBUG("Matched arrow\n");
> -			return TOK_ARROW;
> -			}
> +{CARET}		{ PUSH_AND_RETURN(SUB_ID, TOK_CARET); }
>  
> -	({IDS_NOEQ}|{PATHNAME}|{QUOTED_ID})	{
> -			DUMP_PREPROCESS;
> -			yylval.id = processid(yytext, yyleng);
> -			PDEBUG("Found ID: \"%s\"\n", yylval.id);
> -			return TOK_ID;
> -			}
> +{ARROW}		{ RETURN_TOKEN(TOK_ARROW); }
>  
> +{EQUALS}	{ PUSH_AND_RETURN(ASSIGN_MODE, TOK_EQUALS); }
> +
> +{ADD_ASSIGN}	{ PUSH_AND_RETURN(ASSIGN_MODE, TOK_ADD_ASSIGN); }
> +
> +{SET_VARIABLE}	{
> +	yylval.set_var = strdup(yytext);
> +	RETURN_TOKEN(TOK_SET_VAR);
> +}
> +
> +{BOOL_VARIABLE}	{
> +	yylval.bool_var = strdup(yytext);
> +	RETURN_TOKEN(TOK_BOOL_VAR);
> +}
> +
> +{OPEN_BRACE}	{ RETURN_TOKEN(TOK_OPEN); }
> +
> +{CLOSE_BRACE}	{ RETURN_TOKEN(TOK_CLOSE); }
> +
> +({PATHNAME}|{QPATHNAME}) {
> +	yylval.id = processid(yytext, yyleng);
> +	RETURN_TOKEN(TOK_ID);
> +}
> +
> +({MODES})/([[:space:],]) {
> +	yylval.mode = strdup(yytext);
> +	RETURN_TOKEN(TOK_MODE);
> +}
> +
> +{HAT}		{ PUSH_AND_RETURN(SUB_ID, TOK_HAT); }
> +
> +{PROFILE}	{ PUSH_AND_RETURN(SUB_ID, TOK_PROFILE); }
> +
> +{COLON}		{ RETURN_TOKEN(TOK_COLON); }
> +
> +{OPEN_PAREN}	{ PUSH_AND_RETURN(LIST_VAL_MODE, TOK_OPENPAREN); }
> +
> +{VARIABLE_NAME}	{
> +	DUMP_PREPROCESS;
> +	int token = get_keyword_token(yytext);
> +	int state = INITIAL;
> +
> +	/* special cases */
> +	switch (token) {
> +	case -1:
> +		/* no token found */
> +		yylval.id = processunquoted(yytext, yyleng);
> +		RETURN_TOKEN(TOK_ID);
> +		break;
> +	case TOK_RLIMIT:
> +		state = RLIMIT_MODE;
> +		break;
> +	case TOK_NETWORK:
> +		state = NETWORK_MODE;
> +		break;
> +	case TOK_CHANGE_PROFILE:
> +		state = CHANGE_PROFILE_MODE;
> +		break;
> +	case TOK_MOUNT:
> +	case TOK_REMOUNT:
> +	case TOK_UMOUNT:
> +		state = MOUNT_MODE;
> +		break;
> +	default: /* nothing */
> +		break;
> +	}
> +	PUSH_AND_RETURN(state, token);
> +}
> +
> +<INITIAL,NETWORK_MODE,RLIMIT_MODE,MOUNT_MODE>{
>  	{END_OF_RULE}	{
> -			DUMP_PREPROCESS;
> +		if (YY_START != INITIAL)
>  			yy_pop_state();
> -			return TOK_END_OF_RULE;
> -			}
> -
> -	[^\n]		{
> -			DUMP_PREPROCESS;
> -			/* Something we didn't expect */
> -			yyerror(_("Found unexpected character: '%s'"), yytext);
> -			}
> +		RETURN_TOKEN(TOK_END_OF_RULE);
> +	}
>  
>  	\r?\n		{
> -			DUMP_PREPROCESS;
> -			current_lineno++;
> -			yy_pop_state();
> -			}
> +		DUMP_PREPROCESS;
> +		current_lineno++;
> +	}
>  }
>  
> -#include/.*\r?\n	 { /* include */
> -			PDEBUG("Matched #include\n");
> -			yy_push_state(INCLUDE);
> -			}
> -
> -#.*\r?\n		{ /* normal comment */
> -			DUMP_PREPROCESS;
> -			PDEBUG("comment(%d): %s\n", current_lineno, yytext);
> -			current_lineno++;
> -			}
> -
> -{END_OF_RULE}		{ DUMP_PREPROCESS; return TOK_END_OF_RULE; }
> -
> -{CARET}			{
> -			DUMP_PREPROCESS;
> -			PDEBUG("Matched hat ^\n");
> -			yy_push_state(SUB_ID);
> -			return TOK_CARET;
> -			}
> -{ARROW}			{
> -			DUMP_PREPROCESS;
> -			PDEBUG("Matched a arrow\n");
> -			return TOK_ARROW;
> -			}
> -{EQUALS}		{
> -			DUMP_PREPROCESS;
> -			PDEBUG("Matched equals for assignment\n");
> -			yy_push_state(ASSIGN_MODE);
> -			return TOK_EQUALS;
> -			}
> -{ADD_ASSIGN}		{
> -			DUMP_PREPROCESS;
> -			PDEBUG("Matched additive value assignment\n");
> -			yy_push_state(ASSIGN_MODE);
> -			return TOK_ADD_ASSIGN;
> -			}
> -{SET_VARIABLE}		{
> -			DUMP_PREPROCESS;
> -			yylval.set_var = strdup(yytext);
> -			PDEBUG("Found set variable %s\n", yylval.set_var);
> -			return TOK_SET_VAR;
> -			}
> -
> -{BOOL_VARIABLE}		{
> -			DUMP_PREPROCESS;
> -			yylval.bool_var = strdup(yytext);
> -			PDEBUG("Found boolean variable %s\n", yylval.bool_var);
> -			return TOK_BOOL_VAR;
> -			}
> -
> -{OPEN_BRACE}		{
> -			DUMP_PREPROCESS;
> -			PDEBUG("Open Brace\n");
> -			return TOK_OPEN;
> -			}
> -{CLOSE_BRACE}		{
> -			DUMP_PREPROCESS;
> -			PDEBUG("Close Brace\n");
> -			return TOK_CLOSE;
> -			}
> -
> -({PATHNAME}|{QPATHNAME})		{
> -			DUMP_PREPROCESS;
> -			yylval.id = processid(yytext, yyleng);
> -			PDEBUG("Found id: \"%s\"\n", yylval.id);
> -			return TOK_ID;
> -			}
> -
> -({MODES})/([[:space:],])	{
> -			DUMP_PREPROCESS;
> -			yylval.mode = strdup(yytext);
> -			PDEBUG("Found modes: %s\n", yylval.mode);
> -			return TOK_MODE;
> -			}
> -
> -{HAT}			{
> -			DUMP_PREPROCESS;
> -			yy_push_state(SUB_ID);
> -			return TOK_HAT;
> -			}
> -
> -{PROFILE}		{
> -			DUMP_PREPROCESS;
> -			yy_push_state(SUB_ID);
> -			return TOK_PROFILE;
> -			}
> -
> -{COLON}			{
> -			DUMP_PREPROCESS;
> -			PDEBUG("Found a colon\n");
> -			return TOK_COLON;
> -			}
> -
> -{OPEN_PAREN}	{
> -			DUMP_PREPROCESS;
> -			PDEBUG("listval (\n");
> -			yy_push_state(LIST_VAL_MODE);
> -			return TOK_OPENPAREN;
> -			}
> -
> -{VARIABLE_NAME}		{
> -			DUMP_PREPROCESS;
> -			int token = get_keyword_token(yytext);
> -
> -			/* special cases */
> -			switch (token) {
> -			case -1:
> -				/* no token found */
> -				yylval.id = processunquoted(yytext, yyleng);
> -				PDEBUG("Found (var) id: \"%s\"\n", yylval.id);
> -				return TOK_ID;
> -				break;
> -			case TOK_RLIMIT:
> -				yy_push_state(RLIMIT_MODE);
> -				break;
> -			case TOK_NETWORK:
> -				yy_push_state(NETWORK_MODE);
> -				break;
> -			case TOK_CHANGE_PROFILE:
> -				yy_push_state(CHANGE_PROFILE_MODE);
> -				break;
> -			case TOK_MOUNT:
> -			case TOK_REMOUNT:
> -			case TOK_UMOUNT:
> -				PDEBUG("Entering mount\n");
> -				yy_push_state(MOUNT_MODE);
> -				break;
> -			default: /* nothing */
> -				break;
> -			}
> -			return token;
> -			}
> -
> -{WS}+			{  DUMP_PREPROCESS; /* Ignoring whitespace */ }
> -
> -\r?\n			{ DUMP_PREPROCESS; current_lineno++ ; }
> -
> -[^\n]			{
> -			DUMP_PREPROCESS;
> -
> -			  /* Something we didn't expect */
> -			yyerror(_("Found unexpected character: '%s'"), yytext);
> -			}
> -
> +<INITIAL,SUB_ID,SUB_VALUE,LIST_VAL_MODE,EXTCOND_MODE,ASSIGN_MODE,NETWORK_MODE,CHANGE_PROFILE_MODE,RLIMIT_MODE,MOUNT_MODE>{
> +	[^\n]	{
> +		DUMP_PREPROCESS;
> +		/* Something we didn't expect */
> +		yyerror(_("Found unexpected character: '%s'"), yytext);
> +	}
> +}
>  %%
> +
> +/* Create a table mapping lexer state number to the name used in the
> + * in the code.  This allows for better debug output
> + */
> +static const char *const state_names[] = {
> +	STATE_TABLE_ENT(INITIAL),
> +	STATE_TABLE_ENT(SUB_ID),
> +	STATE_TABLE_ENT(SUB_VALUE),
> +	STATE_TABLE_ENT(EXTCOND_MODE),
> +	STATE_TABLE_ENT(NETWORK_MODE),
> +	STATE_TABLE_ENT(LIST_VAL_MODE),
> +	STATE_TABLE_ENT(ASSIGN_MODE),
> +	STATE_TABLE_ENT(RLIMIT_MODE),
> +	STATE_TABLE_ENT(MOUNT_MODE),
> +	STATE_TABLE_ENT(CHANGE_PROFILE_MODE),
> +	STATE_TABLE_ENT(INCLUDE),
> +};
> diff --git a/parser/parser_yacc.y b/parser/parser_yacc.y
> index 351a173..433bb6d 100644
> --- a/parser/parser_yacc.y
> +++ b/parser/parser_yacc.y
> @@ -87,6 +87,7 @@ void add_local_entry(struct codomain *cod);
>  %token TOK_MODE
>  %token TOK_END_OF_RULE
>  %token TOK_EQUALS
> +%token TOK_NOTEQUALS

.. Well, okay, used here, but not used _much_. :)

>  %token TOK_ARROW
>  %token TOK_ADD_ASSIGN
>  %token TOK_LE
> -- 

Man, what an awesome cleanup. Thanks. :)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 490 bytes
Desc: Digital signature
URL: <https://lists.ubuntu.com/archives/apparmor/attachments/20130724/5946f7e5/attachment-0001.pgp>