[apparmor] [PATCH] Add compressed dfa matching routines to library, and a base test program

Sat Jan 9 04:22:09 UTC 2016

On Fri, Jan 08, 2016 at 03:48:06PM -0800, John Johansen wrote:
> Signed-off-by: John Johansen <john.johansen at canonical.com>

This is pretty cool! A few comments inline...

> +++ b/devtools/README

> +    ./test_re -t -c 10000000 -E expr.txt -M match.txt
> +    elapsed time: 8.421s
> +
> +to generate an hfa
> +    ./test_r -o hfa.file -E expr.txt

Should probably be ./test_re

> +print_hfa: simple hfa text dump utility
> +Eg.
> +  print_hfa hfa.file

Should probably be ./print_hfa

> +++ b/devtools/print_hfa.c

> +char *load_file(const char *path, ssize_t *fsize)
> +{
> +	struct stat stat;
> +	char *buffer;
> +	ssize_t remaining;
> +	int fd, res;
> +
> +	fd = open(path, O_RDONLY);
> +	if (fd == -1) {
> +		fprintf(stderr, " Error '%s', failed to open '%s'\n", strerror(errno), path);
> +		goto out;
> +	}
> +
> +	res = fstat(fd, &stat);
> +	if (res != 0) {
> +		fprintf(stderr, "Error '%s', could not get size of file '%s'\n", strerror(errno), path);
> +		close(fd);
> +		goto out_fd;
> +	}

These error strings are funny, I'm more accustomed to seeing the error
come after the filename; also, glibc has a %m that automatically means
"strerror(errno)". e.g:

  +		fprintf(stderr, "Failed to open '%s': %m\n", path);
  +		fprintf(stderr, "Failed to get size of file '%s': %m\n", path);

> +
> +	buffer = (char *) malloc(stat.st_size + 1);

No use is made of this terminating NUL in this program; much of this could
be re-written to use:

buffer = mmap(NULL, stat.st_size, PROT_READ | MAP_PRIVATE, MAP_POPULATE, fd, 0);

> +int main(int argc, char **argv)
> +{
> +	const char *err;
> +	char *chfa;
> +	ssize_t size;
> +	int i;
> +	
> +	progname = argv[0];
> +
> +	if (argc < 2) {
> +		usage();
> +		return 1;
> +	}
> +
> +	for (i = optind; i <= argc && argv[i]; i++) {

What does the i <= argc && argv[i] protect against that i < argc doesn't?

> +/* To write hfa to a memory buffer do
> +void print_hfa_to_memory_stream(struct aa_hfa *hfa)
> +{
> +	char *bp;
> +	size_t *size;
> +
> +	f = open_memstream(&bp, size);

There's a bug here, it should read:

  +	char *bp;
  +	size_t size; /* note no * */
  +
  +	f = open_memstream(&bp, &size);

This is so cool, I can't believe I haven't seen open_memstream() before.

> +	fprint_hfa(f, hfa);
> +	fflush (f);				// update bp and size
> +	printf ("buf = `%s', size = %d\n", bp, size);
> +	fprintf (f, ", world");
> +	fclose (f);				// update bp and size
> +	printf ("buf = `%s', size = %d\n", bp, size);
> +	free(bp);				// free stream buffer
> +}
> +*/

> +++ b/devtools/test_re.cc

> +static int process_args(int argc, char *argv[])
> +{
> +	int c, o;
> +
> +	while ((c = getopt_long(argc, argv, short_options, long_options, &o)) != -1) {
> +		long tmp;
> +		switch (c) {
> +		case 0:
> +			fprintf(stderr, "Error in getopt_long handling\n");
> +			exit(1);
> +			break;
> +		case 'h':
> +			if (!optarg) {
> +				usage();
> +			} else if (strcmp(optarg, "Dump") == 0 ||
> +				   strcmp(optarg, "dump") == 0 ||
> +				   strcmp(optarg, "D") == 0) {
> +				display_dump(progname);
> +			} else if (strcmp(optarg, "Optimize") == 0 ||
> +				   strcmp(optarg, "optimize") == 0 ||
> +				   strcmp(optarg, "O") == 0) {
> +				display_optimize(progname);
> +			} else {
> +				fprintf(stderr, "%s: Invalid --help option %s\n",
> +				       progname, optarg);
> +				exit(1);
> +			}
> +			exit(0);
> +			break;
> +		case 't':
> +			g_print_time = 1;
> +			break;
> +		case 'c':	/* repeat count */
> +			tmp = strtol(optarg, NULL, 0);
> +			if (tmp < 1)
> +				fprintf(stderr, "Error bad repeat count '%s'\n",
> +					optarg);
> +			else if (tmp == LONG_MAX && errno == ERANGE)
> +				fprintf(stderr, "Error repeat count '%s' out of range\n", optarg);
> +			else
> +				g_repeat_count = tmp;
> +			break;
> +		case 's':
> +			g_sep = optarg;
> +			break;
> +		case 'e':	/* expression */
> +			if (!process_expr_line(optarg))
> +				fprintf(stderr, "Error processing expr '%s'\n",
> +					optarg);
> +			break;
> +		case 'E':
> +			if (!process_expr_file(optarg)) {
> +				fprintf(stderr, "Error processing expr file '%s'\n", optarg);
> +				exit(1);
> +			}
> +			break;
> +		case 'm':	/* text to match */
> +			if (!process_match_line(optarg))
> +				fprintf(stderr, "Error processing match '%s'\n",
> +					optarg);
> +			break;
> +		case 'M':
> +			if (!process_match_file(optarg)) {
> +				fprintf(stderr, "Error processing match file '%s'\n", optarg);
> +				exit(1);
> +			}
> +			break;
> +		case 'o':
> +			g_out_file = optarg;
> +			break;
> +		case 'D':
> +			if (!optarg) {
> +				g_dump_info = 1;
> +				handle_flag_table(dumpflag_table, "stats",
> +						  &hfaflags);
> +			} else if (!handle_flag_table(dumpflag_table, optarg,
> +						      &hfaflags)) {
> +				fprintf(stderr, "%s: Invalid --Dump option %s\n",
> +				       progname, optarg);
> +				exit(1);
> +			}
> +			break;

I think there's a bug here, if handle_flag_table() returns 1, the
g_dump_info variable won't be set. Is this intentional? It could be fixed
by moving the g_dump_info up two lines, before the !optarg check.

> +		case 'O':
> +			if (!optarg) {
> +				/* todo: default optimization */
> +			} else if (!handle_flag_table(optflag_table, optarg,
> +					       &hfaflags)) {
> +				fprintf(stderr, "%s: Invalid --Optimize option %s\n",
> +				       progname, optarg);
> +				exit(1);
> +			}
> +			break;
> +
> +		}
> +
> +	}
> +
> +	return optind;
> +}
> +

> +int parse_entry(const char *line, entry *e)
> +{
> +	char *pos;
> +	unsigned long tmp;
> +
> +	while(isspace(*line))
> +		line++;
> +	if (*line == 0)
> +		return -1;
> +
> +	tmp = strtoul(line, &pos, 0);
> +	if ((tmp == ULONG_MAX && errno == ERANGE) || !pos)
> +	{
> +		fprintf(stderr, "Error: parsing entry '%s'\n", line);
> +		return 0;
> +	}
> +	e->perm = tmp;
> +
> +	while(isspace(*pos))
> +		pos++;
> +	e->str = pos;

I think this may give confusing results if the last line of the file looks
like:

1

Then e->str is left pointing at an ascii NUL.

> +/* return the number of failures */
> +int do_matches(aa_hfa *hfa, unsigned int count)
> +{
> +	unsigned int failures = 0;
> +	clock_t t = clock();
> +
> +	for (unsigned int i = 0; i < count; i++) {
> +		for (std::list<entry>::iterator j = g_matches.begin(); j != g_matches.end(); j++) {
> +			struct aa_state state;
> +			aa_hfa_match(hfa, &state, j->str);
> +			unsigned int accept = aa_hfa_accept(hfa, &state);
> +			if (accept != j->perm) {
> +				failures++;
> +			}
> +		}
> +	}
> +	t = clock() - t;
> +	unsigned int secs = t/CLOCKS_PER_SEC;
> +	if (g_print_time)
> +		printf("elapsed time: %u.%3.3lds\n", secs, (((t % CLOCKS_PER_SEC)*1000+500)/CLOCKS_PER_SEC));
> +

I don't understand the +500 here..

> +int main(int argc, char **argv)
> +{
> +	struct direct_mapped_accept accept;
> +	const char *err;
> +
> +	progname = argv[0];
> +
> +	if (argc < 3) {
> +		usage();
> +		return 1;
> +	}
> +
> +	g_rules = new aare_rules(0);
> +	if (!g_rules) {
> +		fprintf(stderr, "Error failed to create ruleset\n");
> +		exit(1);
> +	}
> +
> +	optind = process_args(argc, argv);
> +
> +	for (int i = optind; i <= argc && argv[i]; i++) {
> +		if (!process_match_line(argv[i]))
> +			fprintf(stderr, "Error failed to process match arg '%s'\n", argv[i]);
> +	}
> +
> +	if (!g_matches.size() && !g_out_file) {
> +		fprintf(stderr, "Error at least one match string must be defined\n");
> +		exit(1);
> +	}
> +
> +	if (!g_rules->rule_count) {
> +		fprintf(stderr, "Error at least one Expr pattern must be defined\n");
> +		exit(1);
> +	}
> +	if (g_dump_info) {
> +		fprintf(stderr, "Processed\n"
> +			        "  %ld expr entries\n"
> +			        "  %ld match entries\n",
> +			(long) g_rules->rule_count, g_matches.size());
> +		fprintf(stderr, "Creating HFA\n");
> +	}
> +	size_t size;
> +	void *chfa = g_rules->create_dfa(&size, hfaflags);
> +	if (!chfa) {
> +		fprintf(stderr, "Error failed to compile hfa\n");
> +		exit(1);
> +	}
> +	delete g_rules;
> +
> +	if (g_out_file) {
> +		if (g_dump_info)
> +			fprintf(stderr, "Writing HFA to '%s'\n", g_out_file);
> +		FILE *f = fopen(g_out_file, "w");
> +		if (!f) {
> +			fprintf(stderr, "Error failed to dump hfa to '%s'\n", g_out_file);
> +			exit(1);

It'd be nice to have the %m to describe why it failed:

  +			fprintf(stderr, "Error failed to dump hfa to '%s': %m\n", g_out_file);

> +		}
> +		fwrite(chfa, size, 1, f);
> +		fclose(f);
> +	}
> +
> +	if (g_dump_info)
> +		fprintf(stderr, "Unpacking HFA\n");
> +	struct aa_hfa *hfa = aa_hfa_unpack(chfa, size, &err, &accept);
> +	if (!hfa) {
> +		fprintf(stderr, "Error failed to unpack compressed hfa: %s\n", err);
> +		exit(1);
> +	}
> +	free(chfa);
> +
> +	if (g_dump_info)
> +		fprintf(stderr, "Starting matches\n");
> +	if (g_matches.size()) {
> +		int failures = do_matches(hfa, g_repeat_count);
> +		if (failures) {
> +			fprintf(stderr, "Error failed to match to expected perms %d times\n", failures);
> +			exit(2);
> +		}
> +	}
> +
> +	aa_hfa_free(hfa);
> +	return 0;
> +}

> +++ b/libraries/libapparmor/src/hfa.c

> + * AppArmor's extended Hybrid Finite Atomata (eHFA) matching engine is

Automata

> + * essentially an NFA with additional DFA constraints, with some limited
> + * function callouts and scratch memory (it is not turing complete nor

Turing

> + * even a full PFA).
> + *
> + * It is not a true NFA as it does not allow for lambda transitions, nor
> + * does it allow for an arbitrary set of follow states for any given
> + * character transition. Instead it should be thought of as a DFA with
> + * limited NFA states that have constraints such that the number of
> + * potential NFA states to track in parallel is known in advanced and

advance

> + * of a fixed number.
> + *
> + * Atm this code only implements the DFA portion of the match and does
> + * not allow for NFA states nor the extended work memory.
> + */

> +/**
> + * aa_hfa_init_state - initialize state information
> + * @hfa: hfa to base initialization off of
> + * @state: state tracking struct to initialize
> + *
> + * Returns: 0 on success else error

Many of these functions cannot return errors, and indeed can't error. It
might make sense to change the return type to void:

> + */
> +int aa_hfa_init_state(struct aa_hfa *hfa, struct aa_state *state)
> +{
> +	state->state = AA_HFA_START;
> +
> +	return 0;
> +}
> +

> +/**
> + * aa_hfa_reset - reset a state tracking structure to start a new match
> + * @hfa: dfa to reset for
> + * @state: state to reset
> + *
> + * Returns: 0 on success else error

This function can't error:

> + */
> +int aa_hfa_reset(struct aa_hfa *hfa, struct aa_state *state)
> +{
> +	state->state = AA_HFA_START;
> +
> +	return 0;
> +}

> +/**
> + * aa_hfa_matchn_cont - traverse @hfa from @state to find state @str stops at
> + * @hfa: the hfa to match @str against  (NOT NULL)
> + * @state: the state of the hfa to start matching in
> + * @str: the string of bytes to match against the hfa  (NOT NULL)
> + * @len: length of the string of bytes to match
> + *
> + * aa_hfa_matchn will match @str against the hfa and return the state it
> + * finished matching in. The final state can be used to look up the accepting
> + * label, or as the start state of a continuing match.
> + *
> + * This function will happily match again the 0 byte and only finishes
> + * when @len input is consumed.
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_reset, and
> + *       aa_hfa_can_fail.
> + */

The old function name aa_hfa_matchn* survived in the comment block; and
the function can't return anything except 0, so this might be worth
changing to void too:

> +int aa_hfa_continuen(struct aa_hfa *hfa, struct aa_state *state,
> +		     const char *str, size_t len)
> +{
> +	unsigned int s = state->state;
> +
> +	if (s == 0)
> +		return 0;
> +
> +	/* current state is <state>, matching character *str */
> +	if (hfa->ec) {
> +		for (; len; len--)
> +			match_EC_char(s, hfa, *str++);
> +	} else {
> +		for (; len; len--)
> +			match_char(s, hfa, *str++);
> +	}
> +	state->state = s;
> +
> +	return 0;
> +}
> +
> +/**
> + * aa_hfa_continue - traverse @hfa from @state to find state @str stops at
> + * @hfa: the hfa to match @str against  (NOT NULL)
> + * @state: the state of the hfa to start matching in
> + * @str: the null terminated string of bytes to match against the hfa (NOT NULL)
> + *
> + * aa_hfa_match will match @str against the hfa and return the state it
> + * finished matching in. The final state can be used to look up the accepting
> + * label, or as the start state of a continuing match.
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_start, and
> + *       aa_hfa_can_fail.
> + */

The old function name aa_hfa_match survived in the comment block, and this
function can't return anything except 0, so it might be worth changing to
void:

> +int aa_hfa_continue(struct aa_hfa *hfa, struct aa_state *state, const char *str)
> +{
> +	unsigned int s = state->state;
> +
> +	if (s == 0)
> +		return 0;
> +
> +	if (hfa->ec) {
> +		while (*str)
> +			match_EC_char(s, hfa, *str++);
> +	} else {
> +		while (*str)
> +			match_char(s, hfa, *str++);
> +	}
> +	state->state = s;
> +
> +	return 0;
> +}
> +

> +/**
> + * aa_hfa_matchn - reset and traverse @hfa to find state @str stops at
> + * @hfa: the hfa to match @str against  (NOT NULL)
> + * @state: the state of the hfa to start matching in
> + * @str: the string of bytes to match against the hfa  (NOT NULL)
> + * @len: length of the string of bytes to match
> + *
> + * aa_hfa_matchn will match @str against the hfa and return the state it
> + * finished matching in. The final state can be used to look up the accepting
> + * label, or as the start state of a continuing match.
> + *
> + * This function will happily match again the 0 byte and only finishes
> + * when @len input is consumed.
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_reset, and
> + *       aa_hfa_can_fail.
> + */
> +int aa_hfa_matchn(struct aa_hfa *hfa, struct aa_state *state, const char *str,
> +		  size_t len)
> +{
> +	int err = aa_hfa_reset(hfa, state);
> +	if (err)
> +		return err;
> +	return aa_hfa_continuen(hfa, state, str, len);
> +}
> +
> +/**
> + * aa_hfa_match - reset and traverse @hfa to find state @str stops at
> + * @hfa: the hfa to match @str against  (NOT NULL)
> + * @state: the state of the hfa to start matching in
> + * @str: the null terminated string of bytes to match against the hfa (NOT NULL)
> + *
> + * aa_hfa_match will match @str against the hfa and return the state it
> + * finished matching in. The final state can be used to look up the accepting
> + * label, or as the start state of a continuing match.
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_start, and
> + *       aa_hfa_can_fail.
> + */
> +int aa_hfa_match(struct aa_hfa *hfa, struct aa_state *state, const char *str)
> +{
> +	int err = aa_hfa_reset(hfa, state);
> +	if (err)
> +		return err;
> +	return aa_hfa_continue(hfa, state, str);
> +}
> +
> +/**
> + * aa_hfa_next - step one character to the next state in the hfa
> + * @hfa: the hfa to tranverse (NOT NULL)
> + * @state: the state to start in
> + * @c: the input character to transition on
> + *
> + * aa_hfa_match will step through the hfa by one input character @c
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_start, and
> + *       aa_hfa_can_fail.
> + */

This function can't return anything except 0.

> +int aa_hfa_next(struct aa_hfa *hfa, struct aa_state *state, const char c)
> +{
> +	if (hfa->ec)
> +		match_EC_char(state->state, hfa, c);
> +	else
> +		match_char(state->state, hfa, c);
> +
> +	return 0;
> +}

Thanks
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 473 bytes
Desc: Digital signature
URL: <https://lists.ubuntu.com/archives/apparmor/attachments/20160108/454f02bd/attachment-0001.pgp>