[apparmor] [PATCH] Add compressed dfa matching routines to library, and a base test program
Seth Arnold
seth.arnold at canonical.com
Sat Jan 9 04:22:09 UTC 2016
On Fri, Jan 08, 2016 at 03:48:06PM -0800, John Johansen wrote:
> Signed-off-by: John Johansen <john.johansen at canonical.com>
This is pretty cool! A few comments inline...
> +++ b/devtools/README
> + ./test_re -t -c 10000000 -E expr.txt -M match.txt
> + elapsed time: 8.421s
> +
> +to generate an hfa
> + ./test_r -o hfa.file -E expr.txt
Should probably be ./test_re
> +print_hfa: simple hfa text dump utility
> +Eg.
> + print_hfa hfa.file
Should probably be ./print_hfa
> +++ b/devtools/print_hfa.c
> +char *load_file(const char *path, ssize_t *fsize)
> +{
> + struct stat stat;
> + char *buffer;
> + ssize_t remaining;
> + int fd, res;
> +
> + fd = open(path, O_RDONLY);
> + if (fd == -1) {
> + fprintf(stderr, " Error '%s', failed to open '%s'\n", strerror(errno), path);
> + goto out;
> + }
> +
> + res = fstat(fd, &stat);
> + if (res != 0) {
> + fprintf(stderr, "Error '%s', could not get size of file '%s'\n", strerror(errno), path);
> + close(fd);
> + goto out_fd;
> + }
These error strings are funny, I'm more accustomed to seeing the error
come after the filename; also, glibc has a %m that automatically means
"strerror(errno)". e.g:
+ fprintf(stderr, "Failed to open '%s': %m\n", path);
+ fprintf(stderr, "Failed to get size of file '%s': %m\n", path);
> +
> + buffer = (char *) malloc(stat.st_size + 1);
No use is made of this terminating NUL in this program; much of this could
be re-written to use:
buffer = mmap(NULL, stat.st_size, PROT_READ | MAP_PRIVATE, MAP_POPULATE, fd, 0);
> +int main(int argc, char **argv)
> +{
> + const char *err;
> + char *chfa;
> + ssize_t size;
> + int i;
> +
> + progname = argv[0];
> +
> + if (argc < 2) {
> + usage();
> + return 1;
> + }
> +
> + for (i = optind; i <= argc && argv[i]; i++) {
What does the i <= argc && argv[i] protect against that i < argc doesn't?
> +/* To write hfa to a memory buffer do
> +void print_hfa_to_memory_stream(struct aa_hfa *hfa)
> +{
> + char *bp;
> + size_t *size;
> +
> + f = open_memstream(&bp, size);
There's a bug here, it should read:
+ char *bp;
+ size_t size; /* note no * */
+
+ f = open_memstream(&bp, &size);
This is so cool, I can't believe I haven't seen open_memstream() before.
> + fprint_hfa(f, hfa);
> + fflush (f); // update bp and size
> + printf ("buf = `%s', size = %d\n", bp, size);
> + fprintf (f, ", world");
> + fclose (f); // update bp and size
> + printf ("buf = `%s', size = %d\n", bp, size);
> + free(bp); // free stream buffer
> +}
> +*/
> +++ b/devtools/test_re.cc
> +static int process_args(int argc, char *argv[])
> +{
> + int c, o;
> +
> + while ((c = getopt_long(argc, argv, short_options, long_options, &o)) != -1) {
> + long tmp;
> + switch (c) {
> + case 0:
> + fprintf(stderr, "Error in getopt_long handling\n");
> + exit(1);
> + break;
> + case 'h':
> + if (!optarg) {
> + usage();
> + } else if (strcmp(optarg, "Dump") == 0 ||
> + strcmp(optarg, "dump") == 0 ||
> + strcmp(optarg, "D") == 0) {
> + display_dump(progname);
> + } else if (strcmp(optarg, "Optimize") == 0 ||
> + strcmp(optarg, "optimize") == 0 ||
> + strcmp(optarg, "O") == 0) {
> + display_optimize(progname);
> + } else {
> + fprintf(stderr, "%s: Invalid --help option %s\n",
> + progname, optarg);
> + exit(1);
> + }
> + exit(0);
> + break;
> + case 't':
> + g_print_time = 1;
> + break;
> + case 'c': /* repeat count */
> + tmp = strtol(optarg, NULL, 0);
> + if (tmp < 1)
> + fprintf(stderr, "Error bad repeat count '%s'\n",
> + optarg);
> + else if (tmp == LONG_MAX && errno == ERANGE)
> + fprintf(stderr, "Error repeat count '%s' out of range\n", optarg);
> + else
> + g_repeat_count = tmp;
> + break;
> + case 's':
> + g_sep = optarg;
> + break;
> + case 'e': /* expression */
> + if (!process_expr_line(optarg))
> + fprintf(stderr, "Error processing expr '%s'\n",
> + optarg);
> + break;
> + case 'E':
> + if (!process_expr_file(optarg)) {
> + fprintf(stderr, "Error processing expr file '%s'\n", optarg);
> + exit(1);
> + }
> + break;
> + case 'm': /* text to match */
> + if (!process_match_line(optarg))
> + fprintf(stderr, "Error processing match '%s'\n",
> + optarg);
> + break;
> + case 'M':
> + if (!process_match_file(optarg)) {
> + fprintf(stderr, "Error processing match file '%s'\n", optarg);
> + exit(1);
> + }
> + break;
> + case 'o':
> + g_out_file = optarg;
> + break;
> + case 'D':
> + if (!optarg) {
> + g_dump_info = 1;
> + handle_flag_table(dumpflag_table, "stats",
> + &hfaflags);
> + } else if (!handle_flag_table(dumpflag_table, optarg,
> + &hfaflags)) {
> + fprintf(stderr, "%s: Invalid --Dump option %s\n",
> + progname, optarg);
> + exit(1);
> + }
> + break;
I think there's a bug here, if handle_flag_table() returns 1, the
g_dump_info variable won't be set. Is this intentional? It could be fixed
by moving the g_dump_info up two lines, before the !optarg check.
> + case 'O':
> + if (!optarg) {
> + /* todo: default optimization */
> + } else if (!handle_flag_table(optflag_table, optarg,
> + &hfaflags)) {
> + fprintf(stderr, "%s: Invalid --Optimize option %s\n",
> + progname, optarg);
> + exit(1);
> + }
> + break;
> +
> + }
> +
> + }
> +
> + return optind;
> +}
> +
> +int parse_entry(const char *line, entry *e)
> +{
> + char *pos;
> + unsigned long tmp;
> +
> + while(isspace(*line))
> + line++;
> + if (*line == 0)
> + return -1;
> +
> + tmp = strtoul(line, &pos, 0);
> + if ((tmp == ULONG_MAX && errno == ERANGE) || !pos)
> + {
> + fprintf(stderr, "Error: parsing entry '%s'\n", line);
> + return 0;
> + }
> + e->perm = tmp;
> +
> + while(isspace(*pos))
> + pos++;
> + e->str = pos;
I think this may give confusing results if the last line of the file looks
like:
1
Then e->str is left pointing at an ascii NUL.
> +/* return the number of failures */
> +int do_matches(aa_hfa *hfa, unsigned int count)
> +{
> + unsigned int failures = 0;
> + clock_t t = clock();
> +
> + for (unsigned int i = 0; i < count; i++) {
> + for (std::list<entry>::iterator j = g_matches.begin(); j != g_matches.end(); j++) {
> + struct aa_state state;
> + aa_hfa_match(hfa, &state, j->str);
> + unsigned int accept = aa_hfa_accept(hfa, &state);
> + if (accept != j->perm) {
> + failures++;
> + }
> + }
> + }
> + t = clock() - t;
> + unsigned int secs = t/CLOCKS_PER_SEC;
> + if (g_print_time)
> + printf("elapsed time: %u.%3.3lds\n", secs, (((t % CLOCKS_PER_SEC)*1000+500)/CLOCKS_PER_SEC));
> +
I don't understand the +500 here..
> +int main(int argc, char **argv)
> +{
> + struct direct_mapped_accept accept;
> + const char *err;
> +
> + progname = argv[0];
> +
> + if (argc < 3) {
> + usage();
> + return 1;
> + }
> +
> + g_rules = new aare_rules(0);
> + if (!g_rules) {
> + fprintf(stderr, "Error failed to create ruleset\n");
> + exit(1);
> + }
> +
> + optind = process_args(argc, argv);
> +
> + for (int i = optind; i <= argc && argv[i]; i++) {
> + if (!process_match_line(argv[i]))
> + fprintf(stderr, "Error failed to process match arg '%s'\n", argv[i]);
> + }
> +
> + if (!g_matches.size() && !g_out_file) {
> + fprintf(stderr, "Error at least one match string must be defined\n");
> + exit(1);
> + }
> +
> + if (!g_rules->rule_count) {
> + fprintf(stderr, "Error at least one Expr pattern must be defined\n");
> + exit(1);
> + }
> + if (g_dump_info) {
> + fprintf(stderr, "Processed\n"
> + " %ld expr entries\n"
> + " %ld match entries\n",
> + (long) g_rules->rule_count, g_matches.size());
> + fprintf(stderr, "Creating HFA\n");
> + }
> + size_t size;
> + void *chfa = g_rules->create_dfa(&size, hfaflags);
> + if (!chfa) {
> + fprintf(stderr, "Error failed to compile hfa\n");
> + exit(1);
> + }
> + delete g_rules;
> +
> + if (g_out_file) {
> + if (g_dump_info)
> + fprintf(stderr, "Writing HFA to '%s'\n", g_out_file);
> + FILE *f = fopen(g_out_file, "w");
> + if (!f) {
> + fprintf(stderr, "Error failed to dump hfa to '%s'\n", g_out_file);
> + exit(1);
It'd be nice to have the %m to describe why it failed:
+ fprintf(stderr, "Error failed to dump hfa to '%s': %m\n", g_out_file);
> + }
> + fwrite(chfa, size, 1, f);
> + fclose(f);
> + }
> +
> + if (g_dump_info)
> + fprintf(stderr, "Unpacking HFA\n");
> + struct aa_hfa *hfa = aa_hfa_unpack(chfa, size, &err, &accept);
> + if (!hfa) {
> + fprintf(stderr, "Error failed to unpack compressed hfa: %s\n", err);
> + exit(1);
> + }
> + free(chfa);
> +
> + if (g_dump_info)
> + fprintf(stderr, "Starting matches\n");
> + if (g_matches.size()) {
> + int failures = do_matches(hfa, g_repeat_count);
> + if (failures) {
> + fprintf(stderr, "Error failed to match to expected perms %d times\n", failures);
> + exit(2);
> + }
> + }
> +
> + aa_hfa_free(hfa);
> + return 0;
> +}
> +++ b/libraries/libapparmor/src/hfa.c
> + * AppArmor's extended Hybrid Finite Atomata (eHFA) matching engine is
Automata
> + * essentially an NFA with additional DFA constraints, with some limited
> + * function callouts and scratch memory (it is not turing complete nor
Turing
> + * even a full PFA).
> + *
> + * It is not a true NFA as it does not allow for lambda transitions, nor
> + * does it allow for an arbitrary set of follow states for any given
> + * character transition. Instead it should be thought of as a DFA with
> + * limited NFA states that have constraints such that the number of
> + * potential NFA states to track in parallel is known in advanced and
advance
> + * of a fixed number.
> + *
> + * Atm this code only implements the DFA portion of the match and does
> + * not allow for NFA states nor the extended work memory.
> + */
> +/**
> + * aa_hfa_init_state - initialize state information
> + * @hfa: hfa to base initialization off of
> + * @state: state tracking struct to initialize
> + *
> + * Returns: 0 on success else error
Many of these functions cannot return errors, and indeed can't error. It
might make sense to change the return type to void:
> + */
> +int aa_hfa_init_state(struct aa_hfa *hfa, struct aa_state *state)
> +{
> + state->state = AA_HFA_START;
> +
> + return 0;
> +}
> +
> +/**
> + * aa_hfa_reset - reset a state tracking structure to start a new match
> + * @hfa: dfa to reset for
> + * @state: state to reset
> + *
> + * Returns: 0 on success else error
This function can't error:
> + */
> +int aa_hfa_reset(struct aa_hfa *hfa, struct aa_state *state)
> +{
> + state->state = AA_HFA_START;
> +
> + return 0;
> +}
> +/**
> + * aa_hfa_matchn_cont - traverse @hfa from @state to find state @str stops at
> + * @hfa: the hfa to match @str against (NOT NULL)
> + * @state: the state of the hfa to start matching in
> + * @str: the string of bytes to match against the hfa (NOT NULL)
> + * @len: length of the string of bytes to match
> + *
> + * aa_hfa_matchn will match @str against the hfa and return the state it
> + * finished matching in. The final state can be used to look up the accepting
> + * label, or as the start state of a continuing match.
> + *
> + * This function will happily match again the 0 byte and only finishes
> + * when @len input is consumed.
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_reset, and
> + * aa_hfa_can_fail.
> + */
The old function name aa_hfa_matchn* survived in the comment block; and
the function can't return anything except 0, so this might be worth
changing to void too:
> +int aa_hfa_continuen(struct aa_hfa *hfa, struct aa_state *state,
> + const char *str, size_t len)
> +{
> + unsigned int s = state->state;
> +
> + if (s == 0)
> + return 0;
> +
> + /* current state is <state>, matching character *str */
> + if (hfa->ec) {
> + for (; len; len--)
> + match_EC_char(s, hfa, *str++);
> + } else {
> + for (; len; len--)
> + match_char(s, hfa, *str++);
> + }
> + state->state = s;
> +
> + return 0;
> +}
> +
> +/**
> + * aa_hfa_continue - traverse @hfa from @state to find state @str stops at
> + * @hfa: the hfa to match @str against (NOT NULL)
> + * @state: the state of the hfa to start matching in
> + * @str: the null terminated string of bytes to match against the hfa (NOT NULL)
> + *
> + * aa_hfa_match will match @str against the hfa and return the state it
> + * finished matching in. The final state can be used to look up the accepting
> + * label, or as the start state of a continuing match.
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_start, and
> + * aa_hfa_can_fail.
> + */
The old function name aa_hfa_match survived in the comment block, and this
function can't return anything except 0, so it might be worth changing to
void:
> +int aa_hfa_continue(struct aa_hfa *hfa, struct aa_state *state, const char *str)
> +{
> + unsigned int s = state->state;
> +
> + if (s == 0)
> + return 0;
> +
> + if (hfa->ec) {
> + while (*str)
> + match_EC_char(s, hfa, *str++);
> + } else {
> + while (*str)
> + match_char(s, hfa, *str++);
> + }
> + state->state = s;
> +
> + return 0;
> +}
> +
> +/**
> + * aa_hfa_matchn - reset and traverse @hfa to find state @str stops at
> + * @hfa: the hfa to match @str against (NOT NULL)
> + * @state: the state of the hfa to start matching in
> + * @str: the string of bytes to match against the hfa (NOT NULL)
> + * @len: length of the string of bytes to match
> + *
> + * aa_hfa_matchn will match @str against the hfa and return the state it
> + * finished matching in. The final state can be used to look up the accepting
> + * label, or as the start state of a continuing match.
> + *
> + * This function will happily match again the 0 byte and only finishes
> + * when @len input is consumed.
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_reset, and
> + * aa_hfa_can_fail.
> + */
> +int aa_hfa_matchn(struct aa_hfa *hfa, struct aa_state *state, const char *str,
> + size_t len)
> +{
> + int err = aa_hfa_reset(hfa, state);
> + if (err)
> + return err;
> + return aa_hfa_continuen(hfa, state, str, len);
> +}
> +
> +/**
> + * aa_hfa_match - reset and traverse @hfa to find state @str stops at
> + * @hfa: the hfa to match @str against (NOT NULL)
> + * @state: the state of the hfa to start matching in
> + * @str: the null terminated string of bytes to match against the hfa (NOT NULL)
> + *
> + * aa_hfa_match will match @str against the hfa and return the state it
> + * finished matching in. The final state can be used to look up the accepting
> + * label, or as the start state of a continuing match.
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_start, and
> + * aa_hfa_can_fail.
> + */
> +int aa_hfa_match(struct aa_hfa *hfa, struct aa_state *state, const char *str)
> +{
> + int err = aa_hfa_reset(hfa, state);
> + if (err)
> + return err;
> + return aa_hfa_continue(hfa, state, str);
> +}
> +
> +/**
> + * aa_hfa_next - step one character to the next state in the hfa
> + * @hfa: the hfa to tranverse (NOT NULL)
> + * @state: the state to start in
> + * @c: the input character to transition on
> + *
> + * aa_hfa_match will step through the hfa by one input character @c
> + *
> + * Returns: 0 on success else error.
> + *
> + * Note: most matches are guarenteed to succeed, see aa_hfa_start, and
> + * aa_hfa_can_fail.
> + */
This function can't return anything except 0.
> +int aa_hfa_next(struct aa_hfa *hfa, struct aa_state *state, const char c)
> +{
> + if (hfa->ec)
> + match_EC_char(state->state, hfa, c);
> + else
> + match_char(state->state, hfa, c);
> +
> + return 0;
> +}
Thanks
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 473 bytes
Desc: Digital signature
URL: <https://lists.ubuntu.com/archives/apparmor/attachments/20160108/454f02bd/attachment-0001.pgp>
More information about the AppArmor
mailing list