[apparmor] [patch] v4 - parser: add basic support for parallel compiles and loads

John Johansen john.johansen at canonical.com
Fri Dec 11 21:25:28 UTC 2015


revised v4 version of patch

* replace * with x in -j syntax to avoid shell processing problems
  ie. -j*4 is now -jx4

* -j now requires an option, so that -jx is not potentially confused with possible future -x option

* when abort_on_error is specified, have the parser driver wait for outstanding work before exiting, so that init doesn't have to inherit the outstanding work units

---

commit a841dcb8a6a77649f22cf988c971043615c9b7b4
Author: John Johansen <john.johansen at canonical.com>
Date:   Sat Oct 17 00:28:46 2015 -0700

    parser: add basic support for parallel compiles and loads
    
    This adds a basic support for parallel compiles. It uses a fork()/wait
    model due to the parsers current dependence on global variables and
    structures. It has been setup in a similar manner to how cilk handles
    multithreading to make it easy to port to a managed thread model once
    the parser removes the dependence on global compute structures in the
    backend.
    
    This patch adds two new command line flags
      -j or --jobs
         which follows the make syntax of specifying parallel jobs currently
         defaults to -jauto
         -j8     or  --jobs=8	allows for 8 parallel jobs
         -jauto  or  --jobs=auto	sets the jobs to the # of cpus
         -jx4    or  --jobs=x4	sets the jobs to # of cpus * 4
         -jx1 is equivalent to -jauto
    
         Note: unlike make -j must be accompanied by an option
    
    --max-jobs=n
        allows setting hard cap on the number of jobs that can be specified
        by --jobs. It defaults to the number of processors in the system * 8.
        It supports the "auto" and "max" keywords, and using xn for a
        multiple of the available cpus.
    
    additionally the -d flag has been modified to take an optional parameter
    and
      --debug=jobs
    will output debug information for the job control logic.
    
    In light testing on one machine the job control logic provides a nice
    performance boost.  On an x86 test machine with 60 profiles in the
    /etc/apparmor.d/ directory, for the command
      time apparmor_parser -QT /etc/apparmor.d/
    
      old (equiv of -j1):
         real  0m10.968s
         user  0m10.888s
         sys   0m0.088s
    
      ubuntu parallel load using xargs:
         real  0m8.003s
         user  0m21.680s
         sys   0m0.216s
    
      -j:
         real  0m6.547s
         user  0m17.900s
         sys   0m0.132s
    
    Signed-off-by: John Johansen <john.johansen at canonical.com>

diff --git a/parser/apparmor_parser.pod b/parser/apparmor_parser.pod
index 12c784a..4e10469 100644
--- a/parser/apparmor_parser.pod
+++ b/parser/apparmor_parser.pod
@@ -278,6 +278,29 @@ the matching stats flag.
 
 Use --help=dump to see a full list of which dump flags are supported
 
+=item -j n, --jobs=n
+
+Set the number of jobs used to compile the specified policy. Where n can
+be
+
+  #    - a specific number of jobs
+  auto - the # of cpus in the system
+  x#   - # * number of cpus
+
+Eg.
+  -j8     OR --jobs=8                   allows for 8 parallel jobs
+  -jauto  OR --jobs=auto                sets the jobs to the # of cpus
+  -jx4    OR --jobs=x4                  sets the jobs to # of cpus * 4
+  -jx1   is equivalent to   -jauto
+
+The default value is the number of cpus in the system.
+
+=item --max-jobs n
+
+Set a hard cap on the value that can be specified by the --jobs flag.
+It takes the same set of options available to the --jobs option, and
+defaults to 8*cpus
+
 =item -O n, --optimize=n
 
 Set the optimization flags used by policy compilation.  A single optimization
diff --git a/parser/parser_main.c b/parser/parser_main.c
index 9110076..98ceab9 100644
--- a/parser/parser_main.c
+++ b/parser/parser_main.c
@@ -36,8 +36,11 @@
 #include <limits.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/wait.h>
+
 #include <sys/apparmor.h>
 
+
 #include "lib.h"
 #include "features.h"
 #include "parser.h"
@@ -76,6 +79,18 @@ int abort_on_error = 0;			/* stop processing profiles if error */
 int skip_bad_cache_rebuild = 0;
 int mru_skip_cache = 1;
 int debug_cache = 0;
+
+/* for jobs_max and jobs
+ * LONG_MAX : no limit
+ * 0  : auto  = detect system processing cores
+ * n  : use that number of processes/threads to compile policy
+ */
+#define JOBS_AUTO 0
+long jobs_max = -8;			/* 8 * cpus */
+long jobs = JOBS_AUTO;			/* default: number of processor cores */
+long njobs = 0;
+bool debug_jobs = false;
+
 struct timespec cache_tstamp, mru_policy_tstamp;
 
 static char *apparmorfs = NULL;
@@ -84,7 +99,7 @@ static char *cacheloc = NULL;
 static aa_features *features = NULL;
 
 /* Make sure to update BOTH the short and long_options */
-static const char *short_options = "adf:h::rRVvI:b:BCD:NSm:M:qQn:XKTWkL:O:po:";
+static const char *short_options = "ad::f:h::rRVvI:b:BCD:NSm:M:qQn:XKTWkL:O:po:j:";
 struct option long_options[] = {
 	{"add", 		0, 0, 'a'},
 	{"binary",		0, 0, 'B'},
@@ -116,7 +131,7 @@ struct option long_options[] = {
 	{"purge-cache",		0, 0, 130},	/* no short option */
 	{"create-cache-dir",	0, 0, 131},	/* no short option */
 	{"cache-loc",		1, 0, 'L'},
-	{"debug",		0, 0, 'd'},
+	{"debug",		2, 0, 'd'},
 	{"dump",		1, 0, 'D'},
 	{"Dump",		1, 0, 'D'},
 	{"optimize",		1, 0, 'O'},
@@ -126,6 +141,8 @@ struct option long_options[] = {
 	{"skip-bad-cache-rebuild",	0, 0, 133},	/* no short option */
 	{"warn",		1, 0, 134},	/* no short option */
 	{"debug-cache",		0, 0, 135},	/* no short option */
+	{"jobs",		1, 0, 'j'},
+	{"max-jobs",		1, 0, 136},	/* no short option */
 	{NULL, 0, 0, 0},
 };
 
@@ -170,11 +187,13 @@ static void display_usage(const char *command)
 	       "-v, --verbose		Show profile names as they load\n"
 	       "-Q, --skip-kernel-load	Do everything except loading into kernel\n"
 	       "-V, --version		Display version info and exit\n"
-	       "-d, --debug 		Debug apparmor definitions\n"
+	       "-d [n], --debug 	Debug apparmor definitions OR [n]\n"
 	       "-p, --preprocess	Dump preprocessed profile\n"
 	       "-D [n], --dump		Dump internal info for debugging\n"
 	       "-O [n], --Optimize	Control dfa optimizations\n"
 	       "-h [cmd], --help[=cmd]  Display this text or info about cmd\n"
+	       "-j [n], --jobs [n]	Set the number of compile threads\n"
+	       "--max-jobs [n]		Hard cap on --jobs. Default 8*cpus\n"
 	       "--abort-on-error	Abort processing of profiles on first error\n"
 	       "--skip-bad-cache-rebuild Do not try rebuilding the cache if it is rejected by the kernel\n"
 	       "--warn n		Enable warnings (see --help=warn)\n"
@@ -268,6 +287,32 @@ static int getopt_long_file(FILE *f, const struct option *longopts,
 	return 0;
 }
 
+static long process_jobs_arg(const char *arg, const char *val) {
+	char *end;
+	long n;
+
+	if (!val || strcmp(val, "auto") == 0)
+		n = JOBS_AUTO;
+	else if (strcmp(val, "max") == 0)
+		n = LONG_MAX;
+	else {
+		bool multiple = false;
+		if (*val == 'x') {
+			multiple = true;
+			val++;
+		}
+		n = strtol(val, &end, 0);
+		if (!(*val && val != end && *end == '\0')) {
+			PERROR("%s: Invalid option %s=%s%s\n", progname, arg, multiple ? "x" : "", val);
+			exit(1);
+		}
+		if (multiple)
+			n = n * -1;
+	}
+
+	return n;
+}
+
 /* process a single argment from getopt_long
  * Returns: 1 if an action arg, else 0
  */
@@ -286,8 +331,17 @@ static int process_arg(int c, char *optarg)
 		option = OPTION_ADD;
 		break;
 	case 'd':
-		debug++;
-		skip_read_cache = 1;
+		if (!optarg) {
+			debug++;
+			skip_read_cache = 1;
+		} else if (strcmp(optarg, "jobs") == 0 ||
+			   strcmp(optarg, "j") == 0) {
+			debug_jobs = true;
+		} else {
+			PERROR("%s: Invalid --debug option '%s'\n",
+			       progname, optarg);
+			exit(1);
+		}
 		break;
 	case 'h':
 		if (!optarg) {
@@ -470,6 +524,12 @@ static int process_arg(int c, char *optarg)
 	case 135:
 		debug_cache = 1;
 		break;
+	case 'j':
+		jobs = process_jobs_arg("-j", optarg);
+		break;
+	case 136:
+		jobs_max = process_jobs_arg("max-jobs", optarg);
+		break;
 	default:
 		display_usage(progname);
 		exit(1);
@@ -803,6 +863,117 @@ out:
 	return retval;
 }
 
+/* Do not call directly, this is a helper for work_sync, which can handle
+ * single worker cases and cases where the work queue is optimized away
+ *
+ * call only if there are work children to wait on
+ */
+#define work_sync_one(RESULT)						\
+do {									\
+	int status;							\
+	wait(&status);							\
+	if (WIFEXITED(status))						\
+		RESULT(WEXITSTATUS(status));				\
+	else								\
+		RESULT(ECHILD);						\
+	/* TODO: do we need to handle traced */				\
+	njobs--;							\
+	if (debug_jobs)							\
+		fprintf(stderr, "    JOBS SYNC ONE: result %d, jobs left %ld\n", status, njobs);							\
+} while (0)
+
+#define work_sync(RESULT)						\
+do {									\
+	if (debug_jobs)							\
+		fprintf(stderr, "JOBS SYNC: jobs left %ld\n", njobs);	\
+	while (njobs)							\
+		work_sync_one(RESULT);					\
+} while (0)
+
+#define work_spawn(WORK, RESULT)					\
+do {									\
+	if (jobs == 1) {						\
+		/* no parallel work so avoid fork() overhead */		\
+		RESULT(WORK);						\
+		break;							\
+	}								\
+	if (njobs == jobs) {						\
+		/* wait for a child */					\
+		if (debug_jobs)						\
+			fprintf(stderr, "    JOBS SPAWN: waiting (jobs %ld == max %ld) ...\n", njobs, jobs);						\
+		work_sync_one(RESULT);					\
+	}								\
+									\
+	pid_t child = fork();						\
+	if (child == 0) {						\
+		/* child - exit work unit with returned value */	\
+		exit(WORK);						\
+	} else if (child > 0) {						\
+		/* parent */						\
+		njobs++;						\
+		if (debug_jobs)						\
+			fprintf(stderr, "    JOBS SPAWN: created %ld ...\n", njobs);									\
+	} else {							\
+		/* error */						\
+		if (debug_jobs)						\
+			fprintf(stderr, "    JOBS SPAWN: failed error: %d) ...\n", errno);								\
+		RESULT(errno);						\
+	}								\
+} while (0)
+
+
+/* sadly C forces us to do this with exit, longjmp or returning an error
+ * from work_spawn and work_sync. We could throw a C++ exception; is it
+ * worth doing to avoid the exit here?
+ *
+ * atm not all resources may be cleaned up at exit
+ */
+int last_error = 0;
+void handle_work_result(int retval)
+{
+	if (retval) {
+		last_error = retval;
+		if (abort_on_error) {
+			/* already in abort mode we don't need subsequent
+			 * syncs to do this too
+			 */
+			abort_on_error = 0;
+			work_sync(handle_work_result);
+			exit(last_error);
+
+		}
+	}
+}
+
+static long compute_jobs(long n, long j)
+{
+	if (j == JOBS_AUTO)
+		j = n;
+	else if (j < 0)
+		j = n * j * -1;
+	return j;
+}
+
+static void setup_parallel_compile(void)
+{
+	/* jobs and jobs_max set by default, config or args */
+	long n = sysconf(_SC_NPROCESSORS_ONLN);
+	if (n == -1)
+		/* unable to determine number of processors, default to 1 */
+		n = 1;
+	jobs = compute_jobs(n, jobs);
+	jobs_max = compute_jobs(n, jobs_max);
+
+	if (jobs > jobs_max) {
+		pwarn("%s: Warning capping number of jobs to %ld * # of cpus == '%ld'",
+		      progname, jobs_max, jobs);
+		jobs = jobs_max;
+	}
+	njobs = 0;
+	if (debug_jobs)
+		fprintf(stderr, "jobs: %ld\n", jobs);
+}
+
 struct dir_cb_data {
 	aa_kernel_interface *kernel_interface;
 	const char *dirname;	/* name of the parent dir */
@@ -820,8 +991,9 @@ static int profile_dir_cb(int dirfd unused, const char *name, struct stat *st,
 		autofree char *path = NULL;
 		if (asprintf(&path, "%s/%s", cb_data->dirname, name) < 0)
 			PERROR(_("Out of memory"));
-		rc = process_profile(option, cb_data->kernel_interface, path,
-				     cb_data->cachedir);
+		work_spawn(process_profile(option, cb_data->kernel_interface,
+					   path, cb_data->cachedir),
+			   handle_work_result);
 	}
 	return rc;
 }
@@ -837,7 +1009,9 @@ static int binary_dir_cb(int dirfd unused, const char *name, struct stat *st,
 		autofree char *path = NULL;
 		if (asprintf(&path, "%s/%s", cb_data->dirname, name) < 0)
 			PERROR(_("Out of memory"));
-		rc = process_binary(option, cb_data->kernel_interface, path);
+		work_spawn(process_binary(option, cb_data->kernel_interface,
+					   path),
+			    handle_work_result);
 	}
 	return rc;
 }
@@ -861,7 +1035,7 @@ int main(int argc, char *argv[])
 {
 	aa_kernel_interface *kernel_interface = NULL;
 	aa_policy_cache *policy_cache = NULL;
-	int retval, last_error;
+	int retval;
 	int i;
 	int optind;
 
@@ -873,6 +1047,8 @@ int main(int argc, char *argv[])
 	process_config_file("/etc/apparmor/parser.conf");
 	optind = process_args(argc, argv);
 
+	setup_parallel_compile();
+
 	setlocale(LC_MESSAGES, "");
 	bindtextdomain(PACKAGE, LOCALEDIR);
 	textdomain(PACKAGE);
@@ -939,6 +1115,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
+
 	retval = last_error = 0;
 	for (i = optind; i <= argc; i++) {
 		struct stat stat_file;
@@ -973,22 +1150,19 @@ int main(int argc, char *argv[])
 				       profilename);
 			}
 		} else if (binary_input) {
-			retval = process_binary(option, kernel_interface,
-						profilename);
+			work_spawn(process_binary(option, kernel_interface,
+						  profilename),
+				   handle_work_result);
 		} else {
-			retval = process_profile(option, kernel_interface,
-						 profilename, cacheloc);
+			work_spawn(process_profile(option, kernel_interface,
+						   profilename, cacheloc),
+				   handle_work_result);
 		}
 
 		if (profilename) free(profilename);
 		profilename = NULL;
-
-		if (retval) {
-			last_error = retval;
-			if (abort_on_error)
-				break;
-		}
 	}
+	work_sync(handle_work_result);
 
 	if (ofile)
 		fclose(ofile);





More information about the AppArmor mailing list