[apparmor] [PATCH 02/11] Speed up dfa generation by using hashing in set comparison

John Johansen john.johansen at canonical.com
Tue Oct 19 01:20:34 BST 2010


Convert Nodemap comparision to use a hash value.  This uses a little more
memory than just using the NodeSet size to short circuit comparison but it
improves on the case where compared sets have the same size.  It is possible
that this will slow down small dfa generation slightly but the trade off for
large dfa's (which are the slow ones to generate) is worth it.

This results in another performance bump over using the NodeSize is NodeSet
comparison, and the amount of improvement increases with larger dfas
---
 parser/libapparmor_re/regexp.y |   50 +++++++++++++++++++++++++--------------
 1 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/parser/libapparmor_re/regexp.y b/parser/libapparmor_re/regexp.y
index e19214d..d5412f6 100644
--- a/parser/libapparmor_re/regexp.y
+++ b/parser/libapparmor_re/regexp.y
@@ -1300,20 +1300,32 @@ void dump_syntax_tree(ostream& os, Node *node) {
 }
 
 /* Comparison operator for sets of <NodeSet *>.
- * Compare set sizes, and if the sets are the same size
+ * Compare set hashes, and if the sets have the same hash
  * do compare pointer comparison on set of <Node *>, the pointer comparison
  * allows us to determine which Sets of <Node *> we have seen already from
  * new ones when constructing the DFA.
  */
 struct deref_less_than {
-  bool operator()(NodeSet * const & lhs, NodeSet * const & rhs) const
-  { if (lhs->size() == rhs->size())
-      return *lhs < *rhs;
-    else
-      return lhs->size() < rhs->size();
+  bool operator()(pair <unsigned long, NodeSet *> const & lhs, pair <unsigned long, NodeSet *> const & rhs) const
+  {
+	  if (lhs.first == rhs.first)
+		  return *(lhs.second) < *(rhs.second);
+	  else
+		  return lhs.first < rhs.first;
   }
 };
 
+unsigned long hash_NodeSet(const NodeSet *ns)
+{
+        unsigned long hash = 5381;
+
+	for (NodeSet::iterator i = ns->begin(); i != ns->end(); i++) {
+	  hash = ((hash << 5) + hash) + (unsigned long) *i;
+	}
+
+        return hash;
+}
+
 class State;
 /**
  * State cases are identical to NodesCases except they map to State *
@@ -1357,7 +1369,7 @@ ostream& operator<<(ostream& os, const State& state)
 }
 
 typedef list<State *> Partition;
-typedef map<NodeSet *, State *, deref_less_than > NodeMap;
+typedef map<pair<unsigned long, NodeSet *>, State *, deref_less_than > NodeMap;
 /* Transitions in the DFA. */
 
 class DFA {
@@ -1384,9 +1396,9 @@ uint32_t accept_perms(NodeSet *state, uint32_t *audit_ctl, int *error);
 /* macro to help out with DFA creation, not done as inlined fn as nearly
  * every line uses a different map or variable that would have to be passed
  */
-#define update_for_nodes(NODES, TARGET) \
+#define update_for_nodes(INDEX, TARGET)	\
 do { \
-	map<NodeSet *, State *, deref_less_than>::iterator x = nodemap.find(NODES);	\
+	map<pair <unsigned long, NodeSet *>, State *, deref_less_than>::iterator x = nodemap.find(INDEX); \
 	if (x == nodemap.end()) { \
 		/* set of nodes isn't known so create new state, and nodes to \
 		 * state mapping \
@@ -1395,12 +1407,12 @@ do { \
 		TARGET = new State(); \
 		(TARGET)->label = nomatch_count; \
 		states.push_back(TARGET); \
-		nodemap.insert(make_pair(NODES, TARGET)); \
-		work_queue.push_back(NODES); \
+		nodemap.insert(make_pair(INDEX, TARGET)); \
+		work_queue.push_back((INDEX).second);	  \
 	} else { \
 		/* set of nodes already has a mapping so free this one */ \
 		match_count++; \
-		delete NODES; \
+		delete (INDEX).second;		\
 		TARGET = x->second; \
 	} \
 } while (0)
@@ -1432,7 +1444,7 @@ DFA::DFA(Node *root, dfaflags_t flags) : root(root)
 	nonmatching = new State;
 	states.push_back(nonmatching);
 	NodeSet *emptynode = new NodeSet;
-	nodemap.insert(make_pair(emptynode, nonmatching));
+	nodemap.insert(make_pair(make_pair(hash_NodeSet(emptynode), emptynode), nonmatching));
 	/* there is no nodemapping for the nonmatching state */
 
 	start = new State;
@@ -1440,7 +1452,7 @@ DFA::DFA(Node *root, dfaflags_t flags) : root(root)
 	nomatch_count++;
 	states.push_back(start);
 	NodeSet *first = new NodeSet(root->firstpos);
-	nodemap.insert(make_pair(first, start));
+	nodemap.insert(make_pair(make_pair(hash_NodeSet(first), first), start));
 
 	/* the work_queue contains the proto-states (set of nodes that is
 	 * the precurser of a state) that need to be computed
@@ -1461,7 +1473,7 @@ DFA::DFA(Node *root, dfaflags_t flags) : root(root)
 		int error;
 		NodeSet *nodes = work_queue.front();
 		work_queue.pop_front();
-		State *from = nodemap[nodes];
+		State *from = nodemap[make_pair(hash_NodeSet(nodes), nodes)];
 
 		/* Compute permissions associated with the State. */
 		from->accept = accept_perms(nodes, &from->audit, &error);
@@ -1490,7 +1502,8 @@ DFA::DFA(Node *root, dfaflags_t flags) : root(root)
 		/* check the default transition first */
 		if (cases.otherwise) {
 			State *target;
-			update_for_nodes(cases.otherwise, target);
+			pair <unsigned long, NodeSet *> index = make_pair (hash_NodeSet(cases.otherwise), cases.otherwise);
+			update_for_nodes(index, target);
 			from->cases.otherwise = target;
 		}
 
@@ -1500,7 +1513,8 @@ DFA::DFA(Node *root, dfaflags_t flags) : root(root)
 		for (NodeCases::iterator j = cases.begin(); j != cases.end();
 		     j++) {
 			State *target;
-			update_for_nodes(j->second, target);
+			pair <unsigned long, NodeSet *> index = make_pair(hash_NodeSet(j->second), j->second);
+			update_for_nodes(index, target);
 			/* Don't insert transition that the default transition
 			 * already covers
 			 */
@@ -1518,7 +1532,7 @@ DFA::DFA(Node *root, dfaflags_t flags) : root(root)
 		(*i)->followpos.clear();
 	}
 	for (NodeMap::iterator i = nodemap.begin(); i != nodemap.end(); i++)
-		delete i->first;
+		delete i->first.second;
 	nodemap.clear();
 
 	if (flags & (DFA_DUMP_STATS))
-- 
1.7.1




More information about the AppArmor mailing list