[apparmor] [PATCH 09/10] Split the nodeset used in computing the dfa into two sets, accepting and non-accepting, and have the proto-state use them.
John Johansen
john.johansen at canonical.com
Fri Oct 28 19:19:36 UTC 2011
To reduce memory overhead each set gains its own "cache" that makes sure
there is only a single instance of each NodeSet generated. And since
we have a cache abstraction, move relevant stats into it.
Also refactor the code slightly to make the caches, work_queue, etc. DFA
member variables instead of passing them as parameters.
The split + caching results in a small reduction in memory use as the
cost of ProtoState + Caching is less than the redundancy that is eliminated.
However this results in a small decrease in performance.
Sorry I know this really should have been split into multiple patches
but the patch evolved and I got lazy and decided to just not bother
splitting it.
Signed-off-by: John Johansen <john.johansen at canonical.com>
---
parser/libapparmor_re/expr-tree.h | 3 +
parser/libapparmor_re/hfa.cc | 160 +++++++++++++++++++++++--------------
parser/libapparmor_re/hfa.h | 129 +++++++++++++++++++++++-------
3 files changed, 203 insertions(+), 89 deletions(-)
diff --git a/parser/libapparmor_re/expr-tree.h b/parser/libapparmor_re/expr-tree.h
index 11d7d0d..4d297b6 100644
--- a/parser/libapparmor_re/expr-tree.h
+++ b/parser/libapparmor_re/expr-tree.h
@@ -204,6 +204,7 @@ public:
void compute_firstpos() { firstpos.insert(this); }
void compute_lastpos() { lastpos.insert(this); }
virtual void follow(Cases &cases) = 0;
+ virtual int is_accept(void) = 0;
};
/* common base class for all the different classes that contain
@@ -212,6 +213,7 @@ public:
class CNode: public ImportantNode {
public:
CNode(): ImportantNode() { }
+ int is_accept(void) { return false; }
};
/* Match one specific character (/c/). */
@@ -363,6 +365,7 @@ public:
class AcceptNode: public ImportantNode {
public:
AcceptNode() { }
+ int is_accept(void) { return true; }
void release(void)
{
/* don't delete AcceptNode via release as they are shared, and
diff --git a/parser/libapparmor_re/hfa.cc b/parser/libapparmor_re/hfa.cc
index d9aa26a..fe79aad 100644
--- a/parser/libapparmor_re/hfa.cc
+++ b/parser/libapparmor_re/hfa.cc
@@ -35,11 +35,30 @@
#include "hfa.h"
#include "../immunix.h"
+
+ostream &operator<<(ostream &os, const CacheStats &cache)
+{
+ /* dump the state label */
+ os << "cache: size=";
+ os << cache.size();
+ os << " dups=";
+ os << cache.dup;
+ os << " longest=";
+ os << cache.max;
+ if (cache.size()) {
+ os << " avg=";
+ os << cache.sum / cache.size();
+ }
+ return os;
+}
+
ostream &operator<<(ostream &os, const ProtoState &proto)
{
/* dump the state label */
os << '{';
- os << proto.nodes;
+ os << proto.nnodes;
+ os << ',';
+ os << proto.anodes;
os << '}';
return os;
}
@@ -53,50 +72,53 @@ ostream &operator<<(ostream &os, const State &state)
return os;
}
-State *DFA::add_new_state(NodeMap &nodemap, ProtoState &proto,
- State *other, dfa_stats_t &stats)
+static void split_node_types(NodeSet *nodes, NodeSet **anodes, NodeSet **nnodes)
{
- State *state = new State(nodemap.size(), proto, other);
- states.push_back(state);
- nodemap.insert(make_pair(proto, state));
- stats.proto_sum += proto.size();
- if (proto.size() > stats.proto_max)
- stats.proto_max = proto.size();
- return state;
-}
-
-State *DFA::find_target_state(NodeMap &nodemap, list<State *> &work_queue,
- NodeSet *nodes, dfa_stats_t &stats)
-{
- State *target;
+ *anodes = *nnodes = NULL;
+ for (NodeSet::iterator i = nodes->begin(); i != nodes->end(); ) {
+ if ((*i)->is_accept()) {
+ if (!*anodes)
+ *anodes = new NodeSet;
+ (*anodes)->insert(*i);
+ NodeSet::iterator k = i++;
+ nodes->erase(k);
+ } else
+ i++;
+ }
- pair<set<hashedNodeSet>::iterator,bool> uniq = uniq_nodes.insert(hashedNodeSet(nodes));
- if (uniq.second == false) {
- delete(nodes);
- nodes = uniq.first->nodes;
+ if (nodes->size() > 0 || !*anodes) {
+ *nnodes = nodes;
+ } else {
+ delete nodes;
}
+}
- ProtoState index(nodes);
+State *DFA::add_new_state(NodeSet *nodes, State *other)
+{
+ /* The splitting of nodes should probably get pushed down into
+ * follow(), ie. put in separate lists from the start
+ */
+ NodeSet *anodes, *nnodes;
+ split_node_types(nodes, &anodes, &nnodes);
- map<ProtoState, State *>::iterator x = nodemap.find(index);
+ nnodes = nnodes_cache.insert(nnodes);
+ anodes = anodes_cache.insert(anodes);
- if (x == nodemap.end()) {
- /* set of nodes isn't known so create new state, and nodes to
- * state mapping
- */
- target = add_new_state(nodemap, index, nonmatching, stats);
- work_queue.push_back(target);
+ ProtoState proto(nnodes, anodes);
+ State *state = new State(node_map.size(), proto, other);
+ pair<NodeMap::iterator,bool> x = node_map.insert(proto, state);
+ if (x.second == false) {
+ delete state;
} else {
- /* set of nodes already has a mapping so free this one */
- stats.duplicates++;
- target = x->second;
+ states.push_back(state);
+ work_queue.push_back(state);
}
- return target;
+ return x.first->second;
}
-void DFA::update_state_transitions(NodeMap &nodemap, list<State *> &work_queue,
- State *state, dfa_stats_t &stats)
+void DFA::update_state_transitions(State *state)
{
/* Compute possible transitions for state->nodes. This is done by
* iterating over all the nodes in state->nodes and combining the
@@ -104,9 +126,12 @@ void DFA::update_state_transitions(NodeMap &nodemap, list<State *> &work_queue,
*
* The resultant transition set is a mapping of characters to
* sets of nodes.
+ *
+ * Note: the follow set for accept nodes is always empty so we don't
+ * need to compute follow for the accept nodes in a protostate
*/
Cases cases;
- for (ProtoState::iterator i = state->nodes.begin(); i != state->nodes.end(); i++)
+ for (NodeSet::iterator i = state->nodes.nnodes->begin(); i != state->nodes.nnodes->end(); i++)
(*i)->follow(cases);
/* Now for each set of nodes in the computed transitions, make
@@ -116,8 +141,7 @@ void DFA::update_state_transitions(NodeMap &nodemap, list<State *> &work_queue,
/* check the default transition first */
if (cases.otherwise)
- state->otherwise = find_target_state(nodemap, work_queue,
- cases.otherwise, stats);
+ state->otherwise = add_new_state(cases.otherwise, nonmatching);
else
state->otherwise = nonmatching;
@@ -126,7 +150,7 @@ void DFA::update_state_transitions(NodeMap &nodemap, list<State *> &work_queue,
*/
for (Cases::iterator j = cases.begin(); j != cases.end(); j++) {
State *target;
- target = find_target_state(nodemap, work_queue, j->second, stats);
+ target = add_new_state(j->second, nonmatching);
/* Don't insert transition that the otherwise transition
* already covers
@@ -153,7 +177,6 @@ void DFA::dump_node_to_dfa(void)
*/
DFA::DFA(Node *root, dfaflags_t flags): root(root)
{
- dfa_stats_t stats = { 0, 0, 0 };
int i = 0;
if (flags & DFA_DUMP_PROGRESS)
@@ -171,12 +194,8 @@ DFA::DFA(Node *root, dfaflags_t flags): root(root)
(*i)->compute_followpos();
}
- NodeMap nodemap;
- ProtoState emptynode = ProtoState(new NodeSet);
- nonmatching = add_new_state(nodemap, emptynode, NULL, stats);
-
- ProtoState first = ProtoState(new NodeSet(root->firstpos));
- start = add_new_state(nodemap, first, nonmatching, stats);
+ nonmatching = add_new_state(new NodeSet, NULL);
+ start = add_new_state(new NodeSet(root->firstpos), nonmatching);
/* the work_queue contains the states that need to have their
* transitions computed. This could be done with a recursive
@@ -188,14 +207,18 @@ DFA::DFA(Node *root, dfaflags_t flags): root(root)
* manner, this may help reduce the number of entries on the
* work_queue at any given time, thus reducing peak memory use.
*/
- list<State *> work_queue;
work_queue.push_back(start);
while (!work_queue.empty()) {
- if (i % 1000 == 0 && (flags & DFA_DUMP_PROGRESS))
- fprintf(stderr, "\033[2KCreating dfa: queue %zd\tstates %zd\teliminated duplicates %d\r",
- work_queue.size(), states.size(),
- stats.duplicates);
+ if (i % 1000 == 0 && (flags & DFA_DUMP_PROGRESS)) {
+ cerr << "\033[2KCreating dfa: queue "
+ << work_queue.size()
+ << "\tstates "
+ << states.size()
+ << "\teliminated duplicates "
+ << node_map.dup
+ << "\r";
+ }
i++;
State *from = work_queue.front();
@@ -204,7 +227,7 @@ DFA::DFA(Node *root, dfaflags_t flags): root(root)
/* Update 'from's transitions, and if it transitions to any
* unknown State create it and add it to the work_queue
*/
- update_state_transitions(nodemap, work_queue, from, stats);
+ update_state_transitions(from);
} /* while (!work_queue.empty()) */
@@ -220,20 +243,31 @@ DFA::DFA(Node *root, dfaflags_t flags): root(root)
if (flags & DFA_DUMP_NODE_TO_DFA)
dump_node_to_dfa();
- for (set<hashedNodeSet>::iterator i = uniq_nodes.begin(); i != uniq_nodes.end(); i++)
- delete i->nodes;
- uniq_nodes.clear();
- nodemap.clear();
-
- if (flags & (DFA_DUMP_STATS))
- fprintf(stderr, "\033[2KCreated dfa: states %zd,\teliminated duplicates %d,\tprotostate sets: longest %u, avg %u\n",
- states.size(), stats.duplicates, stats.proto_max,
- (unsigned int)(stats.proto_sum / states.size()));
+ if (flags & (DFA_DUMP_STATS)) {
+ cerr << "\033[2KCreated dfa: states "
+ << states.size()
+ << " proto { "
+ << node_map
+ << " }, nnodes { "
+ << nnodes_cache
+ << " }, anodes { "
+ << anodes_cache
+ << " }\n";
+ }
+ /* Clear out uniq_nnodes as they are no longer needed.
+ * Do not clear out uniq_anodes, as we need them for minimizations
+ * diffs, unions, ...
+ */
+ nnodes_cache.clear();
+ node_map.clear();
}
DFA::~DFA()
{
+ anodes_cache.clear();
+ nnodes_cache.clear();
+
for (Partition::iterator i = states.begin(); i != states.end(); i++)
delete *i;
}
@@ -256,7 +290,6 @@ void DFA::dump_uniq_perms(const char *s)
void DFA::remove_unreachable(dfaflags_t flags)
{
set<State *> reachable;
- list<State *> work_queue;
/* find the set of reachable states */
reachable.insert(nonmatching);
@@ -804,6 +837,11 @@ uint32_t accept_perms(NodeSet *state, uint32_t *audit_ctl, int *error)
if (error)
*error = 0;
+ if (!state) {
+ *audit_ctl = 0;
+ return perms;
+ }
+
for (NodeSet::iterator i = state->begin(); i != state->end(); i++) {
MatchFlag *match;
if (!(match = dynamic_cast<MatchFlag *>(*i)))
diff --git a/parser/libapparmor_re/hfa.h b/parser/libapparmor_re/hfa.h
index 49ca17f..cc71e2d 100644
--- a/parser/libapparmor_re/hfa.h
+++ b/parser/libapparmor_re/hfa.h
@@ -65,24 +65,75 @@ public:
}
};
+class CacheStats {
+public:
+ unsigned long dup, sum, max;
+
+ CacheStats(void): dup(0), sum(0), max(0) { };
+
+ void clear(void) { dup = sum = max = 0; }
+ virtual unsigned long size(void) const = 0;
+};
+
+class NodeCache: public CacheStats {
+public:
+ set<hashedNodeSet> cache;
+
+ NodeCache(void): cache() { };
+ ~NodeCache() { clear(); };
+
+ virtual unsigned long size(void) const { return cache.size(); }
+
+ void clear()
+ {
+ for (set<hashedNodeSet>::iterator i = cache.begin();
+ i != cache.end(); i++) {
+ delete i->nodes;
+ }
+ cache.clear();
+ CacheStats::clear();
+ }
+
+ NodeSet *insert(NodeSet *nodes)
+ {
+ if (!nodes)
+ return NULL;
+ pair<set<hashedNodeSet>::iterator,bool> uniq;
+ uniq = cache.insert(hashedNodeSet(nodes));
+ if (uniq.second == false) {
+ delete(nodes);
+ dup++;
+ } else {
+ sum += nodes->size();
+ if (nodes->size() > max)
+ max = nodes->size();
+ }
+ return uniq.first->nodes;
+ }
+};
+
/*
* ProtoState - NodeSet and ancillery information used to create a state
*/
class ProtoState {
public:
- typedef NodeSet::iterator iterator;
- iterator begin() { return nodes->begin(); }
- iterator end() { return nodes->end(); }
+ NodeSet *nnodes;
+ NodeSet *anodes;
- NodeSet *nodes;
-
- ProtoState(NodeSet *n): nodes(n) { };
+ ProtoState(NodeSet *n, NodeSet *a = NULL): nnodes(n), anodes(a) { };
bool operator<(ProtoState const &rhs)const
{
- return nodes < rhs.nodes;
+ if (nnodes == rhs.nnodes)
+ return anodes < rhs.anodes;
+ return nnodes < rhs.nnodes;
}
- unsigned long size(void) { return nodes->size(); }
+ unsigned long size(void)
+ {
+ if (anodes)
+ return nnodes->size() + anodes->size();
+ return nnodes->size();
+ }
};
/*
@@ -115,7 +166,7 @@ public:
nodes = n;
/* Compute permissions associated with the State. */
- accept = accept_perms(n.nodes, &audit, &error);
+ accept = accept_perms(n.anodes, &audit, &error);
if (error) {
//cerr << "Failing on accept perms " << error << "\n";
throw error;
@@ -136,36 +187,58 @@ public:
ostream &operator<<(ostream &os, const State &state);
+class NodeMap: public CacheStats
+{
+public:
+ typedef map<ProtoState, State *>::iterator iterator;
+ iterator begin() { return cache.begin(); }
+ iterator end() { return cache.end(); }
-typedef map<ProtoState, State *> NodeMap;
-/* Transitions in the DFA. */
+ map<ProtoState, State *> cache;
-/* dfa_stats - structure to group various stats about dfa creation
- * duplicates - how many duplicate NodeSets where encountered and discarded
- * proto_max - maximum length of a NodeSet encountered during dfa construction
- * proto_sum - sum of NodeSet length during dfa construction. Used to find
- * average length.
- */
-typedef struct dfa_stats {
- unsigned int duplicates, proto_max, proto_sum;
-} dfa_stats_t;
+ NodeMap(void): cache() { };
+ ~NodeMap() { clear(); };
+
+ virtual unsigned long size(void) const { return cache.size(); }
+
+ void clear()
+ {
+ cache.clear();
+ CacheStats::clear();
+ }
+
+ pair<iterator,bool> insert(ProtoState &proto, State *state)
+ {
+ pair<iterator,bool> uniq;
+ uniq = cache.insert(make_pair(proto, state));
+ if (uniq.second == false) {
+ dup++;
+ } else {
+ sum += proto.size();
+ if (proto.size() > max)
+ max = proto.size();
+ }
+ return uniq;
+ }
+};
+
+/* Transitions in the DFA. */
class DFA {
void dump_node_to_dfa(void);
- State *add_new_state(NodeMap &nodemap,
- ProtoState &proto, State *other, dfa_stats_t &stats);
- void update_state_transitions(NodeMap &nodemap,
- list<State *> &work_queue,
- State *state, dfa_stats_t &stats);
- State *find_target_state(NodeMap &nodemap, list<State *> &work_queue,
- NodeSet *nodes, dfa_stats_t &stats);
+ State *add_new_state(NodeSet *nodes, State *other);
+ void update_state_transitions(State *state);
/* temporary values used during computations */
- set<hashedNodeSet> uniq_nodes;
+ NodeCache anodes_cache;
+ NodeCache nnodes_cache;
+ NodeMap node_map;
+ list<State *> work_queue;
public:
DFA(Node *root, dfaflags_t flags);
virtual ~DFA();
+
void remove_unreachable(dfaflags_t flags);
bool same_mappings(State *s1, State *s2);
size_t hash_trans(State *s);
--
1.7.5.4
More information about the AppArmor
mailing list