[RFC][PATCH v2] nih_parse

Casey Dahlin cdahlin at redhat.com
Sat Feb 20 02:46:00 GMT 2010


I've been re-organizing this quite a bit, and fortunately it is no longer the
massive lump it used to be :) I think it still has a bit to go, but I wanted to
collect more feedback on the direction its moved.

This time the pseudo-recursion mechanism is different. Every function gets its
own frame on the actual system stack, but when calling another function, the
function actually returns, saving its state in the virtual stack so it can
resume later.

Some things on the TODO list:
	* I think if I stopped using local variables at all and just
	  manipulated struct members in the virtual stack frame directly I'd
	  save some macro magic, at the cost of some readability. I also have
	  another way of saving the variables that should cut down on some of
	  the magicalness as well. I'll likely try one or the other soon.

	* I don't like the way this version is commented. This is somewhat the
	  fault of the local variable saving and restoring again, as the
	  semantic arguments are separated by miles from the actual function.
	  The above structural changes could fix that as well.
	
	* Still don't have error reporting as shiny as it could be. That's the
	  last task on the list, but it remains fairly simple.

	* Oh yeah, the order of things in the makefiles was wrong, wasn't it?
	  I should have corrected that one...

Improvements:
	* Now possible to break the algorithm into multiple functions.
	* Outside of the stuff that makes the virtual call stuff work, no usage
	  of goto anywhere in site.

Test coverage is 94%. There's 3 lines that don't get hit that probably should,
but its going to take some time for me to produce a parsing example that will
hit them.

Branch is available at https://code.launchpad.net/~cjdahlin/libnih/nih-parse

And now.... a yak:

=== modified file 'nih/Makefile.am'
--- nih/Makefile.am	2009-11-21 21:44:23 +0000
+++ nih/Makefile.am	2010-02-10 05:41:20 +0000
@@ -25,6 +25,7 @@
 	command.c \
 	config.c \
 	logging.c \
+	parse.c \
 	error.c
 
 libnih_la_LDFLAGS = \
@@ -58,6 +59,7 @@
 	command.h \
 	config.h \
 	logging.h \
+	parse.h \
 	error.h \
 	errors.h \
 	test.h \
@@ -95,10 +97,15 @@
 	test_command \
 	test_config \
 	test_logging \
+	test_parse \
 	test_error
 
 check_PROGRAMS = $(TESTS)
 
+test_parse_SOURCES = tests/test_parse.c
+test_parse_LDFLAGS = -static
+test_parse_LDADD = libnih.la
+
 test_alloc_SOURCES = tests/test_alloc.c
 test_alloc_LDFLAGS = -static
 test_alloc_LDADD = libnih.la

=== modified file 'nih/errors.h'
--- nih/errors.h	2009-06-23 09:29:37 +0000
+++ nih/errors.h	2010-02-05 14:32:30 +0000
@@ -42,6 +42,8 @@
 
 	NIH_DIR_LOOP_DETECTED,
 
+	NIH_PARSE_FAILED,
+
 	/* 0x20000 thru 0x2FFFF reserved for applications */
 	NIH_ERROR_APPLICATION_START = 0x20000L,
 
@@ -62,4 +64,6 @@
 
 #define NIH_DIR_LOOP_DETECTED_STR          N_("Directory loop detected")
 
+#define NIH_PARSE_FAILED_STR		   N_("Expected %s")
+
 #endif /* NIH_ERRORS_H */

=== added file 'nih/parse.c'
--- nih/parse.c	1970-01-01 00:00:00 +0000
+++ nih/parse.c	2010-02-20 01:58:32 +0000
@@ -0,0 +1,783 @@
+/* libnih
+ *
+ * parse.c - Parsing expression grammar parser
+ *
+ * Copyright © 2010 Casey Dahlin <cdahlin at redhat.com>
+ * Copyright © 2010 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <string.h>
+
+#include <nih/alloc.h>
+#include <nih/logging.h>
+#include <nih/list.h>
+#include <nih/error.h>
+#include <nih/errors.h>
+
+#include <nih/parse.h>
+
+/**
+ * NihParseRecursiveFunction:
+ *
+ * Special virtual-recursive function format.
+ **/
+struct nih_parse_frame;
+typedef struct nih_parse_frame * (*NihParseRecursiveFunction)(struct nih_parse_frame *);
+
+/**
+ * NihParseFrame:
+ * @prev: Previous frame,
+ * @function: function for frame,
+ * @retpoint: return point within function,
+ * @data: local data from function,
+ * @retval_target: place to put function return value.
+ * @retval_received: value returned by function called from @function.
+ *
+ * A call frame for the virtual stack used by nih_parse internal calls.
+ **/
+typedef struct nih_parse_frame {
+	struct nih_parse_frame    *prev;
+	NihParseRecursiveFunction  function;
+	void                      *retpoint;
+	void                      *data;
+	void                     **retval_target;
+	void                      *retval_received;
+} NihParseFrame;
+
+
+/**
+ * NihParseData:
+ * @parent: Parent node for the new matching node,
+ * @symbol: symbol to match,
+ * @ret: previously matched node to backtrack,
+ * @nth_time: number of times we will have matched @symbol if we match,
+ * @parent_ref: The child_ref that led us from @parent to this symbol.
+ *
+ * Stack data for the pseudo-recursive nih_parse_r function.
+ **/
+typedef struct nih_parse_data {
+	NihParseNode           *parent;
+	NihParseSymbol         *symbol;
+	NihParseNode           *ret;
+	size_t                  nth_time;
+	NihParseSymbolChildRef *parent_ref;
+	const char             *text_pos;
+	const char             *text_end;
+
+	/* Private locals */
+	size_t                  iter;
+	NihParseSymbolChildRef *child;
+	int                     alt_matched;
+	NihParseNode           *tail;
+} NihParseData;
+
+
+/**
+ * NihParseTerminalData:
+ * @parent: Alloc parent,
+ * @symbol: symbol to check,
+ * @nth_time: this is the nth time this symbol has been matched,
+ * @parent_ref: reference from parent node to the node to be created,
+ * @ret: node being backtracked.
+ *
+ * Stack data for the pseudo-recursive nih_parse_terminal_r
+ * function.
+ **/
+typedef struct nih_parse_terminal_data {
+	void                   *parent;
+	NihParseSymbol         *symbol;
+	size_t                  nth_time;
+	NihParseSymbolChildRef *parent_ref;
+	NihParseNode           *bt_node;
+	const char             *text_pos;
+	const char             *text_end;
+} NihParseTerminalData;
+
+
+/**
+ * NihParseChildData
+ * @times: Number of times we've matched this child already,
+ * @child: child ref to match,
+ * @node: parent node,
+ * @text_pos: position of text to start,
+ * @text_end: ending position of text.
+ *
+ * Stack data for the pseudo-recursive nih_parse_child_r function.
+ **/
+typedef struct nih_parse_child_data {
+	size_t                  times;
+	NihParseSymbolChildRef *child;
+	NihParseNode           *node;
+	const char             *text_pos;
+	const char             *text_end;
+} NihParseChildData;
+
+
+/**
+ * NihParseBacktrackData
+ * @node: node to backtrack matching on,
+ * @idx: index of last child we tried to match within @node->symbol,
+ * @alt_matched: flag to set if we match a member of an alternate group,
+ * @text_pos: the position within the text,
+ * @text_end: the character after the end of the text.
+ *
+ * Stack data for the pseudo-recursive nih_parse_backtrack_r function.
+ **/
+typedef struct nih_parse_backtrack_data {
+	NihParseNode           *node;
+	size_t                 *idx;
+	int                    *alt_matched;
+	const char            **text_pos;
+	const char             *text_end;
+
+	/* Private locals */
+	NihParseNode           *tail;
+	NihParseSymbolChildRef *child;
+	int                     success;
+} NihParseBacktrackData;
+
+
+/**
+ * NIH_PARSE_CHARSET_MATCH:
+ * @set: Set to match against,
+ * @c: char to match.
+ *
+ * See if @c is in @set.
+ *
+ * Returns: TRUE or FALSE.
+ **/
+#define NIH_PARSE_CHARSET_MATCH(set, c) ({			\
+	!!((set)[(c)/64] & ((uint64_t)1 << ((c) % 64)));	\
+})
+
+
+/**
+ * NIH_PARSE_OOM_ABORT:
+ * @frame: current NihParseFrame
+ *
+ * Called from within a function using NihParseFrame recursion to raise an OOM
+ * and clear the stack. Returns NULL as a convenience since that's what the
+ * function should also return at that point.
+ **/
+#define NIH_PARSE_OOM_ABORT(frame) ({			\
+	NihParseFrame *_frame = (frame);		\
+							\
+	while (_frame) {				\
+		NihParseFrame *to_free = _frame;	\
+		_frame = _frame->prev;			\
+		nih_free (to_free);			\
+	}						\
+							\
+	nih_error_raise_no_memory ();			\
+							\
+	(NULL);						\
+})
+
+
+/**
+ * NIH_PARSE_POP_FRAME:
+ * @frame_in: frame to pop,
+ * @retval_in: value to return for popped_frame.
+ *
+ * Pop to the next frame on an NihParseFrame stack.
+ *
+ * Returns: The new stack head or NULL if empty.
+ **/
+#define NIH_PARSE_POP_FRAME(frame, retval_in) ({	\
+	NihParseFrame *_frame = (frame);		\
+	NihParseFrame *my_prev = _frame->prev;		\
+	*_frame->retval_target = (retval_in);		\
+	nih_free (_frame);				\
+	(my_prev);					\
+})
+
+
+/**
+ * NIH_PARSE_CALL:
+ * @func: Macro prefix indicating what function to call,
+ * @frame: top of the NihParseFrame stack.
+ *
+ * Conducts a call of another function using the virtual NihParseFrame stack.
+ * This macro will invoke return and return an object of type NihParseFrame
+ * indicating what function should be called. It will also store in @frame the
+ * information necessary to resume this function after the call completes,
+ * including a local jump point inside the macro so the function may be called
+ * again and resumed from this point in the code.
+ *
+ * @func is the prefix to a series of macros which are assumed to be defined:
+ *
+ * @func_DATA is assumed to contain the type for the local data struct for the
+ * new function, into which the arguments passed after @func and @frame will be
+ * placed.
+ *
+ * @func_NAME is the name of the actual function to call, which should be
+ * safely castable to NihParseRecursiveFunction.
+ *
+ * @func_UPDATE should be a macro taking one argument. It will be passed @frame
+ * and should update its data member.
+ *
+ * Returns: The value returned by the function. Note that this macro only
+ * appears to return within properly arranged NihParseFrame users.
+ **/
+#define NIH_PARSE_CALL(func, frame, ...) ({			\
+	__label__ retpoint;					\
+	NihParseFrame *_frame = (frame);			\
+	NihParseFrame *next = nih_new (NULL, NihParseFrame);	\
+	func ## _DATA args = { __VA_ARGS__ };			\
+								\
+	if (! next)						\
+		return NIH_PARSE_OOM_ABORT (frame);		\
+								\
+	next->prev = frame;					\
+	next->retpoint = NULL;					\
+	next->retval_target = &frame->retval_received;		\
+	next->function = func ## _NAME;				\
+								\
+	next->data = nih_new (next, func ## _DATA);		\
+								\
+	if (! next->data)					\
+		return NIH_PARSE_OOM_ABORT (next);		\
+								\
+	*(func ## _DATA *)next->data = args;			\
+								\
+	NIH_PARSE_UPDATE (_frame);				\
+								\
+	frame->retpoint = &&retpoint;				\
+	return next;						\
+								\
+retpoint:							\
+	(frame->retval_received);				\
+})
+
+
+/**
+ * NIH_PARSE_NODE_END:
+ * @node: Node to operate on.
+ *
+ * Find the position of the first character after this node.
+ **/
+#define NIH_PARSE_NODE_END(node) ({					\
+	NihParseNode *_node = (node);					\
+	NihParseNode *tail;						\
+	const char *ret = _node->start;					\
+									\
+	if (! NIH_LIST_EMPTY (&_node->children)) {			\
+		tail = (NihParseNode *)_node->children.prev;		\
+		ret = tail->start + tail->len;				\
+	} else if (! &_node->symbol->type & NIH_PARSE_NONTERM) {	\
+		ret += _node->len;					\
+	}								\
+									\
+	(ret);								\
+})
+
+
+/* Prototypes for static declarations. */
+static NihParseNode *nih_parse_node_new (NihParseNode *parent,
+					 NihParseSymbol *symbol,
+					 const char *start, size_t len,
+					 size_t nth_match,
+					 NihParseSymbolChildRef *parent_ref)
+	__attribute__ ((warn_unused_result, malloc));
+
+static NihParseNode *nih_parse_terminal (void *parent, NihParseSymbol *symbol,
+					 const char *text_pos, size_t text_len,
+					 int *oom_flag, size_t nth_time,
+					 NihParseSymbolChildRef *parent_ref)
+	__attribute__ ((warn_unused_result));
+
+
+/* Prototypes for functions designed to use the NihParseFrame virtual stack */
+static NihParseFrame *nih_parse_r (NihParseFrame *frame)
+	__attribute__ ((warn_unused_result));
+#define NIH_PARSE_NAME nih_parse_r 
+#define NIH_PARSE_DATA NihParseData
+#define NIH_PARSE_R_UPDATE(frame) ({				\
+	NihParseData *data = (frame)->data;			\
+	data->iter = iter;					\
+	data->child = child;					\
+	data->ret = ret;					\
+	data->parent = parent;					\
+	data->text_pos = text_pos;				\
+	data->text_end = text_end;				\
+	data->parent_ref = parent_ref;				\
+	data->symbol = symbol;					\
+	data->alt_matched = alt_matched;			\
+})
+
+static NihParseFrame *nih_parse_terminal_r (NihParseFrame *frame)
+	__attribute__ ((warn_unused_result));
+#define NIH_PARSE_TERMINAL_NAME nih_parse_terminal_r 
+#define NIH_PARSE_TERMINAL_DATA NihParseTerminalData
+#define NIH_PARSE_TERMINAL_UPDATE(frame) ({				\
+	NihParseTerminalData *data = (frame)->data;			\
+	data->parent = parent;						\
+	data->symbol = symbol;						\
+	data->nth_time = nth_time;					\
+	data->parent_ref = parent_ref;					\
+	data->bt_node = bt_node;					\
+})
+
+static NihParseFrame *nih_parse_child_r (NihParseFrame *frame)
+	__attribute__ ((warn_unused_result));
+#define NIH_PARSE_CHILD_NAME nih_parse_child_r 
+#define NIH_PARSE_CHILD_DATA NihParseChildData
+#define NIH_PARSE_CHILD_UPDATE(frame) ({		\
+	NihParseChildData *data = (frame)->data;	\
+	data->times = times;				\
+	data->child = child;				\
+	data->node = node;				\
+	data->text_pos = text_pos;			\
+	data->text_end = text_end;			\
+})
+
+static NihParseFrame *nih_parse_backtrack_r (NihParseFrame *frame)
+	__attribute__ ((warn_unused_result));
+#define NIH_PARSE_BACKTRACK_NAME nih_parse_backtrack_r 
+#define NIH_PARSE_BACKTRACK_DATA NihParseBacktrackData
+#define NIH_PARSE_BACKTRACK_UPDATE(frame) ({	\
+	data->node = node;			\
+	data->idx = idx;			\
+	data->alt_matched = alt_matched;	\
+	data->text_pos = text_pos;		\
+	data->text_end = text_end;		\
+	data->tail = tail;			\
+	data->child = child;			\
+	data->success = success;		\
+})
+
+
+static NihParseFrame *nih_parse_or_raise_r (NihParseFrame *frame)
+	__attribute__ ((warn_unused_result));
+#define NIH_PARSE_OR_RAISE_NAME nih_parse_or_raise_r 
+#define NIH_PARSE_OR_RAISE_DATA NIH_PARSE_DATA
+#define NIH_PARSE_OR_RAISE_UPDATE(frame) NIH_PARSE_R_UPDATE (frame)
+
+
+/**
+ * nih_parse:
+ * @alloc_parent: Alloc parent,
+ * @symbol: symbol to match,
+ * @text_pos: position in the text stream,
+ * @text_len: characters remaining.
+ *
+ * Determines if @symbol matches at @text_pos and returns a parse tree
+ * describing its correspondance.
+ *
+ * Returns: A new NihParseNode or NULL on error.
+ **/
+NihParseNode *
+nih_parse (void           *alloc_parent,
+	   NihParseSymbol *symbol,
+	   const char     *text_pos,
+	   size_t          text_len)
+{
+	nih_local NihParseFrame *frame = nih_new (NULL, NihParseFrame);
+	NihParseNode *ret;
+	NihParseData data = {
+		NULL,
+		symbol,
+		NULL,
+		1,
+		NULL,
+		text_pos,
+		text_pos + text_len,
+	};
+
+	if (! frame) {
+		nih_error_raise_no_memory ();
+		return NULL;
+	}
+
+	frame->prev = NULL;
+	frame->function =
+		(NihParseRecursiveFunction)nih_parse_or_raise_r;
+	frame->retpoint = NULL;
+	frame->retval_received = NULL;
+	frame->retval_target = (void **)&ret;
+	frame->data = &data;
+
+	while (frame) {
+		int depth = 0;
+		frame = ((NihParseRecursiveFunction)frame->function) (frame);
+	}
+
+	return ret;
+}
+
+
+/**
+ * nih_parse_or_raise_r:
+ * @frame: frame we executed in.
+ *
+ * Recursive form of nih_parse that uses pseudo-recursion and
+ * NihParseFrame.
+ *
+ * Returns: A new frame for the next function to call or NULL if we're done.
+ **/
+static NihParseFrame *
+nih_parse_or_raise_r (NihParseFrame *frame)
+{
+#define NIH_PARSE_UPDATE NIH_PARSE_R_UPDATE
+	NihParseData *data = frame->data;
+
+	NihParseNode           *parent      = data->parent;
+	NihParseSymbol         *symbol      = data->symbol;
+	NihParseNode           *ret         = data->ret;
+	size_t                  nth_time    = data->nth_time;
+	NihParseSymbolChildRef *parent_ref  = data->parent_ref;
+	const char             *text_pos    = data->text_pos;
+	const char             *text_end    = data->text_end;
+
+	size_t                  iter        = data->iter;
+	NihParseSymbolChildRef *child       = data->child;
+	int                     alt_matched = data->alt_matched;
+
+	if (frame->retpoint)
+		goto *frame->retpoint;
+
+	ret = NIH_PARSE_CALL (NIH_PARSE, frame, parent, symbol, ret, nth_time,
+			      parent_ref, text_pos, text_end);
+
+	if (ret)
+		return NIH_PARSE_POP_FRAME (frame, ret);
+
+	nih_error_raise_printf (NIH_PARSE_FAILED, NIH_PARSE_FAILED_STR,
+				"symbol");
+
+	return NIH_PARSE_POP_FRAME (frame, NULL);
+#undef NIH_PARSE_UPDATE
+}
+
+/**
+ * nih_parse_r:
+ * @frame: frame we executed in.
+ *
+ * Recursive form of nih_parse that uses pseudo-recursion and
+ * NihParseFrame.
+ *
+ * Returns: A new frame for the next function to call or NULL if we're done.
+ **/
+static NihParseFrame *
+nih_parse_r (NihParseFrame *frame)
+{
+#define NIH_PARSE_UPDATE NIH_PARSE_R_UPDATE
+	NihParseData *data = frame->data;
+
+	NihParseNode           *parent      = data->parent;
+	NihParseSymbol         *symbol      = data->symbol;
+	NihParseNode           *ret         = data->ret;
+	NihParseSymbolChildRef *parent_ref  = data->parent_ref;
+	size_t                  nth_time    = data->nth_time;
+	const char             *text_pos    = data->text_pos;
+	const char             *text_end    = data->text_end;
+
+	size_t                  iter        = data->iter;
+	NihParseSymbolChildRef *child       = data->child;
+	int                     alt_matched = data->alt_matched;
+
+	if (frame->retpoint)
+		goto *frame->retpoint;
+
+	nih_assert (text_pos != NULL);
+	nih_assert (symbol != NULL);
+
+	if (symbol->type != NIH_PARSE_NONTERM) {
+		ret = NIH_PARSE_CALL (NIH_PARSE_TERMINAL, frame,
+				      parent, symbol, nth_time, parent_ref,
+				      ret, text_pos, text_end);
+
+		return NIH_PARSE_POP_FRAME (frame, ret);
+	}
+
+	alt_matched = 0;
+
+	if (! ret) {
+		ret = nih_parse_node_new (parent, symbol, text_pos, 0,
+					  nth_time, parent_ref);
+		iter = 0;
+	} else {
+		iter = symbol->length - 1;
+
+		if (! NIH_PARSE_CALL (NIH_PARSE_BACKTRACK, frame, ret,
+				      &data->iter, &data->alt_matched,
+				      &data->text_pos, text_end)) {
+			nih_free (ret);
+			return NIH_PARSE_POP_FRAME (frame, NULL);
+		}
+
+		iter++;
+	}
+
+	if (! ret)
+		return NIH_PARSE_OOM_ABORT (frame);
+
+	for (; iter < symbol->length; iter++) {
+		child = &symbol->children[iter];
+
+		if (alt_matched) {
+			alt_matched = child->flags & NIH_PARSE_ALT_WITH_NEXT;
+			continue;
+		}
+
+		if (NIH_PARSE_CALL (NIH_PARSE_CHILD, frame, 0, child, ret,
+				    text_pos, text_end)) {
+			alt_matched = child->flags & NIH_PARSE_ALT_WITH_NEXT;
+
+			if (! (child->flags & (NIH_PARSE_NOT | NIH_PARSE_LOOKAHEAD)))
+				text_pos = NIH_PARSE_NODE_END (ret);
+
+			continue;
+		}
+
+		if (! NIH_PARSE_CALL (NIH_PARSE_BACKTRACK, frame, ret,
+				      &data->iter, &data->alt_matched,
+				      &data->text_pos, text_end)) {
+			nih_free (ret);
+			return NIH_PARSE_POP_FRAME (frame, NULL);
+		}
+	}
+
+	ret->len = text_pos - ret->start;
+	return NIH_PARSE_POP_FRAME (frame, ret);
+#undef NIH_PARSE_UPDATE
+}
+
+/**
+ * nih_parse_child_r:
+ * @frame: frame we executed in.
+ *
+ * Parses a text stream given an NihParseSymbolChildRef. The node referenced is
+ * matched the number of times the child specifies it should be. Success is
+ * determined in light of the NIH_PARSE_NOT flag, i.e. the function will return
+ * success if the match fails and the child ref has NIH_PARSE_NOT set.
+ *
+ * Returns: A new frame for the next function to call or NULL if we're done.
+ **/
+static NihParseFrame *
+nih_parse_child_r (NihParseFrame *frame)
+{
+#define NIH_PARSE_UPDATE NIH_PARSE_CHILD_UPDATE
+	NihParseChildData *data = frame->data;
+
+	size_t                  times    = data->times;
+	NihParseSymbolChildRef *child    = data->child;
+	NihParseNode           *node     = data->node;
+	const char             *text_pos = data->text_pos;
+	const char             *text_end = data->text_end;
+
+	if (frame->retpoint)
+		goto *frame->retpoint;
+
+	for (; times < child->max_times || ! child->max_times;
+	     times++) {
+		if (! NIH_PARSE_CALL (NIH_PARSE, frame, node, child->child,
+				      NULL, times + 1, child, text_pos,
+				      text_end))
+			break;
+
+		text_pos += ((NihParseNode *)node->children.prev)->len;
+	}
+
+	if (times < child->min_times && (child->flags & NIH_PARSE_NOT))
+		return NIH_PARSE_POP_FRAME (frame, node);
+
+	if (times < child->min_times || (child->flags & NIH_PARSE_NOT))
+		return NIH_PARSE_POP_FRAME (frame, NULL);
+
+	return NIH_PARSE_POP_FRAME (frame, node);
+#undef NIH_PARSE_UPDATE
+}
+
+/**
+ * nih_parse_backtrack_r:
+ * @frame: frame we executed in.
+ *
+ * Given a node which has partially matched with a non-terminal symbol, rewind
+ * and find an alternate match path for the same symbols.
+ *
+ * Returns: A new frame for the next function to call or NULL if we're done.
+ **/
+static NihParseFrame *
+nih_parse_backtrack_r (NihParseFrame *frame)
+{
+#define NIH_PARSE_UPDATE NIH_PARSE_BACKTRACK_UPDATE
+	NihParseBacktrackData *data = frame->data;
+
+	NihParseNode           *node        = data->node;
+	size_t                 *idx         = data->idx;
+	int                    *alt_matched = data->alt_matched;
+	const char            **text_pos    = data->text_pos;
+	const char             *text_end    = data->text_end;
+
+	NihParseNode           *tail        = data->tail;
+	NihParseSymbolChildRef *child       = data->child;
+	int                     success     = data->success;
+
+	if (frame->retpoint)
+		goto *frame->retpoint;
+
+	*alt_matched = 0;
+	success = 0;
+	child = &node->symbol->children[*idx];
+
+	if (child->flags & NIH_PARSE_ALT_WITH_NEXT)
+		return NIH_PARSE_POP_FRAME (frame, node);
+
+	while (! NIH_LIST_EMPTY (&node->children)) {
+		tail = (NihParseNode *)node->children.prev;
+
+		while (tail->parent_ref != child) {
+			(*idx)--;
+			child = &node->symbol->children[*idx];
+		}
+
+		if (child->flags & (NIH_PARSE_NOT | NIH_PARSE_LOOKAHEAD)) {
+			nih_free (tail);
+			continue;
+		}
+
+		if (tail->nth_match > child->min_times
+		    || child->flags & NIH_PARSE_ALT_WITH_NEXT)
+			success = 1;
+
+		if (! NIH_PARSE_CALL (NIH_PARSE, frame, node, tail->symbol,
+				      tail, tail->nth_match, child, *text_pos,
+				      text_end)) {
+			if (! success)
+				continue;
+
+			*text_pos = NIH_PARSE_NODE_END (node);
+			return NIH_PARSE_POP_FRAME (frame, node);
+		}
+
+		if (! NIH_PARSE_CALL (NIH_PARSE_CHILD, frame, tail->nth_match,
+				      child, node, tail->start + tail->len,
+				      text_end))
+			continue;
+
+		*alt_matched = child->flags & NIH_PARSE_ALT_WITH_NEXT;
+		*text_pos = NIH_PARSE_NODE_END (node);
+
+		return NIH_PARSE_POP_FRAME (frame, node);
+	}
+
+	return NIH_PARSE_POP_FRAME (frame, NULL);
+#undef NIH_PARSE_UPDATE
+}
+
+/**
+ * nih_parse_terminal:
+ * @frame: frame we executed in.
+ *
+ * Check to see if @symbol matches at @text_pos, where @symbol is a terminal
+ * symbol.
+ *
+ * Returns: A new frame for the next function to call or NULL if we're done.
+ **/
+static NihParseFrame *
+nih_parse_terminal_r (NihParseFrame *frame)
+{
+#define NIH_PARSE_UPDATE NIH_PARSE_TERMINAL_UPDATE
+	NihParseTerminalData *data = frame->data;
+
+	void                   *parent     = data->parent;
+	NihParseSymbol         *symbol     = data->symbol;
+	size_t                  nth_time   = data->nth_time;
+	NihParseSymbolChildRef *parent_ref = data->parent_ref;
+	NihParseNode           *bt_node    = data->bt_node;
+	const char             *text_pos   = data->text_pos;
+	const char             *text_end   = data->text_end;
+
+	if (frame->retpoint)
+		goto *frame->retpoint;
+
+	nih_assert (symbol != NULL);
+	nih_assert (symbol->type != NIH_PARSE_NONTERM);
+
+	if (bt_node) {
+		nih_free (bt_node);
+		return NIH_PARSE_POP_FRAME (frame, NULL);
+	}
+
+	if (text_end - text_pos < symbol->length)
+		return NIH_PARSE_POP_FRAME (frame, NULL);
+
+	if (symbol->type == NIH_PARSE_CHARSET &&
+	    ! NIH_PARSE_CHARSET_MATCH (symbol->charset, *text_pos))
+		return NIH_PARSE_POP_FRAME (frame, NULL);
+
+	if (symbol->type == NIH_PARSE_STRING &&
+	    strncmp (symbol->text, text_pos, symbol->length))
+		return NIH_PARSE_POP_FRAME (frame, NULL);
+
+	bt_node = nih_parse_node_new (parent, symbol, text_pos,
+				      symbol->length, nth_time, parent_ref);
+
+	if (! bt_node)
+		return NIH_PARSE_OOM_ABORT(frame);
+
+	return NIH_PARSE_POP_FRAME (frame, bt_node);
+#undef NIH_PARSE_UPDATE
+}
+
+/**
+ * nih_parse_node_new:
+ * @parent: Alloc parent,
+ * @symbol: Symbol matched,
+ * @start: First character if this is a terminal match,
+ * @len: Length if this is a terminal match,
+ * @nth_match: This is the nth consectutive match of @symbol,
+ * @parent_ref: Reference from parent symbol to this symbol.
+ *
+ * Create a new NihParseNode.
+ * 
+ * Returns: The new node or NULL on OOM.
+ **/
+static NihParseNode *
+nih_parse_node_new (NihParseNode           *parent,
+		    NihParseSymbol         *symbol,
+		    const char             *start,
+		    size_t                  len,
+		    size_t                  nth_match,
+		    NihParseSymbolChildRef *parent_ref)
+{
+	NihParseNode *ret = nih_new (parent, NihParseNode);
+
+	nih_assert (symbol != NULL);
+
+	if (! ret)
+		return NULL;
+
+	nih_list_init (&ret->node);
+	nih_list_init (&ret->children);
+	nih_alloc_set_destructor (ret, nih_list_destroy);
+
+	if (parent)
+		nih_list_add (&parent->children, &ret->node);
+
+	ret->start = start;
+	ret->symbol = symbol;
+	ret->len = len;
+	ret->parent_ref = parent_ref;
+	ret->nth_match = nth_match;
+
+	return ret;
+}

=== added file 'nih/parse.h'
--- nih/parse.h	1970-01-01 00:00:00 +0000
+++ nih/parse.h	2010-02-17 22:55:00 +0000
@@ -0,0 +1,120 @@
+/* libnih
+ *
+ * Copyright © 2010 Casey Dahlin <cdahlin at redhat.com>
+ * Copyright © 2010 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef NIH_PARSE_H
+#define NIH_PARSE_H
+
+#include <sys/types.h>
+
+#include <nih/list.h>
+
+/**
+ * NihParseSymbolRefFlags:
+ *
+ * Flags describing properties of a symbol parent-child relationship.
+ *
+ * NIH_PARSE_ALT_WITH_NEXT: Either this child OR the next should match.
+ *
+ * NIH_PARSE_LOOKAHEAD: Start the next child matching at the same position as
+ * this one.
+ *
+ * NIH_PARSE_NOT: This child must NOT match.
+ **/
+typedef unsigned int NihParseSymbolRefFlags;
+#define NIH_PARSE_ALT_WITH_NEXT 0x1
+#define NIH_PARSE_LOOKAHEAD     0x2
+#define NIH_PARSE_NOT           0x4
+
+/**
+ * NihParseSymbolChildRef:
+ * @child: The child symbol,
+ * @min_times: Number of times @child must match,
+ * @max_times: Number of times @child may match (0 for infinite),
+ * @flags: Properties of this relationship.
+ *
+ * A parent-child relationship between symbols.
+ **/
+struct nih_parse_symbol;
+typedef struct nih_parse_symbol_child_ref {
+	struct nih_parse_symbol *child;
+	size_t min_times;
+	size_t max_times;
+	NihParseSymbolRefFlags flags;
+} NihParseSymbolChildRef;
+
+/**
+ * NihParseSymbol:
+ * @type: type of symbol,
+ * 	NIH_PARSE_STRING: A literal string to match,
+ * 	NIH_PARSE_CHARSET: A set of characters from which to match one,
+ * 	NIH_PARSE_NONTERM: A set of other symbols to match,
+ * @text: text to match (NIH_PARSE_STRING only),
+ * @length: length of @text or @children (Always 1 for NIH_PARSE_CHARSET),
+ * @charset: characters to match as a bitfield (NIH_PARSE_CHARSET only),
+ * @children: child symbols (NIH_PARSE_NONTERM only),
+ * 
+ * A syntactical unit of a PEG grammar.
+ **/
+typedef struct nih_parse_symbol {
+	enum {
+		NIH_PARSE_STRING,
+		NIH_PARSE_CHARSET,
+		NIH_PARSE_NONTERM
+	} type;
+
+	const char                    *text;
+	NihParseSymbolChildRef        *children;
+	size_t                         length;
+	u_int64_t                      charset[4];
+} NihParseSymbol;
+
+
+/**
+ * NihParseNode:
+ * @node: list node,
+ * @symbol: symbol matched,
+ * @start: text start position,
+ * @len: text length,
+ * @nth_match: this was the nth time in a row this symbol matched,
+ * @children: child nodes.
+ * @parent_ref: Child ref from the parent symbol that led to parsing this node.
+ *
+ * Node in a parse tree resulting from matching a string to a particular
+ * symbol.
+ **/
+typedef struct nih_parse_node {
+	NihList                 node;
+	NihParseSymbol         *symbol;
+	const char             *start;
+	size_t                  len;
+	size_t                  nth_match;
+	NihList                 children;
+	NihParseSymbolChildRef *parent_ref;
+} NihParseNode;
+
+
+NIH_BEGIN_EXTERN
+
+NihParseNode *nih_parse (void *alloc_parent, NihParseSymbol *symbol,
+			 const char *text_pos, size_t text_len)
+	__attribute__ ((warn_unused_result, malloc));
+
+NIH_END_EXTERN
+
+#endif /* NIH_PARSE_H */

=== added file 'nih/tests/test_parse.c'
--- nih/tests/test_parse.c	1970-01-01 00:00:00 +0000
+++ nih/tests/test_parse.c	2010-02-20 01:49:43 +0000
@@ -0,0 +1,652 @@
+/* libnih
+ *
+ * test_parse.c - test suite for nih/parse.c
+ *
+ * Copyright © 2010 Casey Dahlin <cdahlin at redhat.com>
+ * Copyright © 2010 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <nih/test.h>
+
+#include <string.h>
+
+#include <nih/macros.h>
+#include <nih/parse.h>
+#include <nih/alloc.h>
+#include <nih/error.h>
+#include <nih/errors.h>
+
+NihParseSymbol otoro = {
+	.type = NIH_PARSE_STRING,
+	.text = "otoro",
+	.length = 5,
+};
+
+NihParseSymbol sake = {
+	.type = NIH_PARSE_STRING,
+	.text = "sake",
+	.length = 4,
+};
+
+NihParseSymbol sawara = {
+	.type = NIH_PARSE_STRING,
+	.text = "sawara",
+	.length = 6,
+};
+
+NihParseSymbol hotate = {
+	.type = NIH_PARSE_STRING,
+	.text = "hotate",
+	.length = 6,
+};
+
+NihParseSymbol maki = {
+	.type = NIH_PARSE_STRING,
+	.text = "-maki",
+	.length = 5,
+};
+
+NihParseSymbol nigri = {
+	.type = NIH_PARSE_STRING,
+	.text = "-nigri",
+	.length = 6,
+};
+
+NihParseSymbol nl = {
+	.type = NIH_PARSE_STRING,
+	.text = "\n",
+	.length = 1,
+};
+
+NihParseSymbolChildRef fish_children[] = {
+	{ &otoro, 1, 1, NIH_PARSE_ALT_WITH_NEXT },
+	{ &sake, 1, 1, NIH_PARSE_ALT_WITH_NEXT },
+	{ &sawara, 1, 1, NIH_PARSE_ALT_WITH_NEXT },
+	{ &hotate, 1, 1, 0 },
+};
+NihParseSymbol fish = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 4,
+	.children = fish_children,
+};
+
+NihParseSymbolChildRef sushi_type_children[] = {
+	{ &maki, 1, 1, NIH_PARSE_ALT_WITH_NEXT },
+	{ &nigri, 1, 1, 0 },
+};
+NihParseSymbol sushi_type = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = sushi_type_children,
+};
+
+NihParseSymbolChildRef sushi_children[] = {
+	{ &fish, 1, 1, 0 },
+	{ &sushi_type, 1, 1, 0 },
+};
+NihParseSymbol sushi = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = sushi_children,
+};
+
+NihParseSymbolChildRef sushi_menu_tail_children[] = {
+	{ &nl, 1, 1, 0 },
+	{ &sushi, 1, 1, 0 },
+};
+NihParseSymbol sushi_menu_tail = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = sushi_menu_tail_children,
+};
+
+NihParseSymbolChildRef sushi_menu_children[] = {
+	{ &sushi, 1, 1, 0 },
+	{ &sushi_menu_tail, 0, 0, 0 },
+};
+NihParseSymbol sushi_menu = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = sushi_menu_children,
+};
+
+NihParseSymbol vowel = {
+	.type = NIH_PARSE_CHARSET,
+	.length = 1,
+	.charset = { 0, 0x20822200000000, 0, 0 }, /* [aeiou] */
+};
+
+NihParseSymbol sawar = {
+	.type = NIH_PARSE_STRING,
+	.text = "sawar",
+	.length = 5,
+};
+
+NihParseSymbolChildRef sawar__children[] = {
+	{ &sawar, 1, 1, 0 },
+	{ &vowel, 1, 1, 0 },
+};
+NihParseSymbol sawar_ = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = sawar__children,
+};
+
+NihParseSymbolChildRef sawar_and_a_children[] = {
+	{ &sawara, 1, 1, NIH_PARSE_LOOKAHEAD },
+	{ &sawar_, 1, 1, 0 },
+};
+NihParseSymbol sawar_and_a = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = sawar_and_a_children,
+};
+
+NihParseSymbolChildRef sawar_not_a_children[] = {
+	{ &sawara, 1, 1, NIH_PARSE_NOT },
+	{ &sawar_, 1, 1, 0 },
+};
+NihParseSymbol sawar_not_a = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = sawar_not_a_children,
+};
+
+NihParseSymbolChildRef vowels_and_a_vowel_children[] = {
+	{ &vowel, 0, 0, 0 },
+	{ &vowel, 1, 1, 0 },
+};
+NihParseSymbol vowels_and_a_vowel = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = vowels_and_a_vowel_children,
+};
+
+NihParseSymbolChildRef vowels_3_to_6_children[] = {
+	{ &vowel, 3, 6, 0 },
+};
+NihParseSymbol vowels_3_to_6 = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 1,
+	.children = vowels_3_to_6_children,
+};
+
+NihParseSymbol abcab = {
+	.type = NIH_PARSE_STRING,
+	.text = "abcab",
+	.length = 5,
+};
+
+NihParseSymbol abc = {
+	.type = NIH_PARSE_STRING,
+	.text = "abc",
+	.length = 3,
+};
+
+NihParseSymbolChildRef abcab_or_abc_children[] = {
+	{ &abcab, 1, 1, NIH_PARSE_ALT_WITH_NEXT },
+	{ &abc, 1, 1, 0 },
+};
+NihParseSymbol abcab_or_abc = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 2,
+	.children = abcab_or_abc_children,
+};
+
+NihParseSymbolChildRef abcab_or_abc_repeat_children[] = {
+	{ &abcab_or_abc, 2, 0, 0 },
+};
+NihParseSymbol abcab_or_abc_repeat = {
+	.type = NIH_PARSE_NONTERM,
+	.length = 1,
+	.children = abcab_or_abc_repeat_children,
+};
+
+
+#define TEST_PARSE_FAIL(...) ({						\
+	NihError *error;						\
+	nih_error_push_context ();					\
+									\
+	TEST_EQ_P (nih_parse (__VA_ARGS__), NULL);			\
+									\
+	error = nih_error_get ();					\
+									\
+	TEST_EQ (error->number, NIH_PARSE_FAILED);			\
+	TEST_EQ_STR (error->message, "Expected symbol");		\
+	TEST_ALLOC_PARENT (error->message, error);			\
+									\
+	nih_free (error);						\
+	nih_error_pop_context ();					\
+})
+
+#define TEST_NODE(sym, text, times)		\
+	TEST_NE_P (result, NULL);		\
+	TEST_EQ_P (result->symbol, (sym));	\
+	TEST_EQ_STRN (result->start, (text));	\
+	TEST_EQ (result->len, strlen (text));	\
+	TEST_EQ (result->nth_match, (times))
+
+#define TEST_PARSE_SYMBOL_MUST_MATCH(...) ({				\
+	NihParseNode *result = nih_parse (__VA_ARGS__);	\
+	TEST_NE_P (result, NULL);					\
+	(result);							\
+})
+
+#define TEST_PARSE(...)							\
+	for (NihParseNode *result =					\
+	     TEST_PARSE_SYMBOL_MUST_MATCH (__VA_ARGS__); result;	\
+	     nih_free (result), result = NULL)
+
+#define TEST_CHILD_COUNT(n) ({				\
+	int count = 0;					\
+							\
+	NIH_LIST_FOREACH (&result->children, iter)	\
+		count++;				\
+							\
+	TEST_EQ (count, (n));				\
+})
+
+#define TEST_GET_CHILD(n) ({				\
+	int count = 0;					\
+	NihParseNode *child;				\
+							\
+	NIH_LIST_FOREACH (&result->children, iter) {	\
+		child = (NihParseNode *)iter;		\
+		count++;				\
+		if (count == (n))			\
+			break;				\
+	}						\
+							\
+	TEST_EQ (count, (n));				\
+	(child);					\
+})
+
+/* Some trickiness here to make a new result variable in the new scope with a
+ * value defined in terms of the old result variable from the parent scope.
+ */
+#define TEST_CHILD_NODE(n)				\
+	for (NihParseNode *child = TEST_GET_CHILD(n),	\
+	     *result = child;				\
+	     result;					\
+	     result = NULL)
+
+void
+test_symbol_matches (void)
+{
+	char *string;
+
+	TEST_FUNCTION ("nih_parse");
+
+
+	TEST_FEATURE ("with trivial string pattern");
+	TEST_PARSE (NULL, &otoro, "otoro", 5) {
+		TEST_NODE (&otoro, "otoro", 1);
+		TEST_CHILD_COUNT (0);
+	}
+
+
+	TEST_FEATURE ("with trivial string pattern and no match");
+	TEST_PARSE_FAIL (NULL, &otoro, "otter", 5);
+
+
+	TEST_FEATURE ("with alternation group");
+	TEST_PARSE (NULL, &fish, "sawara", 6) {
+		TEST_NODE (&fish, "sawara", 1);
+		TEST_CHILD_COUNT (1);
+
+		TEST_CHILD_NODE (1) {
+			TEST_NODE (&sawara, "sawara", 1);
+			TEST_CHILD_COUNT (0);
+		}
+	}
+
+
+	TEST_FEATURE ("with alternation group and no match");
+	TEST_PARSE_FAIL (NULL, &fish, "sawhorse", 8);
+
+
+	TEST_FEATURE ("with multiple non-terminal layers");
+	TEST_PARSE (NULL, &sushi, "sake-nigri", 10) {
+		TEST_NODE (&sushi, "sake-nigri", 1);
+		TEST_CHILD_COUNT (2);
+
+		TEST_CHILD_NODE (1) {
+			TEST_NODE (&fish, "sake", 1);
+			TEST_CHILD_COUNT (1);
+
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&sake, "sake", 1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+
+		TEST_CHILD_NODE (2) {
+			TEST_NODE (&sushi_type, "-nigri", 1);
+			TEST_CHILD_COUNT (1);
+
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&nigri, "-nigri", 1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+	}
+
+
+	TEST_FEATURE ("with multiple non-terminal layers and no match");
+	TEST_PARSE_FAIL (NULL, &sushi, "sake-negima", 11);
+
+
+	string = "sake-nigri\n"
+		 "sawara-maki\n"
+		 "hotate-nigri";
+
+	TEST_FEATURE ("with repetition");
+	TEST_PARSE (NULL, &sushi_menu, string, 35) {
+		TEST_NODE (&sushi_menu, string, 1);
+		TEST_CHILD_COUNT (3);
+
+		TEST_CHILD_NODE (1) {
+			TEST_NODE (&sushi, "sake-nigri", 1);
+			TEST_CHILD_COUNT (2);
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&fish, "sake", 1);
+				TEST_CHILD_COUNT (1);
+
+				TEST_CHILD_NODE (1) {
+					TEST_NODE (&sake, "sake", 1);
+					TEST_CHILD_COUNT (0);
+				}
+			}
+
+			TEST_CHILD_NODE (2) {
+				TEST_NODE (&sushi_type, "-nigri", 1);
+				TEST_CHILD_COUNT (1);
+
+				TEST_CHILD_NODE (1) {
+					TEST_NODE (&nigri, "-nigri", 1);
+					TEST_CHILD_COUNT (0);
+				}
+			}
+		}
+
+		TEST_CHILD_NODE (2) {
+			TEST_NODE (&sushi_menu_tail, "\nsawara-maki", 1);
+			TEST_CHILD_COUNT (2);
+
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&nl, "\n", 1);
+				TEST_CHILD_COUNT (0);
+			}
+
+			TEST_CHILD_NODE (2) {
+				TEST_NODE (&sushi, "sawara-maki", 1);
+				TEST_CHILD_COUNT (2);
+
+				TEST_CHILD_NODE (1) {
+					TEST_NODE (&fish, "sawara", 1);
+					TEST_CHILD_COUNT (1);
+
+					TEST_CHILD_NODE (1) {
+						TEST_NODE (&sawara, "sawara", 1);
+						TEST_CHILD_COUNT (0);
+					}
+				}
+
+				TEST_CHILD_NODE (2) {
+					TEST_NODE (&sushi_type, "-maki", 1);
+					TEST_CHILD_COUNT (1);
+
+					TEST_CHILD_NODE (1) {
+						TEST_NODE (&maki, "-maki", 1);
+						TEST_CHILD_COUNT (0);
+					}
+				}
+			}
+		}
+
+		TEST_CHILD_NODE (3) {
+			TEST_NODE (&sushi_menu_tail, "\nhotate-nigri", 2);
+			TEST_CHILD_COUNT (2);
+
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&nl, "\n", 1);
+				TEST_CHILD_COUNT (0);
+			}
+
+			TEST_CHILD_NODE (2) {
+				TEST_NODE (&sushi, "hotate-nigri", 1);
+				TEST_CHILD_COUNT (2);
+
+				TEST_CHILD_NODE (1) {
+					TEST_NODE (&fish, "hotate", 1);
+					TEST_CHILD_COUNT (1);
+
+					TEST_CHILD_NODE (1) {
+						TEST_NODE (&hotate, "hotate", 1);
+						TEST_CHILD_COUNT (0);
+					}
+				}
+
+				TEST_CHILD_NODE (2) {
+					TEST_NODE (&sushi_type, "-nigri", 1);
+					TEST_CHILD_COUNT (1);
+
+					TEST_CHILD_NODE (1) {
+						TEST_NODE (&nigri, "-nigri", 1);
+						TEST_CHILD_COUNT (0);
+					}
+				}
+			}
+		}
+	}
+
+
+	TEST_FEATURE ("with charset (1)");
+	TEST_PARSE (NULL, &vowel, "a", 1) {
+		TEST_NODE (&vowel, "a", 1);
+		TEST_CHILD_COUNT (0);
+	}
+
+
+	TEST_FEATURE ("with charset (2)");
+	TEST_PARSE (NULL, &vowel, "i", 1) {
+		TEST_NODE (&vowel, "i", 1);
+		TEST_CHILD_COUNT (0);
+	}
+
+
+	TEST_FEATURE ("with charset and no match");
+	TEST_PARSE_FAIL (NULL, &vowel, "q", 1);
+
+
+	TEST_FEATURE ("with look-ahead");
+	TEST_PARSE (NULL, &sawar_and_a, "sawara", 6) {
+		TEST_NODE (&sawar_and_a, "sawara", 1);
+		TEST_CHILD_COUNT (2);
+
+		TEST_CHILD_NODE (1) {
+			TEST_NODE (&sawara, "sawara", 1);
+			TEST_CHILD_COUNT (0);
+		}
+
+		TEST_CHILD_NODE (2) {
+			TEST_NODE (&sawar_, "sawara", 1);
+			TEST_CHILD_COUNT (2);
+
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&sawar, "sawar", 1);
+				TEST_CHILD_COUNT (0);
+			}
+
+			TEST_CHILD_NODE (2) {
+				TEST_NODE (&vowel, "a", 1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+	}
+
+
+	TEST_FEATURE ("with look-ahead and no match");
+	TEST_PARSE_FAIL (NULL, &sawar_and_a, "saware", 6);
+
+
+	TEST_FEATURE ("with negation");
+	TEST_PARSE (NULL, &sawar_not_a, "saware", 6) {
+		TEST_NODE (&sawar_not_a, "saware", 1);
+		TEST_CHILD_COUNT (1);
+
+		TEST_CHILD_NODE (1) {
+			TEST_NODE (&sawar_, "saware", 1);
+			TEST_CHILD_COUNT (2);
+
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&sawar, "sawar", 1);
+				TEST_CHILD_COUNT (0);
+			}
+
+			TEST_CHILD_NODE (2) {
+				TEST_NODE (&vowel, "e", 1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+	}
+
+
+	TEST_FEATURE ("with negation and no match");
+	TEST_PARSE_FAIL (NULL, &sawar_not_a, "sawara", 6);
+
+	
+	TEST_FEATURE ("with bounds");
+	TEST_PARSE (NULL, &vowels_3_to_6, "aeiou", 5) {
+		TEST_NODE (&vowels_3_to_6, "aeiou", 1);
+		TEST_CHILD_COUNT (5);
+
+		for (int i = 0; i < 5; i++) {
+			char match[] = { 'x', '\0' };
+
+			match[0] = "aeiou"[i];
+			TEST_CHILD_NODE (i+1) {
+				TEST_NODE (&vowel, match, i+1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+	}
+
+
+	TEST_FEATURE ("with bounds and too few");
+	TEST_PARSE_FAIL (NULL, &vowels_3_to_6, "ae", 2);
+
+
+	TEST_FEATURE ("with bounds and just enough");
+	TEST_PARSE (NULL, &vowels_3_to_6, "aei", 3) {
+		TEST_NODE (&vowels_3_to_6, "aei", 1);
+		TEST_CHILD_COUNT (3);
+
+		for (int i = 0; i < 3; i++) {
+			char match[] = { 'x', '\0' };
+
+			match[0] = "aei"[i];
+			TEST_CHILD_NODE (i+1) {
+				TEST_NODE (&vowel, match, i+1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+	}
+
+
+	TEST_FEATURE ("with bounds and too many");
+	TEST_PARSE (NULL, &vowels_3_to_6, "aeiouaeiou", 10) {
+		TEST_NODE (&vowels_3_to_6, "aeioua", 1);
+		TEST_CHILD_COUNT (6);
+
+		for (int i = 0; i < 6; i++) {
+			char match[] = { 'x', '\0' };
+
+			match[0] = "aeioua"[i];
+			TEST_CHILD_NODE (i+1) {
+				TEST_NODE (&vowel, match, i+1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+	}
+
+
+	TEST_FEATURE ("with backtrack")
+	TEST_PARSE (NULL, &vowels_and_a_vowel, "aeiou", 5) {
+		TEST_NODE (&vowels_and_a_vowel, "aeiou", 1);
+		TEST_CHILD_COUNT (5);
+
+		TEST_CHILD_NODE (1) {
+			TEST_NODE (&vowel, "a", 1);
+			TEST_CHILD_COUNT (0);
+		}
+
+		TEST_CHILD_NODE (2) {
+			TEST_NODE (&vowel, "e", 2);
+			TEST_CHILD_COUNT (0);
+		}
+
+		TEST_CHILD_NODE (3) {
+			TEST_NODE (&vowel, "i", 3);
+			TEST_CHILD_COUNT (0);
+		}
+
+		TEST_CHILD_NODE (4) {
+			TEST_NODE (&vowel, "o", 4);
+			TEST_CHILD_COUNT (0);
+		}
+
+		TEST_CHILD_NODE (5) {
+			TEST_NODE (&vowel, "u", 1);
+			TEST_CHILD_COUNT (0);
+		}
+	}
+
+
+	TEST_FEATURE("with sub-backtrack in repetition path");
+	TEST_PARSE (NULL, &abcab_or_abc_repeat, "abcabc", 6) {
+		TEST_NODE (&abcab_or_abc_repeat, "abcabc", 1);
+		TEST_CHILD_COUNT (2);
+
+		TEST_CHILD_NODE (1) {
+			TEST_NODE (&abcab_or_abc, "abc", 1);
+			TEST_CHILD_COUNT (1);
+
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&abc, "abc", 1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+
+		TEST_CHILD_NODE (2) {
+			TEST_NODE (&abcab_or_abc, "abc", 2);
+			TEST_CHILD_COUNT (1);
+
+			TEST_CHILD_NODE (1) {
+				TEST_NODE (&abc, "abc", 1);
+				TEST_CHILD_COUNT (0);
+			}
+		}
+	}
+}
+
+int
+main (int    argc,
+      char **argv)
+{
+	test_symbol_matches ();
+	return 0;
+}




More information about the upstart-devel mailing list