[PATCH 1/6] utilities: kernelscan: Tool to scan kernel for error messages

Colin King colin.king at canonical.com
Fri Nov 2 16:58:32 UTC 2012


From: Colin Ian King <colin.king at canonical.com>

This is the initial cut of the kernelscan utility. It helps us
track new kernel error messages so that we can add them to the
fwts klog json database.

Signed-off-by: Colin Ian King <colin.king at canonical.com>
---
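A quick sketch of the intended usage, based on the comment block in
kernelscan.c.  It assumes the fwts klog database is installed at
/usr/share/fwts/klog.json, the path that klog_load() currently hard-codes:

  cat drivers/pnp/pnpacpi/rsparser.c | kernelscan -E | gcc -E - | kernelscan -P

The first pass (-E) strips out #include directives so that gcc -E can
expand the remaining macros without tripping over missing headers; the
second pass (-P) scans the preprocessed source for printk(KERN_ERR ...)
and dev_err() calls and compares each message against the
"firmware_error_warning_patterns" table in the json database.  Messages
that already match a pattern are reported as "OK :", messages that still
need adding to the database as "ADD:".
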
 Makefile.am                |    2 +-
 configure.ac               |    1 +
 src/utilities/Makefile.am  |    6 +
 src/utilities/kernelscan.c |  976 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 984 insertions(+), 1 deletion(-)
 create mode 100644 src/utilities/Makefile.am
 create mode 100644 src/utilities/kernelscan.c

diff --git a/Makefile.am b/Makefile.am
index 09f5bec..057c47c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = src data
+SUBDIRS = src data src/utilities
 
 ACLOCAL_AMFLAGS = -I m4
diff --git a/configure.ac b/configure.ac
index 48481aa..77d44bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -81,6 +81,7 @@
 	   src/acpica/Makefile
            src/lib/Makefile
            src/lib/src/Makefile
+	   src/utilities/Makefile
            data/Makefile
           ])
           AC_OUTPUT
diff --git a/src/utilities/Makefile.am b/src/utilities/Makefile.am
new file mode 100644
index 0000000..427c44f
--- /dev/null
+++ b/src/utilities/Makefile.am
@@ -0,0 +1,6 @@
+AM_CFLAGS = -Wall -Werror -Wextra
+
+bin_PROGRAMS = kernelscan
+kernelscan_SOURCES = kernelscan.c
+kernelscan_LDADD = -ljson -lpcre
+
diff --git a/src/utilities/kernelscan.c b/src/utilities/kernelscan.c
new file mode 100644
index 0000000..7569079
--- /dev/null
+++ b/src/utilities/kernelscan.c
@@ -0,0 +1,976 @@
+/*
+ * Copyright (C) 2012 Canonical
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+
+#include <pcre.h>
+#include <json/json.h>
+
+#define PARSER_OK		0
+#define PARSER_COMMENT_FOUND	1
+
+#define __JSON_ERR_PTR__ ((json_object*) -1)
+/*
+ *  Older versions of json-c may return an error in an
+ *  object as ((json_object*)-1), whereas newer versions
+ *  return NULL, so check for both. Sigh.
+ */
+#define JSON_ERROR(ptr) \
+	( ((ptr) == NULL) || ((json_object*)(ptr) == __JSON_ERR_PTR__) )
+
+typedef enum {
+        COMPARE_REGEX = 'r',
+        COMPARE_STRING = 's',
+        COMPARE_UNKNOWN = 'u',
+} compare_mode;
+
+typedef struct {
+        char *pattern;		/* pattern that we compare to kernel messages */
+	compare_mode cm;	/* 'r' regex or 's' string comparison */
+        pcre *re;		/* regex from pattern */
+        pcre_extra *extra;
+} klog_pattern;
+
+/*
+ *  Subset of tokens that we need to intelligently parse the kernel C source
+ */
+typedef enum {
+	TOKEN_UNKNOWN,		/* No idea what token it is */
+	TOKEN_NUMBER,		/* Integer */
+	TOKEN_LITERAL_STRING,	/* "string" */
+	TOKEN_LITERAL_CHAR,	/* 'x' */
+	TOKEN_IDENTIFIER,	/* identifier */
+	TOKEN_PAREN_OPENED,	/* ( */
+	TOKEN_PAREN_CLOSED,	/* ) */
+	TOKEN_CPP,		/* # C pre-processor */
+	TOKEN_WHITE_SPACE,	/* ' ', '\t', '\r', '\n' white space */
+	TOKEN_LESS_THAN,	/* < */
+	TOKEN_GREATER_THAN,	/* > */
+	TOKEN_COMMA,		/* , */
+	TOKEN_ARROW,		/* -> */
+	TOKEN_TERMINAL,		/* ; */
+} token_type;
+
+/*
+ *  A token
+ */
+typedef struct {
+	char *token;		/* The gathered string for this token */
+	size_t len;		/* Length of the token buffer */
+	char *ptr;		/* Current end of the token during the lexical analysis */
+	token_type type;	/* The type of token we think it is */
+} token;
+
+/*
+ *  Quick and dirty way to push input stream back, like ungetc()
+ */
+typedef struct get_stack {
+	int ch;			/* Char pushed back */
+	struct get_stack *next;	/* Next one in list */
+} get_stack;
+
+/*
+ *  Parser context
+ */
+typedef struct {
+	FILE *fp;		/* The file descriptor we are reading */
+	bool skip_white_space;	/* Magic skip white space flag */
+	get_stack *get_chars;	/* Ungot chars get pushed onto this */
+} parser;
+
+/*
+ *  FWTS klog patterns, loaded from a json file
+ */
+static klog_pattern *patterns;
+
+static int get_token(parser *p, token *t);
+
+/*
+ *  Initialise the parser
+ */
+static void parser_new(parser *p, FILE *fp, bool skip_white_space)
+{
+	p->get_chars = NULL;
+	p->fp = fp;
+	p->skip_white_space = skip_white_space;
+}
+
+/*
+ *  Get next character from input stream
+ */
+static int get_next(parser *p)
+{
+	int ch;
+
+	/*
+	 * If we have chars pushed using unget_next
+	 * then pop them off the list first
+	 */
+	if (p->get_chars) {
+		get_stack *tmp = p->get_chars;
+		ch = tmp->ch;
+
+		p->get_chars = tmp->next;
+		free(tmp);
+
+		return ch;
+	}
+	return fgetc(p->fp);
+}
+
+/*
+ *  Push character back onto the input
+ *  stream (in this case, a simple LIFO stack)
+ */
+static void unget_next(parser *p, int ch)
+{
+	get_stack *new;
+
+	if ((new = calloc(sizeof(get_stack), 1)) == NULL) {
+		fprintf(stderr, "unget_next: Out of memory!\n");
+		exit(EXIT_FAILURE);
+	}
+
+	new->ch = ch;
+	new->next = p->get_chars;
+	p->get_chars = new;
+}
+
+/*
+ *  Create a new token, give it plenty of slop so
+ *  we don't need to keep on reallocating the token
+ *  buffer as we append more characters to it during
+ *  the lexing phase.
+ */
+static void token_new(token *t)
+{
+	if ((t->token = calloc(1024, 1)) == NULL) {
+		fprintf(stderr, "token_new: Out of memory!\n");
+		exit(EXIT_FAILURE);
+	}
+	t->len = 1024;
+	t->ptr = t->token;
+	t->type = TOKEN_UNKNOWN;
+}
+
+/*
+ *  Clear the token ready for re-use
+ */
+static void token_clear(token *t)
+{
+	t->ptr = t->token;
+	t->type = TOKEN_UNKNOWN;
+	*(t->ptr) = '\0';
+}
+
+/*
+ *  Free the token
+ */
+static void token_free(token *t)
+{
+	free(t->token);
+	t->token = NULL;
+}
+
+/*
+ *  Append a single character to the token,
+ *  we may run out of space, so this occasionally
+ *  adds an extra 1K of token space for long tokens
+ */
+static void token_append(token *t, int ch)
+{
+	if (t->ptr >= t->token + t->len - 1) {
+		/* No more space, add 1K more space */
+		size_t used = t->ptr - t->token;
+
+		t->len += 1024;
+		if ((t->token = realloc(t->token, t->len)) == NULL) {
+			fprintf(stderr, "token_append: Out of memory!\n");
+			exit(EXIT_FAILURE);
+		}
+		/* realloc may have moved the buffer, re-point into it */
+		t->ptr = t->token + used;
+	}
+	/* Enough space, just add char */
+	*(t->ptr) = ch;
+	t->ptr++;
+	*(t->ptr) = 0;
+}
+
+/*
+ *  Figure out if a klog pattern is a regex or a plain text string
+ */
+static compare_mode klog_compare_mode_str_to_val(const char *str)
+{
+	if (strcmp(str, "regex") == 0)
+		return COMPARE_REGEX;
+	else if (strcmp(str, "string") == 0)
+		return COMPARE_STRING;
+	else
+		return COMPARE_UNKNOWN;
+}
+
+/*
+ *  Load FWTS klog messages from the json table
+ */
+static klog_pattern *klog_load(const char *table)
+{
+	int n;
+	int i;
+	json_object *klog_objs;
+	json_object *klog_table;
+	klog_pattern *patterns;
+
+	klog_objs = json_object_from_file("/usr/share/fwts/klog.json");
+	if (JSON_ERROR(klog_objs)) {
+		fprintf(stderr, "Cannot load klog data\n");
+		exit(EXIT_FAILURE);
+	}
+
+	klog_table = json_object_object_get(klog_objs, table);
+	if (JSON_ERROR(klog_table)) {
+		fprintf(stderr, "Cannot fetch klog table object from %s.\n", table);
+		exit(EXIT_FAILURE);
+	}
+
+	n = json_object_array_length(klog_table);
+
+	/* Last entry is null to indicate end, so alloc n+1 items */
+	if ((patterns = calloc(n+1, sizeof(klog_pattern))) == NULL) {
+		fprintf(stderr, "Cannot allocate pattern table.\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Now fetch json objects and compile regex */
+	for (i = 0; i < n; i++) {
+		const char *error;
+		char *str;
+		int erroffset;
+		json_object *obj;
+
+		obj = json_object_array_get_idx(klog_table, i);
+		if (JSON_ERROR(obj)) {
+			fprintf(stderr, "Cannot fetch item %d from table %s.\n", i, table);
+			exit(EXIT_FAILURE);
+		}
+
+		str = (char*)json_object_get_string(json_object_object_get(obj, "compare_mode"));
+		if (JSON_ERROR(str)) {
+			fprintf(stderr, "Cannot fetch compare_mode object, item %d from table %s.\n", i, table);
+			exit(EXIT_FAILURE);
+		}
+		patterns[i].cm = klog_compare_mode_str_to_val(str);
+
+		str = (char*)json_object_get_string(json_object_object_get(obj, "pattern"));
+		if (JSON_ERROR(str)) {
+			fprintf(stderr, "Cannot fetch pattern object, item %d from table %s.\n", i, table);
+			exit(EXIT_FAILURE);
+		}
+		patterns[i].pattern = strdup(str);
+		if (patterns[i].pattern == NULL) {
+			fprintf(stderr, "Failed to strdup regex pattern %d from table %s.\n", i, table);
+			exit(EXIT_FAILURE);
+		}
+
+		if ((patterns[i].re = pcre_compile(patterns[i].pattern, 0, &error, &erroffset, NULL)) == NULL) {
+			fprintf(stderr, "Regex %s failed to compile: %s.\n", patterns[i].pattern, error);
+			patterns[i].re = NULL;
+		} else {
+			patterns[i].extra = pcre_study(patterns[i].re, 0, &error);
+			if (error != NULL) {
+				fprintf(stderr, "Regex %s failed to optimize: %s.\n", patterns[i].pattern, error);
+				patterns[i].re = NULL;
+			}
+		}
+	}
+
+	/* Discard the json table now we've parsed it into patterns */
+	json_object_put(klog_objs);
+
+	return patterns;
+}
+
+/*
+ *  Does str match any of the patterns in the klog pattern table
+ */
+static bool klog_find(char *str, klog_pattern *patterns)
+{
+	int i;
+
+	for (i = 0; patterns[i].pattern; i++) {
+		if (patterns[i].cm == COMPARE_STRING) {
+			if (strstr(str, patterns[i].pattern)) {
+				return true;
+			}
+		}
+		if (patterns[i].cm == COMPARE_REGEX) {
+			int vector[1];
+			if (pcre_exec(patterns[i].re, patterns[i].extra, str, strlen(str), 0, 0, vector, 1) == 0) {
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+/*
+ *  Free the klog patterns
+ */
+static void klog_free(klog_pattern *patterns)
+{
+	int i;
+
+	for (i = 0; patterns[i].pattern; i++) {
+		pcre_free(patterns[i].re);
+		pcre_free(patterns[i].extra);
+		free(patterns[i].pattern);
+	}
+	free(patterns);
+}
+
+/*
+ *  Parse C comments and just throw them away
+ */
+static int skip_comments(parser *p)
+{
+	int ch;
+	int nextch;
+
+	nextch = get_next(p);
+	if (nextch == EOF)
+		return EOF;
+
+	if (nextch == '/') {
+		do {
+			ch = get_next(p);
+			if (ch == EOF)
+				return EOF;
+		}
+		while (ch != '\n');
+
+		return PARSER_COMMENT_FOUND;
+	}
+
+	if (nextch == '*') {
+		for (;;) {
+			ch = get_next(p);
+			if (ch == EOF)
+				return EOF;
+
+			if (ch == '*') {
+				ch = get_next(p);
+				if (ch == EOF)
+					return EOF;
+
+				if (ch == '/')
+					return PARSER_COMMENT_FOUND;
+			}
+		}
+	}
+
+	/* Not a comment, push back */
+	unget_next(p, nextch);
+
+	return PARSER_OK;
+}
+
+/*
+ *  Parse an integer.  This is fairly minimal as the
+ *  kernel doesn't have floats or doubles, so we
+ *  can just parse decimal, octal or hex values.
+ */
+static int parse_number(parser *p, token *t, int ch)
+{
+	int nextch1, nextch2;
+	bool ishex = false;
+	bool isoct = false;
+
+	/*
+	 *  Crude way to detect the kind of integer
+	 */
+	if (ch == '0') {
+		token_append(t, ch);
+
+		nextch1 = get_next(p);
+		if (nextch1 == EOF) {
+			/* Just a lone 0 at end of input */
+			return PARSER_OK;
+		}
+
+		if (nextch1 >= '0' && nextch1 <= '7') {
+			/* Must be an octal value */
+			ch = nextch1;
+			isoct = true;
+		} else if (nextch1 == 'x' || nextch1 == 'X') {
+			/* Is it hexadecimal? */
+			nextch2 = get_next(p);
+			if (nextch2 == EOF) {
+				unget_next(p, nextch1);
+				return PARSER_OK;
+			}
+
+			if (isxdigit(nextch2)) {
+				/* Hexadecimal */
+				token_append(t, nextch1);
+				ch = nextch2;
+				ishex = true;
+			} else {
+				/* Nope */
+				unget_next(p, nextch2);
+				unget_next(p, nextch1);
+				return PARSER_OK;
+			}
+		} else {
+			unget_next(p, nextch1);
+			return PARSER_OK;
+		}
+	}
+
+	/*
+	 * OK, we now know what type of integer we
+	 * are processing, so just gather up the digits
+	 */
+	token_append(t, ch);
+
+	for (;;) {
+		ch = get_next(p);
+
+		if (ch == EOF) {
+			unget_next(p, ch);
+			return PARSER_OK;
+		}
+
+		if (ishex) {
+			if (isxdigit(ch)) {
+				token_append(t, ch);
+			} else {
+				unget_next(p, ch);
+				return PARSER_OK;
+			}
+		} else if (isoct) {
+			if (ch >= '0' && ch <= '7') {
+				token_append(t, ch);
+			} else {
+				unget_next(p, ch);
+				return PARSER_OK;
+			}
+		} else {
+			if (isdigit(ch)) {
+				token_append(t, ch);
+			} else {
+				unget_next(p, ch);
+				return PARSER_OK;
+			}
+		}
+	}
+}
+
+/*
+ *  Parse identifiers
+ */
+static int parse_identifier(parser *p, token *t, int ch)
+{
+	token_append(t, ch);
+
+	t->type = TOKEN_IDENTIFIER;
+
+	for (;;) {
+		ch = get_next(p);
+		if (ch == EOF) {
+			break;
+		}
+		if (isalnum(ch) || ch == '_') {
+			token_append(t, ch);
+		} else {
+			unget_next(p, ch);
+			break;
+		}
+	}
+
+	return PARSER_OK;
+}
+
+/*
+ *  Parse literal strings
+ */
+static int parse_literal(parser *p, token *t, int literal, token_type type)
+{
+	bool escaped = false;
+	int ch;
+
+	t->type = type;
+
+	token_append(t, literal);
+
+	for (;;) {
+		ch = get_next(p);
+		if (ch == EOF) {
+			return PARSER_OK;
+		}
+
+		if (!escaped && ch == '\\') {
+			escaped = true;
+			token_append(t, ch);
+			continue;
+		}
+
+		if (!escaped && ch == literal) {
+			token_append(t, ch);
+			return PARSER_OK;
+		}
+		escaped = false;
+
+		token_append(t, ch);
+	}
+
+	return PARSER_OK;
+}
+
+/*
+ *  Parse operators such as +, =, |, & which
+ *  can be single or doubled (e.g. + or ++) forms.
+ */
+static int parse_op(parser *p, token *t, int op)
+{
+	int ch;
+
+	token_append(t, op);
+
+	ch = get_next(p);
+	if (ch == EOF) {
+		return PARSER_OK;
+	}
+
+	if (ch == op) {
+		token_append(t, op);
+		return PARSER_OK;
+	}
+
+	unget_next(p, ch);
+	return PARSER_OK;
+}
+
+/*
+ *  Parse -, --, ->
+ */
+static int parse_minus(parser *p, token *t, int op)
+{
+	int ch;
+
+	token_append(t, op);
+
+	ch = get_next(p);
+	if (ch == EOF) {
+		return PARSER_OK;
+	}
+
+	if (ch == op) {
+		token_append(t, ch);
+		return PARSER_OK;
+	}
+
+	if (ch == '>') {
+		token_append(t, ch);
+		t->type = TOKEN_ARROW;
+		return PARSER_OK;
+	}
+
+	unget_next(p, ch);
+	return PARSER_OK;
+}
+
+/*
+ *  Gather a token from input stream
+ */
+static int get_token(parser *p, token *t)
+{
+	int ch;
+	int ret;
+
+	for (;;) {
+		ch = get_next(p);
+
+		switch (ch) {
+		case EOF:
+			return EOF;
+
+		/* Skip comments */
+		case '/':
+			ret = skip_comments(p);
+			if (ret == EOF)
+				return EOF;
+			if (ret == PARSER_COMMENT_FOUND)
+				continue;
+			token_append(t, ch);
+			return PARSER_OK;
+		case '#':
+			token_append(t, ch);
+			t->type = TOKEN_CPP;
+			return PARSER_OK;
+		case ' ':
+		case '\t':
+		case '\r':
+		case '\n':
+		case '\\':
+			if (p->skip_white_space)
+				continue;
+			else {
+				token_append(t, ch);
+				t->type = TOKEN_WHITE_SPACE;
+				return PARSER_OK;
+			}
+		case '(':
+			token_append(t, ch);
+			t->type = TOKEN_PAREN_OPENED;
+			return PARSER_OK;
+		case ')':
+			token_append(t, ch);
+			t->type = TOKEN_PAREN_CLOSED;
+			return PARSER_OK;
+		case '<':
+			token_append(t, ch);
+			t->type = TOKEN_LESS_THAN;
+			return PARSER_OK;
+		case '>':
+			token_append(t, ch);
+			t->type = TOKEN_GREATER_THAN;
+			return PARSER_OK;
+		case ',':
+			token_append(t, ch);
+			t->type = TOKEN_COMMA;
+			return PARSER_OK;
+		case ';':
+			token_append(t, ch);
+			t->type = TOKEN_TERMINAL;
+			return PARSER_OK;
+		case '{':
+		case '}':
+		case ':':
+		case '~':
+		case '?':
+		case '*':
+		case '%':
+		case '!':
+		case '.':
+			token_append(t, ch);
+			return PARSER_OK;
+		case '0'...'9':
+			return parse_number(p, t, ch);
+			break;
+		case 'a'...'z':
+		case 'A'...'Z':
+			return parse_identifier(p, t, ch);
+			break;
+		case '"':
+			return parse_literal(p, t, ch, TOKEN_LITERAL_STRING);
+		case '\'':
+			return parse_literal(p, t, ch, TOKEN_LITERAL_CHAR);
+		case '+':
+		case '=':
+		case '|':
+		case '&':
+			return parse_op(p, t, ch);
+		case '-':
+			return parse_minus(p, t, ch);
+		}
+	}
+
+	return PARSER_OK;
+}
+
+/*
+ *  Literals such as "foo" and 'f' sometimes
+ *  need the quotes stripping off.
+ */
+static void literal_strip_quotes(token *t)
+{
+	size_t len = strlen(t->token);
+
+	t->token[len - 1] = '\0';
+
+	memmove(t->token, t->token + 1, len - 1);	/* regions overlap */
+}
+
+/*
+ *  Concatenate new string onto old. The old
+ *  string can be NULL or an existing string
+ *  on the heap.  This returns the newly
+ *  concatenated string.
+ */
+static char *strdupcat(char *old, char *new)
+{
+	size_t len = strlen(new);
+	char *tmp;
+
+	if (old == NULL) {
+		tmp = malloc(len + 1);
+		if (tmp == NULL) {
+			fprintf(stderr, "strdupcat(): Out of memory.\n");
+			exit(EXIT_FAILURE);
+		}
+		strcpy(tmp, new);
+	} else {
+		size_t oldlen = strlen(old);
+		tmp = realloc(old, oldlen + len + 1);
+		if (tmp == NULL) {
+			fprintf(stderr, "strdupcat(): Out of memory.\n");
+			exit(EXIT_FAILURE);
+		}
+		strcat(tmp, new);
+	}
+
+	return tmp;
+}
+
+/*
+ *  Parse a kernel message, like printk() or dev_err()
+ */
+static int parse_kernel_message(parser *p, token *t)
+{
+	int ret;
+	bool got_string = false;
+	bool emit = false;
+	bool found = false;
+	token_type prev_token_type = TOKEN_UNKNOWN;
+	char *str = NULL;
+	char *line = NULL;
+	bool printk;
+
+	printk = (strcmp(t->token, "printk") == 0);
+
+	if (strcmp(t->token, "dev_err") == 0) {
+		emit = true;
+		line = strdupcat(line, "dev_err");
+	}
+	token_clear(t);
+
+	for (;;) {
+		ret = get_token(p, t);
+		if (ret == EOF) {
+			return EOF;
+		}
+
+		/*
+		 *  Hit ; so lets push out what we've parsed
+		 */
+		if (t->type == TOKEN_TERMINAL) {
+			if (emit) {
+				if (found) {
+					printf("OK : %s\n", line);
+				} else {
+					printf("ADD: %s\n", line);
+				}
+			}
+			free(line);
+			return PARSER_OK;
+		}
+
+		/*
+		 *  We are only interested in KERN_ERR
+		 *  printk messages
+		 */
+		if (printk && (t->type == TOKEN_IDENTIFIER) &&
+		    (prev_token_type == TOKEN_PAREN_OPENED) &&
+		    (strcmp(t->token, "KERN_ERR") == 0)) {
+			emit = true;
+			line = strdupcat(line, "printk( ");
+		}
+
+		if (t->type == TOKEN_LITERAL_STRING) {
+			literal_strip_quotes(t);
+			str = strdupcat(str, t->token);
+
+			if (!got_string)
+				line = strdupcat(line, "\"");
+
+			got_string = true;
+		} else {
+			if (got_string)
+				line = strdupcat(line, "\"");
+
+			got_string = false;
+
+			if (str) {
+				found |= klog_find(str, patterns);
+				free(str);
+				str = NULL;
+			}
+		}
+
+		line = strdupcat(line, t->token);
+		if (t->type == TOKEN_COMMA)
+			line = strdupcat(line, " ");
+
+		prev_token_type = t->type;
+
+		token_clear(t);
+	}
+}
+
+/*
+ *  Parse input looking for printk or dev_err calls
+ */
+static void parse_kernel_messages(FILE *fp)
+{
+	token t;
+	parser p;
+
+	parser_new(&p, fp, true);
+	p.fp = fp;
+	p.skip_white_space = true;
+
+	token_new(&t);
+
+	while ((get_token(&p, &t)) != EOF) {
+		if ((strcmp(t.token, "printk") == 0) ||
+		    (strcmp(t.token, "dev_err") == 0)) {
+			parse_kernel_message(&p, &t);
+		} else
+			token_clear(&t);
+	}
+
+	token_free(&t);
+}
+
+/*
+ *  This is evil.  We parse the input stream
+ *  and throw away all #includes so we don't get
+ *  gcc -E breaking on include files that we haven't
+ *  got.  We don't really care at this level about
+ *  macros being expanded as we want to see tokens
+ *  such as KERN_ERR later on.
+ */
+static int parse_cpp_include(parser *p, token *t)
+{
+	/*
+	 *  Gloop up #include "foo.h"
+	 */
+	do {
+		token_clear(t);
+		if (get_token(p, t) == EOF)
+			return EOF;
+		/* End of line, we're done! */
+		if (strcmp(t->token, "\n") == 0)
+			return PARSER_OK;
+	} while (t->type == TOKEN_WHITE_SPACE);
+
+
+	/*
+	 *  Ah, we gobbled up white spaces and
+	 *  now we should be at a '<' token
+	 *  Parse #include <something/foo.h>
+	 */
+	if (t->type == TOKEN_LESS_THAN) {
+		do {
+			if (get_token(p, t) == EOF)
+				return EOF;
+		} while (t->type != TOKEN_GREATER_THAN);
+	}
+
+	token_clear(t);
+
+	return PARSER_OK;
+}
+
+/*
+ *  CPP phase, find and remove #includes
+ */
+static int parse_cpp_includes(FILE *fp)
+{
+	token t;
+	parser p;
+
+	parser_new(&p, fp, false);
+	p.fp = fp;
+	p.skip_white_space = false;
+
+	token_new(&t);
+
+	while ((get_token(&p, &t)) != EOF) {
+		if (t.type == TOKEN_CPP) {
+			for (;;) {
+				token_clear(&t);
+				if (get_token(&p, &t) == EOF)
+					return EOF;
+				if (strcmp(t.token, "\n") == 0)
+					break;
+				if (t.type == TOKEN_WHITE_SPACE) {
+					continue;
+				}
+				if (strcmp(t.token, "include") == 0) {
+					if (parse_cpp_include(&p, &t) == EOF)
+						return EOF;
+					break;
+				}
+				printf("#%s", t.token);
+				break;
+			}
+		} else {
+			printf("%s", t.token);
+		}
+		token_clear(&t);
+	}
+	return EOF;
+}
+
+/*
+ *  Scan kernel source for printk KERN_ERR and dev_err
+ *  calls.
+ *
+ *  Usage:
+ *  	cat drivers/pnp/pnpacpi/rsparser.c | kernelscan -E | gcc -E - | kernelscan -P
+ *
+ *  This prints out any kernel printk KERN_ERR calls
+ *  or dev_err calls and checks to see if the error can be matched by
+ *  any of the fwts klog messages.  It has some intelligence, it glues
+ *  literal strings together such as "this is" "a message" into
+ *  "this is a message" before it makes the klog comparison.
+ */
+int main(int argc, char **argv)
+{
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s [-E] [-P]\n", argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 *  GCC -E preprocess phase
+	 */
+	if (strcmp(argv[1], "-E") == 0) {
+		parse_cpp_includes(stdin);
+		exit(EXIT_SUCCESS);
+	}
+
+	/*
+	 *  Parse kernel printk and dev_err phase
+	 */
+	if (strcmp(argv[1], "-P") == 0) {
+		patterns = klog_load("firmware_error_warning_patterns");
+		parse_kernel_messages(stdin);
+		klog_free(patterns);
+	}
+
+	exit(EXIT_SUCCESS);
+}
-- 
1.7.10.4



