Rev 74: Rewrite the internal dumping routines to use a callback function in http://bazaar.launchpad.net/~meliae-dev/meliae/trunk
John Arbash Meinel
john at arbash-meinel.com
Fri Sep 11 16:09:51 BST 2009
At http://bazaar.launchpad.net/~meliae-dev/meliae/trunk
------------------------------------------------------------
revno: 74
revision-id: john at arbash-meinel.com-20090911150934-m7krgjyi2fm2bosm
parent: john at arbash-meinel.com-20090910220645-od403p0lgw4qz3d9
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Fri 2009-09-11 10:09:34 -0500
message:
Rewrite the internal dumping routines to use a callback function
rather than explicitly depending on a FILE* structure.
It makes it a bit more complex, but certainly more flexible.
-------------- next part --------------
=== modified file 'meliae/_scanner.pyx'
--- a/meliae/_scanner.pyx 2009-09-10 19:50:57 +0000
+++ b/meliae/_scanner.pyx 2009-09-11 15:09:34 +0000
@@ -20,6 +20,7 @@
ctypedef long size_t
ctypedef struct FILE:
pass
+ FILE *stderr
size_t fwrite(void *, size_t, size_t, FILE *)
size_t fprintf(FILE *, char *, ...)
@@ -31,7 +32,10 @@
cdef extern from "_scanner_core.h":
Py_ssize_t _size_of(object c_obj)
- void _dump_object_info(FILE *, object c_obj, object nodump, int recurse)
+ ctypedef void (*write_callback)(void *callee_data, char *bytes, size_t len)
+
+ void _dump_object_info(write_callback write, void *callee_data,
+ object c_obj, object nodump, int recurse)
object _get_referents(object c_obj)
@@ -53,13 +57,38 @@
return _size_of(obj)
-def dump_object_info(object fp, object obj, object nodump=None, int recurse_depth=1):
- cdef FILE *out
-
- out = PyFile_AsFile(fp)
- if out == NULL:
+cdef void _file_io_callback(void *callee_data, char *bytes, size_t len):
+ cdef FILE *file_cb
+
+ file_cb = <FILE *>callee_data
+ fwrite(bytes, 1, len, file_cb)
+
+
+def dump_object_info(object out, object obj, object nodump=None,
+ int recurse_depth=1):
+ """Dump the object information to the given output.
+
+ :param out: Either a File object, or a callable.
+ If a File object, we will write bytes to the underlying FILE*
+ Otherwise, we will call(str) with bytes as we build up the state of the
+ object. Note that a single call will not be a complete description, but
+ potentially a single character for formatting.
+ :param obj: The object to inspect
+ :param nodump: If supplied, this is a set() of objects that we want to
+ exclude from the dump file.
+ :param recurse_depth: 0 to only dump the supplied object
+ 1 to dump the object and immediate neighbors that would not otherwise be
+ referenced (such as strings).
+ 2 dump everything we find and continue recursing
+ """
+ cdef FILE *fp_out
+
+ fp_out = PyFile_AsFile(out)
+ if fp_out != NULL:
+ # PyFile_AsFile succeeded, so 'out' is a real file; write via its FILE*
+ _dump_object_info(_file_io_callback, fp_out, obj, nodump, recurse_depth)
+ else:
raise TypeError('not a file')
- _dump_object_info(out, obj, nodump, recurse_depth)
def get_referents(object obj):
=== modified file 'meliae/_scanner_core.c'
--- a/meliae/_scanner_core.c 2009-09-10 22:06:45 +0000
+++ b/meliae/_scanner_core.c 2009-09-11 15:09:34 +0000
@@ -39,11 +39,15 @@
#endif
struct ref_info {
- FILE *out;
+ write_callback write;
+ void *data;
int first;
PyObject *nodump;
};
+void _dump_object_to_ref_info(struct ref_info *info, PyObject *c_obj,
+ int recurse);
+
Py_ssize_t
_basic_object_size(PyObject *c_obj)
{
@@ -137,16 +141,36 @@
}
+/* TODO: figure out the GCC magic macro to make it clear this is essentially a
+ * printf function. So that it checks argument types, etc.
+ */
+void
+_write_to_ref_info(struct ref_info *info, const char *fmt_string, ...)
+{
+ char temp_buf[1024] = {0};
+ va_list args;
+ size_t n_bytes;
+
+ va_start(args, fmt_string);
+ n_bytes = vsnprintf(temp_buf, 1024, fmt_string, args);
+ va_end(args);
+ info->write(info->data, temp_buf, n_bytes);
+}
+
int
_dump_reference(PyObject *c_obj, void* val)
{
- struct ref_info *out;
- out = (struct ref_info*)val;
- if (out->first) {
- out->first = 0;
- fprintf(out->out, "%lu", (unsigned long)c_obj);
+ struct ref_info *info;
+ info = (struct ref_info*)val;
+ /* TODO: This is casting a pointer into an unsigned long, which we assume
+ * is 'long enough'. We probably should really be using uintptr_t or
+ * something like that.
+ */
+ if (info->first) {
+ info->first = 0;
+ _write_to_ref_info(info, "%lu", (unsigned long)c_obj);
} else {
- fprintf(out->out, ", %lu", (unsigned long)c_obj);
+ _write_to_ref_info(info, ", %lu", (unsigned long)c_obj);
}
return 0;
}
@@ -158,7 +182,7 @@
struct ref_info *info;
info = (struct ref_info *)val;
// The caller has asked us to dump self, but no recursive children
- _dump_object_info(info->out, c_obj, info->nodump, 0);
+ _dump_object_to_ref_info(info, c_obj, 0);
return 0;
}
@@ -175,7 +199,7 @@
|| (PyType_Check(c_obj)
&& !PyType_HasFeature((PyTypeObject*)c_obj, Py_TPFLAGS_HEAPTYPE)))
{
- _dump_object_info(info->out, c_obj, info->nodump, 0);
+ _dump_object_to_ref_info(info, c_obj, 0);
}
// We know that it is safe to recurse here, because tp_traverse is NULL
return 0;
@@ -183,48 +207,50 @@
void
-_dump_json_c_string(FILE *out, const char *buf, Py_ssize_t len)
+_dump_json_c_string(struct ref_info *info, const char *buf, Py_ssize_t len)
{
Py_ssize_t i;
char c;
- // Never try to dump more than this many chars
+ // Never try to dump more than 100 chars
if (len == -1) {
len = strlen(buf);
}
if (len > 100) {
len = 100;
}
- fprintf(out, "\"");
+ // TODO: consider writing to a small memory buffer, rather than writing
+ // repeatedly to the callback. We know the maximum write size is
+ // 6*100+2 for all unicode chars + the json quote chars.
+ _write_to_ref_info(info, "\"");
for (i = 0; i < len; ++i) {
c = buf[i];
if (c <= 0x1f || c > 0x7e) { // use the unicode escape sequence
- fprintf(out, "\\u00%02x", ((unsigned short)c & 0xFF));
+ _write_to_ref_info(info, "\\u00%02x", ((unsigned short)c & 0xFF));
} else if (c == '\\' || c == '/' || c == '"') {
- fprintf(out, "\\%c", c);
+ _write_to_ref_info(info, "\\%c", c);
} else {
- fprintf(out, "%c", c);
+ _write_to_ref_info(info, "%c", c);
}
}
- fprintf(out, "\"");
+ _write_to_ref_info(info, "\"");
}
void
-_dump_string(FILE *out, PyObject *c_obj)
+_dump_string(struct ref_info *info, PyObject *c_obj)
{
- // TODO: consider writing to a small memory buffer, before writing to disk
Py_ssize_t str_size;
char *str_buf;
str_buf = PyString_AS_STRING(c_obj);
str_size = PyString_GET_SIZE(c_obj);
- _dump_json_c_string(out, str_buf, str_size);
+ _dump_json_c_string(info, str_buf, str_size);
}
void
-_dump_unicode(FILE *out, PyObject *c_obj)
+_dump_unicode(struct ref_info *info, PyObject *c_obj)
{
// TODO: consider writing to a small memory buffer, before writing to disk
Py_ssize_t uni_size;
@@ -238,37 +264,55 @@
if (uni_size > 100) {
uni_size = 100;
}
- fprintf(out, "\"");
+ _write_to_ref_info(info, "\"");
for (i = 0; i < uni_size; ++i) {
c = uni_buf[i];
if (c <= 0x1f || c > 0x7e) {
- fprintf(out, "\\u%04x", ((unsigned short)c & 0xFFFF));
+ _write_to_ref_info(info, "\\u%04x", ((unsigned short)c & 0xFFFF));
} else if (c == '\\' || c == '/' || c == '"') {
- fprintf(out, "\\%c", (unsigned char)c);
+ _write_to_ref_info(info, "\\%c", (unsigned char)c);
} else {
- fprintf(out, "%c", (unsigned char)c);
+ _write_to_ref_info(info, "%c", (unsigned char)c);
}
}
- fprintf(out, "\"");
+ _write_to_ref_info(info, "\"");
}
-void
-_dump_object_info(FILE *out, PyObject *c_obj, PyObject *nodump, int recurse)
+void
+_dump_object_info(write_callback write, void *callee_data,
+ PyObject *c_obj, PyObject *nodump, int recurse)
{
- Py_ssize_t size;
struct ref_info info;
+
+ info.write = write;
+ info.data = callee_data;
+ info.first = 1;
+ info.nodump = nodump;
+ if (nodump != NULL) {
+ Py_INCREF(nodump);
+ }
+ _dump_object_to_ref_info(&info, c_obj, recurse);
+ if (info.nodump != NULL) {
+ Py_DECREF(nodump);
+ }
+}
+
+void
+_dump_object_to_ref_info(struct ref_info *info, PyObject *c_obj, int recurse)
+{
+ Py_ssize_t size;
int retval;
- info.out = out;
- info.nodump = nodump; /* Stealing the reference, but not permanently */
-
- if (nodump != Py_None && PyAnySet_Check(nodump)) {
- if (c_obj == nodump) {
+ if (info->nodump != NULL &&
+ info->nodump != Py_None
+ && PyAnySet_Check(info->nodump))
+ {
+ if (c_obj == info->nodump) {
/* Don't dump the 'nodump' set. */
return;
}
- retval = PySet_Contains(nodump, c_obj);
+ retval = PySet_Contains(info->nodump, c_obj);
if (retval == 1) {
/* This object is part of the no-dump set, don't dump the object */
return;
@@ -279,57 +323,58 @@
}
size = _size_of(c_obj);
- fprintf(out, "{\"address\": %lu, \"type\": ", (unsigned long)c_obj);
- _dump_json_c_string(out, c_obj->ob_type->tp_name, -1);
- fprintf(out, ", \"size\": " SSIZET_FMT, _size_of(c_obj));
+ _write_to_ref_info(info, "{\"address\": %lu, \"type\": ",
+ (unsigned long)c_obj);
+ _dump_json_c_string(info, c_obj->ob_type->tp_name, -1);
+ _write_to_ref_info(info, ", \"size\": " SSIZET_FMT, _size_of(c_obj));
// HANDLE __name__
if (PyModule_Check(c_obj)) {
- fprintf(out, ", \"name\": ");
- _dump_json_c_string(out, PyModule_GetName(c_obj), -1);
+ _write_to_ref_info(info, ", \"name\": ");
+ _dump_json_c_string(info, PyModule_GetName(c_obj), -1);
} else if (PyFunction_Check(c_obj)) {
- fprintf(out, ", \"name\": ");
- _dump_string(out, ((PyFunctionObject *)c_obj)->func_name);
+ _write_to_ref_info(info, ", \"name\": ");
+ _dump_string(info, ((PyFunctionObject *)c_obj)->func_name);
} else if (PyType_Check(c_obj)) {
- fprintf(out, ", \"name\": ");
- _dump_json_c_string(out, ((PyTypeObject *)c_obj)->tp_name, -1);
+ _write_to_ref_info(info, ", \"name\": ");
+ _dump_json_c_string(info, ((PyTypeObject *)c_obj)->tp_name, -1);
} else if (PyClass_Check(c_obj)) {
/* Old style class */
- fprintf(out, ", \"name\": ");
- _dump_string(out, ((PyClassObject *)c_obj)->cl_name);
+ _write_to_ref_info(info, ", \"name\": ");
+ _dump_string(info, ((PyClassObject *)c_obj)->cl_name);
}
if (PyString_Check(c_obj)) {
- fprintf(out, ", \"len\": " SSIZET_FMT, PyString_GET_SIZE(c_obj));
- fprintf(out, ", \"value\": ");
- _dump_string(out, c_obj);
+ _write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyString_GET_SIZE(c_obj));
+ _write_to_ref_info(info, ", \"value\": ");
+ _dump_string(info, c_obj);
} else if (PyUnicode_Check(c_obj)) {
- fprintf(out, ", \"len\": " SSIZET_FMT, PyUnicode_GET_SIZE(c_obj));
- fprintf(out, ", \"value\": ");
- _dump_unicode(out, c_obj);
+ _write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyUnicode_GET_SIZE(c_obj));
+ _write_to_ref_info(info, ", \"value\": ");
+ _dump_unicode(info, c_obj);
} else if (PyInt_CheckExact(c_obj)) {
- fprintf(out, ", \"value\": %ld", PyInt_AS_LONG(c_obj));
+ _write_to_ref_info(info, ", \"value\": %ld", PyInt_AS_LONG(c_obj));
} else if (PyTuple_Check(c_obj)) {
- fprintf(out, ", \"len\": " SSIZET_FMT, PyTuple_GET_SIZE(c_obj));
+ _write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyTuple_GET_SIZE(c_obj));
} else if (PyList_Check(c_obj)) {
- fprintf(out, ", \"len\": " SSIZET_FMT, PyList_GET_SIZE(c_obj));
+ _write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyList_GET_SIZE(c_obj));
} else if (PyAnySet_Check(c_obj)) {
- fprintf(out, ", \"len\": " SSIZET_FMT, PySet_GET_SIZE(c_obj));
+ _write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PySet_GET_SIZE(c_obj));
} else if (PyDict_Check(c_obj)) {
- fprintf(out, ", \"len\": " SSIZET_FMT, PyDict_Size(c_obj));
+ _write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyDict_Size(c_obj));
}
- fprintf(out, ", \"refs\": [");
+ _write_to_ref_info(info, ", \"refs\": [");
if (Py_TYPE(c_obj)->tp_traverse != NULL) {
- info.first = 1;
- Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_reference, &info);
+ info->first = 1;
+ Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_reference, info);
}
- fprintf(out, "]}\n");
+ _write_to_ref_info(info, "]}\n");
if (Py_TYPE(c_obj)->tp_traverse != NULL && recurse != 0) {
if (recurse == 2) { /* Always dump one layer deeper */
- Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_child, &info);
+ Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_child, info);
} else if (recurse == 1) {
/* strings and such aren't in gc.get_objects, so we need to dump
* them when they are referenced.
*/
- Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_if_no_traverse, &info);
+ Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_if_no_traverse, info);
}
}
}
=== modified file 'meliae/_scanner_core.h'
--- a/meliae/_scanner_core.h 2009-09-10 19:50:57 +0000
+++ b/meliae/_scanner_core.h 2009-09-11 15:09:34 +0000
@@ -35,9 +35,15 @@
Py_ssize_t _size_of(PyObject *c_obj);
/**
+ * This callback will be used to dump more info to the user.
+ */
+typedef void (*write_callback)(void *callee_data, char *bytes, size_t len);
+
+/**
* Write the information about this object to the file.
*/
-void _dump_object_info(FILE *out, PyObject *c_obj, PyObject *nodump, int recurse);
+void _dump_object_info(write_callback write, void *callee_data,
+ PyObject *c_obj, PyObject *nodump, int recurse);
/**
* Return a PyList of all objects referenced via tp_traverse.
More information about the bazaar-commits
mailing list