Rev 77: Switching to a 'callable' meant that we doubled the time to dump 150MB. in http://bazaar.launchpad.net/~meliae-dev/meliae/trunk
John Arbash Meinel
john at arbash-meinel.com
Fri Sep 11 17:40:28 BST 2009
At http://bazaar.launchpad.net/~meliae-dev/meliae/trunk
------------------------------------------------------------
revno: 77
revision-id: john at arbash-meinel.com-20090911164011-9odhs7rer071rq1a
parent: john at arbash-meinel.com-20090911161600-7fhsncg08p3kxoov
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Fri 2009-09-11 11:40:11 -0500
message:
Switching to a 'callable' meant that we doubled the time to dump 150MB.
However, changing the inner 'dump_c_string' function to work on a local buffer
allows us to do a lot of simple memory copying without any printf formatting.
Net result: the dump took 10s originally, 19s after switching to the callable, and now takes 7.8s.
-------------- next part --------------
=== modified file 'meliae/_scanner.pyx'
--- a/meliae/_scanner.pyx 2009-09-11 16:16:00 +0000
+++ b/meliae/_scanner.pyx 2009-09-11 16:40:11 +0000
@@ -65,6 +65,7 @@
file_cb = <FILE *>callee_data
fwrite(bytes, 1, len, file_cb)
+
cdef void _callable_callback(void *callee_data, char *bytes, size_t len):
callable = <object>callee_data
=== modified file 'meliae/_scanner_core.c'
--- a/meliae/_scanner_core.c 2009-09-11 15:20:35 +0000
+++ b/meliae/_scanner_core.c 2009-09-11 16:40:11 +0000
@@ -34,6 +34,7 @@
# else
# define SSIZET_FMT "%d"
# endif
+# define snprintf _snprintf
#else
# define SSIZET_FMT "%zd"
#endif
@@ -164,6 +165,9 @@
_dump_reference(PyObject *c_obj, void* val)
{
struct ref_info *info;
+ size_t n_bytes;
+ char buf[24] = {0}; /* it seems that 64-bit long fits in 20 decimals */
+
info = (struct ref_info*)val;
/* TODO: This is casting a pointer into an unsigned long, which we assume
* is 'long enough'. We probably should really be using uintptr_t or
@@ -171,10 +175,11 @@
*/
if (info->first) {
info->first = 0;
- _write_to_ref_info(info, "%lu", (unsigned long)c_obj);
+ n_bytes = snprintf(buf, 24, "%lu", (unsigned long)c_obj);
} else {
- _write_to_ref_info(info, ", %lu", (unsigned long)c_obj);
+ n_bytes = snprintf(buf, 24, ", %lu", (unsigned long)c_obj);
}
+ info->write(info->data, buf, n_bytes);
return 0;
}
@@ -213,7 +218,8 @@
_dump_json_c_string(struct ref_info *info, const char *buf, Py_ssize_t len)
{
Py_ssize_t i;
- char c;
+ char c, *ptr, *end;
+ char out_buf[1024] = {0};
// Never try to dump more than 100 chars
if (len == -1) {
@@ -225,18 +231,26 @@
// TODO: consider writing to a small memory buffer, rather that writing
// repeatedly to the callback. We know the maximum write size is
// 6*100+2 for all unicode chars + the json quote chars.
- _write_to_ref_info(info, "\"");
+ ptr = out_buf;
+ end = out_buf + 1024;
+ *ptr++ = '"';
for (i = 0; i < len; ++i) {
c = buf[i];
if (c <= 0x1f || c > 0x7e) { // use the unicode escape sequence
- _write_to_ref_info(info, "\\u00%02x", ((unsigned short)c & 0xFF));
+ ptr += snprintf(ptr, end-ptr, "\\u00%02x",
+ ((unsigned short)c & 0xFF));
} else if (c == '\\' || c == '/' || c == '"') {
- _write_to_ref_info(info, "\\%c", c);
+ *ptr++ = '\\';
+ *ptr++ = 'c';
} else {
- _write_to_ref_info(info, "%c", c);
+ *ptr++ = c;
}
}
- _write_to_ref_info(info, "\"");
+ *ptr++ = '"';
+ if (ptr >= end) {
+ /* Abort somehow */
+ }
+ info->write(info->data, out_buf, ptr-out_buf);
}
void
More information about the bazaar-commits
mailing list