Rev 77: Switching to a 'callable' meant that we doubled the time to dump 150MB. in http://bazaar.launchpad.net/~meliae-dev/meliae/trunk

John Arbash Meinel john at arbash-meinel.com
Fri Sep 11 17:40:28 BST 2009


At http://bazaar.launchpad.net/~meliae-dev/meliae/trunk

------------------------------------------------------------
revno: 77
revision-id: john at arbash-meinel.com-20090911164011-9odhs7rer071rq1a
parent: john at arbash-meinel.com-20090911161600-7fhsncg08p3kxoov
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Fri 2009-09-11 11:40:11 -0500
message:
  Switching to a 'callable' meant that we doubled the time to dump 150MB.
  However, changing the inner 'dump_c_string' function to work on a local buffer
  allows us to do a lot of simple memory copying without any printf formatting.
  Net result: the dump went from 10s to 19s with the callable, and is now down to 7.8s.
-------------- next part --------------
=== modified file 'meliae/_scanner.pyx'
--- a/meliae/_scanner.pyx	2009-09-11 16:16:00 +0000
+++ b/meliae/_scanner.pyx	2009-09-11 16:40:11 +0000
@@ -65,6 +65,7 @@
     file_cb = <FILE *>callee_data
     fwrite(bytes, 1, len, file_cb)
 
+
 cdef void _callable_callback(void *callee_data, char *bytes, size_t len):
     callable = <object>callee_data
 

=== modified file 'meliae/_scanner_core.c'
--- a/meliae/_scanner_core.c	2009-09-11 15:20:35 +0000
+++ b/meliae/_scanner_core.c	2009-09-11 16:40:11 +0000
@@ -34,6 +34,7 @@
 #  else
 #    define SSIZET_FMT "%d"
 #  endif
+#  define snprintf _snprintf
 #else
 #  define SSIZET_FMT "%zd"
 #endif
@@ -164,6 +165,9 @@
 _dump_reference(PyObject *c_obj, void* val)
 {
     struct ref_info *info;
+    size_t n_bytes;
+    char buf[24] = {0}; /* it seems that 64-bit long fits in 20 decimals */
+
     info = (struct ref_info*)val;
     /* TODO: This is casting a pointer into an unsigned long, which we assume
      *       is 'long enough'. We probably should really be using uintptr_t or
@@ -171,10 +175,11 @@
      */
     if (info->first) {
         info->first = 0;
-        _write_to_ref_info(info, "%lu", (unsigned long)c_obj);
+        n_bytes = snprintf(buf, 24, "%lu", (unsigned long)c_obj);
     } else {
-        _write_to_ref_info(info, ", %lu", (unsigned long)c_obj);
+        n_bytes = snprintf(buf, 24, ", %lu", (unsigned long)c_obj);
     }
+    info->write(info->data, buf, n_bytes);
     return 0;
 }
 
@@ -213,7 +218,8 @@
 _dump_json_c_string(struct ref_info *info, const char *buf, Py_ssize_t len)
 {
     Py_ssize_t i;
-    char c;
+    char c, *ptr, *end;
+    char out_buf[1024] = {0};
 
     // Never try to dump more than 100 chars
     if (len == -1) {
@@ -225,18 +231,26 @@
     // TODO: consider writing to a small memory buffer, rather that writing
     //       repeatedly to the callback. We know the maximum write size is
     //       6*100+2 for all unicode chars + the json quote chars.
-    _write_to_ref_info(info, "\"");
+    ptr = out_buf;
+    end = out_buf + 1024;
+    *ptr++ = '"';
     for (i = 0; i < len; ++i) {
         c = buf[i];
         if (c <= 0x1f || c > 0x7e) { // use the unicode escape sequence
-            _write_to_ref_info(info, "\\u00%02x", ((unsigned short)c & 0xFF));
+            ptr += snprintf(ptr, end-ptr, "\\u00%02x",
+                            ((unsigned short)c & 0xFF));
         } else if (c == '\\' || c == '/' || c == '"') {
-            _write_to_ref_info(info, "\\%c", c);
+            *ptr++ = '\\';
+            *ptr++ = 'c';
         } else {
-            _write_to_ref_info(info, "%c", c);
+            *ptr++ = c;
         }
     }
-    _write_to_ref_info(info, "\"");
+    *ptr++ = '"';
+    if (ptr >= end) {
+        /* Abort somehow */
+    }
+    info->write(info->data, out_buf, ptr-out_buf);
 }
 
 void



More information about the bazaar-commits mailing list