Rev 3665: Use the compiled flatten function, in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

John Arbash Meinel john at arbash-meinel.com
Thu Aug 21 22:58:59 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

------------------------------------------------------------
revno: 3665
revision-id: john at arbash-meinel.com-20080821215857-xfk04aivsp0ntvpd
parent: john at arbash-meinel.com-20080821213749-uoi9fiyg0aiea9sn
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Thu 2008-08-21 16:58:57 -0500
message:
  Use the compiled flatten function.
  Shows a 3-5% speed gain in both testing and real-world conditions.
modified:
  bzrlib/_parse_btree_c.pyx      _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
  bzrlib/btree_index.py          index.py-20080624222253-p0x5f92uyh5hw734-7
-------------- next part --------------
=== modified file 'bzrlib/_parse_btree_c.pyx'
--- a/bzrlib/_parse_btree_c.pyx	2008-08-21 21:37:49 +0000
+++ b/bzrlib/_parse_btree_c.pyx	2008-08-21 21:58:57 +0000
@@ -267,7 +267,7 @@
     cdef int first_bit
 
     # I don't expect that we can do faster than string.join()
-    string_key = '\x00'.join(node[1])
+    string_key = '\0'.join(node[1])
 
     # TODO: instead of using string joins, precompute the final string length,
     #       and then malloc a single string and copy everything in.
@@ -284,13 +284,14 @@
     ref_len = 0
     if reference_lists:
         # Figure out how many bytes it will take to store the references
-        next_len = len(node[3]) # TODO: use a Py function
+        ref_lists = node[3]
+        next_len = len(ref_lists) # TODO: use a Py function
         if next_len > 0:
             # If there are no nodes, we don't need to do any work
             # Otherwise we will need (len - 1) '\t' characters to separate
             # the reference lists
             ref_len = ref_len + (next_len - 1)
-            for ref_list in node[3]:
+            for ref_list in ref_lists:
                 next_len = len(ref_list)
                 if next_len > 0:
                     # We will need (len - 1) '\r' characters to separate the
@@ -303,7 +304,7 @@
                             # separate the reference key
                             ref_len = ref_len + (next_len - 1)
                             for ref in reference:
-                                ref_len = ref_len + len(ref)
+                                ref_len = ref_len + PyString_Size(ref)
 
     # So we have the (key NULL refs NULL value LF)
     key_len = PyString_Size(string_key)
@@ -319,7 +320,7 @@
     out = out + 1
     if ref_len > 0:
         first_ref_list = 1
-        for ref_list in node[3]:
+        for ref_list in ref_lists:
             if first_ref_list == 0:
                 out[0] = c'\t'
                 out = out + 1

=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2008-08-21 19:53:53 +0000
+++ b/bzrlib/btree_index.py	2008-08-21 21:58:57 +0000
@@ -331,15 +331,8 @@
             #       and then do a single malloc() rather than lots of
             #       intermediate mallocs as we build everything up.
             #       ATM 3 / 13s are spent flattening nodes (10s is compressing)
-            if self.reference_lists:
-                flattened_references = ['\r'.join(['\x00'.join(reference)
-                                                   for reference in ref_list])
-                                        for ref_list in node[3]]
-            else:
-                flattened_references = []
-            string_key = '\x00'.join(node[1])
-            line = ("%s\x00%s\x00%s\n" % (string_key,
-                '\t'.join(flattened_references), node[2]))
+            string_key, line = _parse_btree._flatten_node(node,
+                                                          self.reference_lists)
             self._add_key(string_key, line, rows)
         for row in reversed(rows):
             pad = (type(row) != _LeafBuilderRow)



More information about the bazaar-commits mailing list