Rev 5368: The results were just too promising. faster *and* better memory. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf

John Arbash Meinel john at arbash-meinel.com
Tue Aug 3 22:30:00 BST 2010


At http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf

------------------------------------------------------------
revno: 5368
revision-id: john at arbash-meinel.com-20100803212946-xurjxv9i4xz8luac
parent: john at arbash-meinel.com-20100803211445-yfaqzmzvf8ybi28a
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-btree-chk-leaf
timestamp: Tue 2010-08-03 16:29:46 -0500
message:
  The results were just too promising. faster *and* better memory.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2010-08-03 21:14:45 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2010-08-03 21:29:46 +0000
@@ -387,9 +387,13 @@
 
     cdef public int num_entries
     cdef gc_chk_sha1_record *entries
-    # TODO: Consider what a mini-index might look like. Something that could
-    #       let us quickly jump to a subset range, rather than doing pure
-    #       bisect all the time.
+    # This is for the mini-index. We look at all the keys and use whatever byte
+    # is first unique across all stored keys (this is often the first byte)
+    # we then store the entries offset for the first record that matches that
+    # byte. This does assume that we'll never have more than 32k entries, but
+    # that doesn't seem to be a terrible assumption (we should have ~100)
+    cdef public short interesting_byte
+    cdef short offsets[257]
 
     def __sizeof__(self):
         return (sizeof(GCCHKSHA1LeafNode)
@@ -425,7 +429,6 @@
         cdef char *c_val
         if not PyTuple_CheckExact(key) and not StaticTuple_CheckExact(key):
             return 0
-            #? raise TypeError('Keys must be a tuple or StaticTuple')
         if len(key) != 1:
             return 0
         val = key[0]
@@ -609,8 +612,6 @@
         # the same across them.
 
 
-
-
 def _parse_into_chk(bytes, key_length, ref_list_length):
     """Parse into a format optimized for chk records."""
     assert key_length == 1

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2010-07-29 11:17:57 +0000
+++ b/bzrlib/tests/__init__.py	2010-08-03 21:29:46 +0000
@@ -3665,6 +3665,7 @@
         'bzrlib.tests.per_workingtree',
         'bzrlib.tests.test__annotator',
         'bzrlib.tests.test__bencode',
+        'bzrlib.tests.test__btree_serializer',
         'bzrlib.tests.test__chk_map',
         'bzrlib.tests.test__dirstate_helpers',
         'bzrlib.tests.test__groupcompress',

=== added file 'bzrlib/tests/test__btree_serializer.py'
--- a/bzrlib/tests/test__btree_serializer.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test__btree_serializer.py	2010-08-03 21:29:46 +0000
@@ -0,0 +1,51 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+"""Direct tests of the btree serializer extension"""
+
+from bzrlib import tests
+
+from bzrlib.tests.test_btree_index import compiled_btreeparser_feature
+
+
+class TestGCCKHSHA1LeafNode(tests.TestCase):
+
+    _test_needs_features = [compiled_btreeparser_feature]
+
+    def setUp(self):
+        super(TestGCCKHSHA1LeafNode, self).setUp()
+        self.module = compiled_btreeparser_feature.module
+
+    def assertInvalid(self, bytes):
+        """Ensure that we get a proper error when trying to parse invalid bytes.
+
+        (mostly this is testing that bad input doesn't cause us to segfault)
+        """
+        self.assertRaises((ValueError, TypeError), 
+                          self.module._parse_into_chk, bytes, 1, 0)
+
+    def test_non_str(self):
+        self.assertInvalid(u'type=leaf\n')
+
+    def test_not_leaf(self):
+        self.assertInvalid('type=internal\n')
+
+    def test_empty_leaf(self):
+        leaf = self.module._parse_into_chk('type=leaf\n', 1, 0)
+        self.assertEqual(0, len(leaf))
+        self.assertEqual([], leaf.all_items())
+        self.assertEqual([], leaf.all_keys())



More information about the bazaar-commits mailing list