Rev 4788: Some small cleanups. in http://bazaar.launchpad.net/~jameinel/bzr/chk-index

John Arbash Meinel john at arbash-meinel.com
Wed Oct 28 14:01:42 GMT 2009


At http://bazaar.launchpad.net/~jameinel/bzr/chk-index

------------------------------------------------------------
revno: 4788
revision-id: john at arbash-meinel.com-20091028140134-hkegjr19i9spixsj
parent: john at arbash-meinel.com-20091028035345-ybopxvpny56m4l2g
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: chk-index
timestamp: Wed 2009-10-28 09:01:34 -0500
message:
  Some small cleanups.
-------------- next part --------------
=== modified file 'bzrlib/chk_index.py'
--- a/bzrlib/chk_index.py	2009-10-28 03:51:32 +0000
+++ b/bzrlib/chk_index.py	2009-10-28 14:01:34 +0000
@@ -328,6 +328,15 @@
     return _struct_H.unpack(bit_key[:2])[0]
 
 
+_bit_key_to_offset = {
+    0: _bit_key_to_offset_0,
+    16: _bit_key_to_offset_16,
+    256: _bit_key_to_offset_256,
+    4096: _bit_key_to_offset_4096,
+    65536: _bit_key_to_offset_65536,
+}
+
+
 class CHKIndexBuilder(object):
     """Build up a new CHKIndex."""
 
@@ -388,19 +397,22 @@
             defined for this index.
         """
         # TODO: key = StaticTuple.from_sequence(key)
-        # TODO: Would we save a lot of memory if we extract just the 'sha1'
+        #       Though this isn't very important because we only keep the
+        #       'bit_key' internally.
+        # TODO: Do we save a lot of memory by extracting just the 'sha1'
         #       value and used that as our index, rather than using the full
         #       key?
-        #       Note that bit_key is much smaller than 'key', however 'key'
-        #       will be shared with other parts of code. So we may not be
-        #       saving anything here
+        #       Note that bit_key is 44bytes for 24b string + 20 bytes of sha.
+        #       Versus 16 bytes StaticTuple + 24b string + 45 bytes hex (85b)
+        #       However, the StaticTuple will be shared with code like the chk
+        #       map. So it may be just a pure *cost* of 44bytes.
         bit_key = self._check_key(key)
-        group_key = (group_start, group_len)  # TODO: StaticTuple().intern()
+        group_key = static_tuple.StaticTuple(group_start, group_len).intern()
         self._ensure_group(group_key)
         if bit_key in self._nodes:
             raise errors.BadIndexDuplicateKey(key, self)
-        # TODO: StaticTuple
-        self._nodes[bit_key] = (group_key, inner_start, inner_len)
+        self._nodes[bit_key] = static_tuple.StaticTuple(
+            group_key, inner_start, inner_len)
 
     def key_count(self):
         """How many keys are present in this index?"""
@@ -442,30 +454,10 @@
                                  ' you can transform _entry_to_bytes')
         return entry_coder.pack(bit_key, group_offset, inner_start, inner_len)
         
-    @staticmethod
-    def _get_bit_key_to_mini_index(num_mini_index_entries):
-        """return a function gives the mini-index offset.
-        
-        The function should take a 'bit_key' and return the mini-index offset
-        that corresponds to that key.
-        """
-        if num_mini_index_entries == 0:
-            return _bit_key_to_offset_0
-        if num_mini_index_entries == 16:
-            return _bit_key_to_offset_16
-        if num_mini_index_entries == 256:
-            return _bit_key_to_offset_256
-        if num_mini_index_entries == 4096:
-            return _bit_key_to_offset_4096
-        if num_mini_index_entries == 65536:
-            return _bit_key_to_offset_65536
-                
-
     def _build_mini_index_and_entries(self, header):
         # For now, we buffer everything in memory, we *could* use a temp file
         # instead
-        bit_key_to_offset = self._get_bit_key_to_mini_index(
-                                header.num_mini_index_entries)
+        bit_key_to_offset = _bit_key_to_offset[header.num_mini_index_entries]
         null_entry = '\x00'*header.mini_index_entry_offset_bytes
         mini_index_bytes = [null_entry] * header.num_mini_index_entries
         entry_bytes = []

=== modified file 'bzrlib/tests/test_chk_index.py'
--- a/bzrlib/tests/test_chk_index.py	2009-10-28 03:53:45 +0000
+++ b/bzrlib/tests/test_chk_index.py	2009-10-28 14:01:34 +0000
@@ -183,37 +183,32 @@
             b2a_hex(self.bit_k1) + '01' '00' '01',
             ]), '\n'.join([b2a_hex(b) for b in entry_bytes]))
 
-    def test__get_bit_key_to_mini_index_0(self):
-        offset_func = chk_index.CHKIndexBuilder._get_bit_key_to_mini_index(0)
-        self.assertEqual(0, offset_func(self.bit_k1))
-        self.assertEqual(0, offset_func(self.bit_k2))
-        self.assertEqual(0, offset_func(self.bit_k3))
+    def test__bit_key_to_offset_0(self):
+        self.assertEqual(0, chk_index._bit_key_to_offset_0(self.bit_k1))
+        self.assertEqual(0, chk_index._bit_key_to_offset_0(self.bit_k2))
+        self.assertEqual(0, chk_index._bit_key_to_offset_0(self.bit_k3))
 
-    def test__get_bit_key_to_mini_index_16(self):
-        offset_func = chk_index.CHKIndexBuilder._get_bit_key_to_mini_index(16)
+    def test__bit_key_to_offset_16(self):
         # da39... maps to 'd'
-        self.assertEqual(0xd, offset_func(self.bit_k1))
-        self.assertEqual(0x8, offset_func(self.bit_k2))
-        self.assertEqual(0xe, offset_func(self.bit_k3))
+        self.assertEqual(0xd, chk_index._bit_key_to_offset_16(self.bit_k1))
+        self.assertEqual(0x8, chk_index._bit_key_to_offset_16(self.bit_k2))
+        self.assertEqual(0xe, chk_index._bit_key_to_offset_16(self.bit_k3))
 
-    def test__get_bit_key_to_mini_index_256(self):
-        offset_func = chk_index.CHKIndexBuilder._get_bit_key_to_mini_index(256)
+    def test__bit_key_to_offset_256(self):
         # da39... maps to 'da'
-        self.assertEqual(0xda, offset_func(self.bit_k1))
-        self.assertEqual(0x86, offset_func(self.bit_k2))
-        self.assertEqual(0xe9, offset_func(self.bit_k3))
+        self.assertEqual(0xda, chk_index._bit_key_to_offset_256(self.bit_k1))
+        self.assertEqual(0x86, chk_index._bit_key_to_offset_256(self.bit_k2))
+        self.assertEqual(0xe9, chk_index._bit_key_to_offset_256(self.bit_k3))
 
-    def test__get_bit_key_to_mini_index_4096(self):
-        offset_func = chk_index.CHKIndexBuilder._get_bit_key_to_mini_index(4096)
+    def test__bit_key_to_offset_4096(self):
         # da39... maps to 0xda3
-        self.assertEqual(0xda3, offset_func(self.bit_k1))
-        self.assertEqual(0x86f, offset_func(self.bit_k2))
-        self.assertEqual(0xe9d, offset_func(self.bit_k3))
+        self.assertEqual(0xda3, chk_index._bit_key_to_offset_4096(self.bit_k1))
+        self.assertEqual(0x86f, chk_index._bit_key_to_offset_4096(self.bit_k2))
+        self.assertEqual(0xe9d, chk_index._bit_key_to_offset_4096(self.bit_k3))
 
-    def test__get_bit_key_to_mini_index_65536(self):
-        offset_func = chk_index.CHKIndexBuilder._get_bit_key_to_mini_index(
-                            65536)
+    def test__bit_key_to_offset_65536(self):
         # da39... maps to 0xda39
+        offset_func = chk_index._bit_key_to_offset_65536
         self.assertEqual(0xda39, offset_func(self.bit_k1))
         self.assertEqual(0x86f7, offset_func(self.bit_k2))
         self.assertEqual(0xe9d7, offset_func(self.bit_k3))



More information about the bazaar-commits mailing list