Rev 4794: Move the 'groups' header after the 'mini-index' header since that is the position in the file, in http://bazaar.launchpad.net/~jameinel/bzr/chk-index

John Arbash Meinel john at arbash-meinel.com
Wed Oct 28 17:04:18 GMT 2009


At http://bazaar.launchpad.net/~jameinel/bzr/chk-index

------------------------------------------------------------
revno: 4794
revision-id: john at arbash-meinel.com-20091028170410-7s6zo4cwgcn65p8f
parent: john at arbash-meinel.com-20091028170002-sob4l165skyupg3h
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: chk-index
timestamp: Wed 2009-10-28 12:04:10 -0500
message:
  Move the 'groups' header after the 'mini-index' header since
  that is the position in the file.
-------------- next part --------------
=== modified file 'bzrlib/chk_index.py'
--- a/bzrlib/chk_index.py	2009-10-28 17:00:02 +0000
+++ b/bzrlib/chk_index.py	2009-10-28 17:04:10 +0000
@@ -140,10 +140,10 @@
         """
         bytes = ('%(signature)s'
                 'hash=%(hash_function)s %(num_hash_bytes)d\n'
+                'mini_index=%(mini_index_offset)d %(num_mini_index_entries)d'
+                    ' %(mini_index_entry_offset_bytes)d\n'
                 'groups=%(group_index_offset)d %(group_index_start_bytes)d'
                     ' %(group_index_length_bytes)d %(num_groups)d\n'
-                'mini_index=%(mini_index_offset)d %(num_mini_index_entries)d'
-                    ' %(mini_index_entry_offset_bytes)d\n'
                 'entry=%(num_entries)d %(entry_hash_bytes)d'
                     ' %(entry_group_offset_bytes)d %(entry_group_start_bytes)d'
                     ' %(entry_group_length_bytes)d\n'
@@ -182,14 +182,14 @@
                 % (bytes,))
         pos = newline_pos + 1
         (pos,
+            (header.mini_index_offset, header.num_mini_index_entries,
+             header.mini_index_entry_offset_bytes,
+            )) = cls._read_line(bytes, pos, 'mini_index', 3)
+        (pos,
             (header.group_index_offset, header.group_index_start_bytes,
              header.group_index_length_bytes, header.num_groups,
             )) = cls._read_line(bytes, pos, 'groups', 4)
         (pos,
-            (header.mini_index_offset, header.num_mini_index_entries,
-             header.mini_index_entry_offset_bytes,
-            )) = cls._read_line(bytes, pos, 'mini_index', 3)
-        (pos,
             (header.num_entries, header.entry_hash_bytes,
              header.entry_group_offset_bytes, header.entry_group_start_bytes,
              header.entry_group_length_bytes,
@@ -377,7 +377,7 @@
         #       12bytes per entry + mapping overhead. Down from ~48. The dict
         #       itself is 12MB for the 400+k keys from launchpad + 44*400k for
         #       all the sha strings. A Judy Tree would make the mapping
-        #       overhead <20*400k.?Or 8MB down from ~30MB.
+        #       overhead <20*400k, Or 8MB down from ~30MB.
         self._nodes = {}
         # map from group (start, end) to the final offset for the group
         # the (0, 0) group is special, it indicates the empty content group
@@ -457,8 +457,6 @@
             max_inner_length = max([v[2] for v in self._nodes.itervalues()])
         else:
             max_inner_start = max_inner_length = 1
-        # TODO: We could walk the nodes and find the max-start and max-length
-        #       for entries. Not sure if it is really worthwhile
         header = CHKIndexHeader.build_header_for(len(self._groups),
             max_group_offset, max_group_length, len(self._nodes),
             max_inner_start, max_inner_length)

=== modified file 'bzrlib/tests/test_chk_index.py'
--- a/bzrlib/tests/test_chk_index.py	2009-10-28 16:24:42 +0000
+++ b/bzrlib/tests/test_chk_index.py	2009-10-28 17:04:10 +0000
@@ -301,8 +301,8 @@
         bytes = ''.join(builder.finish())
         header = """Groupcompress CHK Index 1
 hash=sha1 20
+mini_index=120 0 1
 groups=120 2 2 3
-mini_index=120 0 1
 entry=3 20 1 2 2
 """
         self.assertEqualDiff(header, bytes[:len(header)])
@@ -328,8 +328,8 @@
         bytes = ''.join(builder.finish())
         header = """Groupcompress CHK Index 1
 hash=sha1 20
+mini_index=120 16 2
 groups=152 4 2 21
-mini_index=120 16 2
 entry=400 20 1 2 1
 """
         self.assertEqualDiff(header, bytes[:len(header)])
@@ -399,8 +399,8 @@
         bytes = header.serialize()
         header_bytes = """Groupcompress CHK Index 1
 hash=sha1 20
+mini_index=120 256 4
 groups=1144 4 4 541
-mini_index=120 256 4
 entry=12345 20 2 4 4
 """
         self.assertEqualDiff(header_bytes, bytes[:len(header_bytes)])
@@ -429,8 +429,8 @@
         bytes = header.serialize()
         header_bytes = """Groupcompress CHK Index 1
 hash=sha1 20
+mini_index=120 65536 4
 groups=262264 8 4 100000000
-mini_index=120 65536 4
 entry=100000000 20 4 4 4
 """
         self.assertEqualDiff(header_bytes, bytes[:len(header_bytes)])
@@ -440,8 +440,8 @@
     def test_deserialize(self):
         content = """Groupcompress CHK Index 1
 hash=sha1 20
+mini_index=120 256 4
 groups=1144 4 4 541
-mini_index=120 256 4
 entry=12345 20 2 4 4
 """
         header = chk_index.CHKIndexHeader.deserialize(content)
@@ -474,51 +474,51 @@
             'hash=sha256 32\n') # unsupported sha
         self.assertBadHeader(chk_index._CHKSIGNATURE +
             'hash=sha1 20\n'
-            'mini_index=7517 1 4\n') # no groups=
-        self.assertBadHeader(chk_index._CHKSIGNATURE +
-            'hash=sha1 20\n'
-            'groups=1025 4 4 a\n') # not an int
-        self.assertBadHeader(chk_index._CHKSIGNATURE +
-            'hash=sha1 20\n'
+            'groups=1025 4 4 a\n') # no mini_index=
+        self.assertBadHeader(chk_index._CHKSIGNATURE +
+            'hash=sha1 20\n'
+            'mini_index=7517 a 4\n') # not an int
+        self.assertBadHeader(chk_index._CHKSIGNATURE +
+            'hash=sha1 20\n'
+            'mini_index=7517 1\n') # too few
+        self.assertBadHeader(chk_index._CHKSIGNATURE +
+            'hash=sha1 20\n'
+            'mini_index=7517 1 4 2\n') # too many
+        self.assertBadHeader(chk_index._CHKSIGNATURE +
+            'hash=sha1 20\n'
+            'mini_index=7517 1 4\n'
+            'entries=1025 4 4 a\n') # no 'groups='
+        self.assertBadHeader(chk_index._CHKSIGNATURE +
+            'hash=sha1 20\n'
+            'mini_index=7517 1 4\n'
             'groups=1025 4 4\n') # too few
         self.assertBadHeader(chk_index._CHKSIGNATURE +
             'hash=sha1 20\n'
+            'mini_index=7517 1 4\n'
             'groups=1025 4 4 1 2\n') # too many
         self.assertBadHeader(chk_index._CHKSIGNATURE +
             'hash=sha1 20\n'
             'groups=1025 4 4\n'
-            'entry=12345 20 2\n') # no mini_index=
-        self.assertBadHeader(chk_index._CHKSIGNATURE +
-            'hash=sha1 20\n'
-            'groups=1025 4 4\n'
             'mini_index=7517 a 4\n') # not an int
         self.assertBadHeader(chk_index._CHKSIGNATURE +
             'hash=sha1 20\n'
-            'groups=1025 4 4\n'
-            'mini_index=7517 1\n') # too few
-        self.assertBadHeader(chk_index._CHKSIGNATURE +
-            'hash=sha1 20\n'
-            'groups=1025 4 4\n'
-            'mini_index=7517 1 4 2\n') # too many
-        self.assertBadHeader(chk_index._CHKSIGNATURE +
-            'hash=sha1 20\n'
-            'groups=1025 4 4\n'
             'mini_index=7517 1 4\n'
+            'groups=1025 4 4\n'
             'entr=12345 20 2\n') # not 'entry='
         self.assertBadHeader(chk_index._CHKSIGNATURE +
             'hash=sha1 20\n'
+            'mini_index=7517 1 4\n'
             'groups=1025 4 4\n'
-            'mini_index=7517 1 4\n'
             'entry=12345 a 2\n') # not an int
         self.assertBadHeader(chk_index._CHKSIGNATURE +
             'hash=sha1 20\n'
+            'mini_index=7517 1 4\n'
             'groups=1025 4 4\n'
-            'mini_index=7517 1 4\n'
             'entry=12345 2\n') # too few
         self.assertBadHeader(chk_index._CHKSIGNATURE +
             'hash=sha1 20\n'
+            'mini_index=7517 1 4\n'
             'groups=1025 4 4\n'
-            'mini_index=7517 1 4\n'
             'entry=12345 2 2 2 2 2\n') # too many
 
     def assertMinBytes(self, expected, value):



More information about the bazaar-commits mailing list