Rev 4794: Move the 'groups' header after the 'mini-index' header since in http://bazaar.launchpad.net/~jameinel/bzr/chk-index
John Arbash Meinel
john at arbash-meinel.com
Wed Oct 28 17:04:18 GMT 2009
At http://bazaar.launchpad.net/~jameinel/bzr/chk-index
------------------------------------------------------------
revno: 4794
revision-id: john at arbash-meinel.com-20091028170410-7s6zo4cwgcn65p8f
parent: john at arbash-meinel.com-20091028170002-sob4l165skyupg3h
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: chk-index
timestamp: Wed 2009-10-28 12:04:10 -0500
message:
Move the 'groups' header after the 'mini_index' header, since
that is the order in which the two sections appear in the file.
-------------- next part --------------
=== modified file 'bzrlib/chk_index.py'
--- a/bzrlib/chk_index.py 2009-10-28 17:00:02 +0000
+++ b/bzrlib/chk_index.py 2009-10-28 17:04:10 +0000
@@ -140,10 +140,10 @@
"""
bytes = ('%(signature)s'
'hash=%(hash_function)s %(num_hash_bytes)d\n'
+ 'mini_index=%(mini_index_offset)d %(num_mini_index_entries)d'
+ ' %(mini_index_entry_offset_bytes)d\n'
'groups=%(group_index_offset)d %(group_index_start_bytes)d'
' %(group_index_length_bytes)d %(num_groups)d\n'
- 'mini_index=%(mini_index_offset)d %(num_mini_index_entries)d'
- ' %(mini_index_entry_offset_bytes)d\n'
'entry=%(num_entries)d %(entry_hash_bytes)d'
' %(entry_group_offset_bytes)d %(entry_group_start_bytes)d'
' %(entry_group_length_bytes)d\n'
@@ -182,14 +182,14 @@
% (bytes,))
pos = newline_pos + 1
(pos,
+ (header.mini_index_offset, header.num_mini_index_entries,
+ header.mini_index_entry_offset_bytes,
+ )) = cls._read_line(bytes, pos, 'mini_index', 3)
+ (pos,
(header.group_index_offset, header.group_index_start_bytes,
header.group_index_length_bytes, header.num_groups,
)) = cls._read_line(bytes, pos, 'groups', 4)
(pos,
- (header.mini_index_offset, header.num_mini_index_entries,
- header.mini_index_entry_offset_bytes,
- )) = cls._read_line(bytes, pos, 'mini_index', 3)
- (pos,
(header.num_entries, header.entry_hash_bytes,
header.entry_group_offset_bytes, header.entry_group_start_bytes,
header.entry_group_length_bytes,
@@ -377,7 +377,7 @@
# 12bytes per entry + mapping overhead. Down from ~48. The dict
# itself is 12MB for the 400+k keys from launchpad + 44*400k for
# all the sha strings. A Judy Tree would make the mapping
- # overhead <20*400k.?Or 8MB down from ~30MB.
+ # overhead <20*400k, Or 8MB down from ~30MB.
self._nodes = {}
# map from group (start, end) to the final offset for the group
# the (0, 0) group is special, it indicates the empty content group
@@ -457,8 +457,6 @@
max_inner_length = max([v[2] for v in self._nodes.itervalues()])
else:
max_inner_start = max_inner_length = 1
- # TODO: We could walk the nodes and find the max-start and max-length
- # for entries. Not sure if it is really worthwhile
header = CHKIndexHeader.build_header_for(len(self._groups),
max_group_offset, max_group_length, len(self._nodes),
max_inner_start, max_inner_length)
=== modified file 'bzrlib/tests/test_chk_index.py'
--- a/bzrlib/tests/test_chk_index.py 2009-10-28 16:24:42 +0000
+++ b/bzrlib/tests/test_chk_index.py 2009-10-28 17:04:10 +0000
@@ -301,8 +301,8 @@
bytes = ''.join(builder.finish())
header = """Groupcompress CHK Index 1
hash=sha1 20
+mini_index=120 0 1
groups=120 2 2 3
-mini_index=120 0 1
entry=3 20 1 2 2
"""
self.assertEqualDiff(header, bytes[:len(header)])
@@ -328,8 +328,8 @@
bytes = ''.join(builder.finish())
header = """Groupcompress CHK Index 1
hash=sha1 20
+mini_index=120 16 2
groups=152 4 2 21
-mini_index=120 16 2
entry=400 20 1 2 1
"""
self.assertEqualDiff(header, bytes[:len(header)])
@@ -399,8 +399,8 @@
bytes = header.serialize()
header_bytes = """Groupcompress CHK Index 1
hash=sha1 20
+mini_index=120 256 4
groups=1144 4 4 541
-mini_index=120 256 4
entry=12345 20 2 4 4
"""
self.assertEqualDiff(header_bytes, bytes[:len(header_bytes)])
@@ -429,8 +429,8 @@
bytes = header.serialize()
header_bytes = """Groupcompress CHK Index 1
hash=sha1 20
+mini_index=120 65536 4
groups=262264 8 4 100000000
-mini_index=120 65536 4
entry=100000000 20 4 4 4
"""
self.assertEqualDiff(header_bytes, bytes[:len(header_bytes)])
@@ -440,8 +440,8 @@
def test_deserialize(self):
content = """Groupcompress CHK Index 1
hash=sha1 20
+mini_index=120 256 4
groups=1144 4 4 541
-mini_index=120 256 4
entry=12345 20 2 4 4
"""
header = chk_index.CHKIndexHeader.deserialize(content)
@@ -474,51 +474,51 @@
'hash=sha256 32\n') # unsupported sha
self.assertBadHeader(chk_index._CHKSIGNATURE +
'hash=sha1 20\n'
- 'mini_index=7517 1 4\n') # no groups=
- self.assertBadHeader(chk_index._CHKSIGNATURE +
- 'hash=sha1 20\n'
- 'groups=1025 4 4 a\n') # not an int
- self.assertBadHeader(chk_index._CHKSIGNATURE +
- 'hash=sha1 20\n'
+ 'groups=1025 4 4 a\n') # no mini_index=
+ self.assertBadHeader(chk_index._CHKSIGNATURE +
+ 'hash=sha1 20\n'
+ 'mini_index=7517 a 4\n') # not an int
+ self.assertBadHeader(chk_index._CHKSIGNATURE +
+ 'hash=sha1 20\n'
+ 'mini_index=7517 1\n') # too few
+ self.assertBadHeader(chk_index._CHKSIGNATURE +
+ 'hash=sha1 20\n'
+ 'mini_index=7517 1 4 2\n') # too many
+ self.assertBadHeader(chk_index._CHKSIGNATURE +
+ 'hash=sha1 20\n'
+ 'mini_index=7517 1 4\n'
+ 'entries=1025 4 4 a\n') # no 'groups='
+ self.assertBadHeader(chk_index._CHKSIGNATURE +
+ 'hash=sha1 20\n'
+ 'mini_index=7517 1 4\n'
'groups=1025 4 4\n') # too few
self.assertBadHeader(chk_index._CHKSIGNATURE +
'hash=sha1 20\n'
+ 'mini_index=7517 1 4\n'
'groups=1025 4 4 1 2\n') # too many
self.assertBadHeader(chk_index._CHKSIGNATURE +
'hash=sha1 20\n'
'groups=1025 4 4\n'
- 'entry=12345 20 2\n') # no mini_index=
- self.assertBadHeader(chk_index._CHKSIGNATURE +
- 'hash=sha1 20\n'
- 'groups=1025 4 4\n'
'mini_index=7517 a 4\n') # not an int
self.assertBadHeader(chk_index._CHKSIGNATURE +
'hash=sha1 20\n'
- 'groups=1025 4 4\n'
- 'mini_index=7517 1\n') # too few
- self.assertBadHeader(chk_index._CHKSIGNATURE +
- 'hash=sha1 20\n'
- 'groups=1025 4 4\n'
- 'mini_index=7517 1 4 2\n') # too many
- self.assertBadHeader(chk_index._CHKSIGNATURE +
- 'hash=sha1 20\n'
- 'groups=1025 4 4\n'
'mini_index=7517 1 4\n'
+ 'groups=1025 4 4\n'
'entr=12345 20 2\n') # not 'entry='
self.assertBadHeader(chk_index._CHKSIGNATURE +
'hash=sha1 20\n'
+ 'mini_index=7517 1 4\n'
'groups=1025 4 4\n'
- 'mini_index=7517 1 4\n'
'entry=12345 a 2\n') # not an int
self.assertBadHeader(chk_index._CHKSIGNATURE +
'hash=sha1 20\n'
+ 'mini_index=7517 1 4\n'
'groups=1025 4 4\n'
- 'mini_index=7517 1 4\n'
'entry=12345 2\n') # too few
self.assertBadHeader(chk_index._CHKSIGNATURE +
'hash=sha1 20\n'
+ 'mini_index=7517 1 4\n'
'groups=1025 4 4\n'
- 'mini_index=7517 1 4\n'
'entry=12345 2 2 2 2 2\n') # too many
def assertMinBytes(self, expected, value):
More information about the bazaar-commits mailing list