Rev 2617: Fix and tune node offset calculation. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Fri Jul 13 10:03:19 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2617
revision-id: robertc at robertcollins.net-20070713090317-lq1j1c49ga2lha0o
parent: robertc at robertcollins.net-20070713082425-2zkkb3e67a94564e
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2007-07-13 19:03:17 +1000
message:
Fix and tune node offset calculation.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-07-13 08:10:09 +0000
+++ b/bzrlib/index.py 2007-07-13 09:03:17 +0000
@@ -76,9 +76,11 @@
for reference in reference_list:
if _whitespace_re.search(reference) is not None:
raise errors.BadIndexKey(reference)
- if key in self._nodes:
+ if reference not in self._nodes:
+ self._nodes[reference] = ('a', [], '')
+ if key in self._nodes and self._nodes[key][0] == '':
raise errors.BadIndexDuplicateKey(key, self)
- self._nodes[key] = (references, value)
+ self._nodes[key] = ('', references, value)
def finish(self):
lines = [_SIGNATURE]
@@ -95,45 +97,67 @@
# one to pad all the data with reference-length and determine entry
# addresses.
# One to serialise.
- non_ref_bytes = prefix_length
- total_references = 0
- # XXX: support 'a' field here.
- for key, (references, value) in sorted(self._nodes.items(),reverse=True):
- # key is literal, value is literal, there are 3 null's, 1 NL
- # (ref_lists -1) tabs, (ref-1 cr's per ref_list)
- non_ref_bytes += len(key) + 3 + 1 + self.reference_lists - 1
- for ref_list in references:
- # how many references across the whole file?
- total_references += len(ref_list)
- # accrue reference separators
- non_ref_bytes += len(ref_list) - 1
- # how many digits are needed to represent the total byte count?
- digits = 1
- possible_total_bytes = non_ref_bytes + total_references*digits
- while 10 ** digits < possible_total_bytes:
- digits += 1
+ nodes = sorted(self._nodes.items(),reverse=True)
+ # we only need to pre-pass if we have reference lists at all.
+ if self.reference_lists:
+ non_ref_bytes = prefix_length
+ total_references = 0
+ # TODO use simple multiplication for the constants in this loop.
+ # TODO: factor out the node length calculations so this loop
+ # and the next have less (no!) duplicate code.
+ for key, (absent, references, value) in nodes:
+ # key is literal, value is literal, there are 3 null's, 1 NL
+ non_ref_bytes += len(key) + len(value) + 3 + 1
+ # one byte for absent if set.
+ if absent:
+ non_ref_bytes += 1
+ if self.reference_lists:
+ # (ref_lists -1) tabs
+ non_ref_bytes += self.reference_lists - 1
+ # (ref-1 cr's per ref_list)
+ for ref_list in references:
+ # how many references across the whole file?
+ total_references += len(ref_list)
+ # accrue reference separators
+ if ref_list:
+ non_ref_bytes += len(ref_list) - 1
+ # how many digits are needed to represent the total byte count?
+ digits = 1
possible_total_bytes = non_ref_bytes + total_references*digits
- # resolve key addresses.
- key_addresses = {}
- current_offset = prefix_length
- for key, (references, value) in sorted(self._nodes.items(),reverse=True):
- key_addresses[key] = current_offset
- current_offset += len(key) + 3 + 1 + self.reference_lists - 1
- for ref_list in references:
- # accrue reference separators
- current_offset += len(ref_list) - 1
- # accrue reference bytes
- current_offset += digits * len(ref_list)
- # serialise
- format_string = '%%0%sd' % digits
- for key, (references, value) in sorted(self._nodes.items(),reverse=True):
+ while 10 ** digits < possible_total_bytes:
+ digits += 1
+ possible_total_bytes = non_ref_bytes + total_references*digits
+ # resolve key addresses.
+ key_addresses = {}
+ current_offset = prefix_length
+ for key, (absent, references, value) in nodes:
+ key_addresses[key] = current_offset
+ # key is literal, value is literal, there are 3 null's, 1 NL
+ current_offset += len(key) + len(value) + 3 + 1
+ # one byte for absent if set.
+ if absent:
+ current_offset+= 1
+ if self.reference_lists:
+ # (ref_lists -1) tabs
+ current_offset += self.reference_lists - 1
+ # (ref-1 cr's per ref_list)
+ for ref_list in references:
+ # accrue reference bytes
+ current_offset += digits * len(ref_list)
+ # accrue reference separators
+ if ref_list:
+ # accrue reference separators
+ current_offset += len(ref_list) - 1
+ # serialise
+ format_string = '%%0%sd' % digits
+ for key, (absent, references, value) in nodes:
flattened_references = []
for ref_list in references:
ref_addresses = []
for reference in ref_list:
ref_addresses.append(format_string % key_addresses[reference])
flattened_references.append('\r'.join(ref_addresses))
- lines.append("%s\0\0%s\0%s\n" % (key,
+ lines.append("%s\0%s\0%s\0%s\n" % (key, absent,
'\t'.join(flattened_references), value))
lines.append('\n')
return StringIO(''.join(lines))
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2007-07-13 08:24:25 +0000
+++ b/bzrlib/tests/test_index.py 2007-07-13 09:03:17 +0000
@@ -113,7 +113,7 @@
self.assertEqual("Bazaar Graph Index 1\nnode_ref_lists=1\n"
"reference2\0\0\0data\n"
"reference\0\0\0data\n"
- "key\0\x0051\r38\0data\n"
+ "key\0\x0056\r38\0data\n"
"\n", contents)
def test_multiple_reference_lists_are_tab_delimited(self):
@@ -127,6 +127,17 @@
"key\0\x0038\t38\0data\n"
"\n", contents)
+ def test_add_node_referencing_missing_key_makes_absent(self):
+ builder = GraphIndexBuilder(reference_lists=1)
+ builder.add_node('key', (['reference', 'reference2'], ), 'data')
+ stream = builder.finish()
+ contents = stream.read()
+ self.assertEqual("Bazaar Graph Index 1\nnode_ref_lists=1\n"
+ "reference2\0a\0\0\n"
+ "reference\0a\0\0\n"
+ "key\0\x0053\r38\0data\n"
+ "\n", contents)
+
def test_add_node_bad_key(self):
builder = GraphIndexBuilder()
for bad_char in '\t\n\x0b\x0c\r\x00 ':
@@ -181,7 +192,7 @@
def test_add_key_after_referencing_key(self):
builder = GraphIndexBuilder(reference_lists=1)
- builder.add_node('key', (['reference']), 'data')
+ builder.add_node('key', (['reference'], ), 'data')
builder.add_node('reference', ([],), 'data')
More information about the bazaar-commits
mailing list