Rev 2617: Fix and tune node offset calculation. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Fri Jul 13 10:03:19 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2617
revision-id: robertc at robertcollins.net-20070713090317-lq1j1c49ga2lha0o
parent: robertc at robertcollins.net-20070713082425-2zkkb3e67a94564e
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2007-07-13 19:03:17 +1000
message:
  Fix and tune node offset calculation.
modified:
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/tests/test_index.py     test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-07-13 08:10:09 +0000
+++ b/bzrlib/index.py	2007-07-13 09:03:17 +0000
@@ -76,9 +76,11 @@
             for reference in reference_list:
                 if _whitespace_re.search(reference) is not None:
                     raise errors.BadIndexKey(reference)
-        if key in self._nodes:
+                if reference not in self._nodes:
+                    self._nodes[reference] = ('a', [], '')
+        if key in self._nodes and self._nodes[key][0] == '':
             raise errors.BadIndexDuplicateKey(key, self)
-        self._nodes[key] = (references, value)
+        self._nodes[key] = ('', references, value)
 
     def finish(self):
         lines = [_SIGNATURE]
@@ -95,45 +97,67 @@
         # one to pad all the data with reference-length and determine entry
         # addresses.
         # One to serialise.
-        non_ref_bytes = prefix_length
-        total_references = 0
-        # XXX: support 'a' field here.
-        for key, (references, value) in sorted(self._nodes.items(),reverse=True):
-            # key is literal, value is literal, there are 3 null's, 1 NL
-            # (ref_lists -1) tabs, (ref-1 cr's per ref_list)
-            non_ref_bytes += len(key) + 3 + 1 + self.reference_lists - 1
-            for ref_list in references:
-                # how many references across the whole file?
-                total_references += len(ref_list)
-                # accrue reference separators
-                non_ref_bytes += len(ref_list) - 1
-        # how many digits are needed to represent the total byte count?
-        digits = 1
-        possible_total_bytes = non_ref_bytes + total_references*digits
-        while 10 ** digits < possible_total_bytes:
-            digits += 1
+        nodes = sorted(self._nodes.items(),reverse=True)
+        # we only need to pre-pass if we have reference lists at all.
+        if self.reference_lists:
+            non_ref_bytes = prefix_length
+            total_references = 0
+            # TODO use simple multiplication for the constants in this loop.
+            # TODO: factor out the node length calculations so this loop 
+            #       and the next have less (no!) duplicate code.
+            for key, (absent, references, value) in nodes:
+                # key is literal, value is literal, there are 3 null's, 1 NL
+                non_ref_bytes += len(key) + len(value) + 3 + 1
+                # one byte for absent if set.
+                if absent:
+                    non_ref_bytes += 1
+                if self.reference_lists:
+                    # (ref_lists -1) tabs
+                    non_ref_bytes += self.reference_lists - 1
+                    # (ref-1 cr's per ref_list)
+                    for ref_list in references:
+                        # how many references across the whole file?
+                        total_references += len(ref_list)
+                        # accrue reference separators
+                        if ref_list:
+                            non_ref_bytes += len(ref_list) - 1
+            # how many digits are needed to represent the total byte count?
+            digits = 1
             possible_total_bytes = non_ref_bytes + total_references*digits
-        # resolve key addresses.
-        key_addresses = {}
-        current_offset = prefix_length
-        for key, (references, value) in sorted(self._nodes.items(),reverse=True):
-            key_addresses[key] = current_offset
-            current_offset += len(key) + 3 + 1 + self.reference_lists - 1
-            for ref_list in references:
-                # accrue reference separators
-                current_offset += len(ref_list) - 1
-                # accrue reference bytes
-                current_offset += digits * len(ref_list)
-        # serialise
-        format_string = '%%0%sd' % digits
-        for key, (references, value) in sorted(self._nodes.items(),reverse=True):
+            while 10 ** digits < possible_total_bytes:
+                digits += 1
+                possible_total_bytes = non_ref_bytes + total_references*digits
+            # resolve key addresses.
+            key_addresses = {}
+            current_offset = prefix_length
+            for key, (absent, references, value) in nodes:
+                key_addresses[key] = current_offset
+                # key is literal, value is literal, there are 3 null's, 1 NL
+                current_offset += len(key) + len(value) + 3 + 1
+                # one byte for absent if set.
+                if absent:
+                    current_offset+= 1
+                if self.reference_lists:
+                    # (ref_lists -1) tabs
+                    current_offset += self.reference_lists - 1
+                    # (ref-1 cr's per ref_list)
+                    for ref_list in references:
+                        # accrue reference bytes
+                        current_offset += digits * len(ref_list)
+                        # accrue reference separators
+                        if ref_list:
+                            # accrue reference separators
+                            current_offset += len(ref_list) - 1
+            # serialise
+            format_string = '%%0%sd' % digits
+        for key, (absent, references, value) in nodes:
             flattened_references = []
             for ref_list in references:
                 ref_addresses = []
                 for reference in ref_list:
                     ref_addresses.append(format_string % key_addresses[reference])
                 flattened_references.append('\r'.join(ref_addresses))
-            lines.append("%s\0\0%s\0%s\n" % (key,
+            lines.append("%s\0%s\0%s\0%s\n" % (key, absent,
                 '\t'.join(flattened_references), value))
         lines.append('\n')
         return StringIO(''.join(lines))

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2007-07-13 08:24:25 +0000
+++ b/bzrlib/tests/test_index.py	2007-07-13 09:03:17 +0000
@@ -113,7 +113,7 @@
         self.assertEqual("Bazaar Graph Index 1\nnode_ref_lists=1\n"
             "reference2\0\0\0data\n"
             "reference\0\0\0data\n"
-            "key\0\x0051\r38\0data\n"
+            "key\0\x0056\r38\0data\n"
             "\n", contents)
 
     def test_multiple_reference_lists_are_tab_delimited(self):
@@ -127,6 +127,17 @@
             "key\0\x0038\t38\0data\n"
             "\n", contents)
 
+    def test_add_node_referencing_missing_key_makes_absent(self):
+        builder = GraphIndexBuilder(reference_lists=1)
+        builder.add_node('key', (['reference', 'reference2'], ), 'data')
+        stream = builder.finish()
+        contents = stream.read()
+        self.assertEqual("Bazaar Graph Index 1\nnode_ref_lists=1\n"
+            "reference2\0a\0\0\n"
+            "reference\0a\0\0\n"
+            "key\0\x0053\r38\0data\n"
+            "\n", contents)
+
     def test_add_node_bad_key(self):
         builder = GraphIndexBuilder()
         for bad_char in '\t\n\x0b\x0c\r\x00 ':
@@ -181,7 +192,7 @@
 
     def test_add_key_after_referencing_key(self):
         builder = GraphIndexBuilder(reference_lists=1)
-        builder.add_node('key', (['reference']), 'data')
+        builder.add_node('key', (['reference'], ), 'data')
         builder.add_node('reference', ([],), 'data')
 
 




More information about the bazaar-commits mailing list