Rev 2614: Node references are byte offsets. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Fri Jul 13 09:10:11 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2614
revision-id: robertc at robertcollins.net-20070713081009-uouct3cvz4dz1rtl
parent: robertc at robertcollins.net-20070713072951-zyno1jr1tjyo819y
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2007-07-13 18:10:09 +1000
message:
  Node references are byte offsets.
modified:
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/tests/test_index.py     test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-07-13 07:28:18 +0000
+++ b/bzrlib/index.py	2007-07-13 08:10:09 +0000
@@ -83,10 +83,56 @@
     def finish(self):
         lines = [_SIGNATURE]
         lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
+        prefix_length = len(lines[0]) + len(lines[1])
+        # references are byte offsets. To avoid having to do nasty
+        # polynomial work to resolve offsets (references to later in the 
+        # file cannot be determined until all the inbetween references have
+        # been calculated too) we pad the offsets with 0's to make them be
+        # of consistent length. Using binary offsets would break the trivial
+        # file parsing.
+        # to calculate the width of zero's needed we do three passes:
+        # one to gather all the non-reference data and the number of references.
+        # one to pad all the data with reference-length and determine entry
+        # addresses.
+        # One to serialise.
+        non_ref_bytes = prefix_length
+        total_references = 0
+        # XXX: support 'a' field here.
+        for key, (references, value) in sorted(self._nodes.items(),reverse=True):
+            # key is literal, value is literal, there are 3 null's, 1 NL
+            # (ref_lists -1) tabs, (ref-1 cr's per ref_list)
+            non_ref_bytes += len(key) + 3 + 1 + self.reference_lists - 1
+            for ref_list in references:
+                # how many references across the whole file?
+                total_references += len(ref_list)
+                # accrue reference separators
+                non_ref_bytes += len(ref_list) - 1
+        # how many digits are needed to represent the total byte count?
+        digits = 1
+        possible_total_bytes = non_ref_bytes + total_references*digits
+        while 10 ** digits < possible_total_bytes:
+            digits += 1
+            possible_total_bytes = non_ref_bytes + total_references*digits
+        # resolve key addresses.
+        key_addresses = {}
+        current_offset = prefix_length
+        for key, (references, value) in sorted(self._nodes.items(),reverse=True):
+            key_addresses[key] = current_offset
+            current_offset += len(key) + 3 + 1 + self.reference_lists - 1
+            for ref_list in references:
+                # accrue reference separators
+                current_offset += len(ref_list) - 1
+                # accrue reference bytes
+                current_offset += digits * len(ref_list)
+        # serialise
+        format_string = '%%0%sd' % digits
         for key, (references, value) in sorted(self._nodes.items(),reverse=True):
             flattened_references = []
             for ref_list in references:
-                flattened_references.append('')
+                ref_addresses = []
+                for reference in ref_list:
+                    ref_addresses.append(format_string % key_addresses[reference])
+                flattened_references.append('\r'.join(ref_addresses))
             lines.append("%s\0\0%s\0%s\n" % (key,
                 '\t'.join(flattened_references), value))
         lines.append('\n')

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2007-07-13 07:29:51 +0000
+++ b/bzrlib/tests/test_index.py	2007-07-13 08:10:09 +0000
@@ -92,6 +92,17 @@
             "key\0\0\t\0data\n"
             "\n", contents)
 
+    def test_node_references_are_byte_offsets(self):
+        builder = GraphIndexBuilder(reference_lists=1)
+        builder.add_node('reference', ([], ), 'data')
+        builder.add_node('key', (['reference'], ), 'data')
+        stream = builder.finish()
+        contents = stream.read()
+        self.assertEqual("Bazaar Graph Index 1\nnode_ref_lists=1\n"
+            "reference\0\0\0data\n"
+            "key\0\x0038\0data\n"
+            "\n", contents)
+
     def test_add_node_bad_key(self):
         builder = GraphIndexBuilder()
         for bad_char in '\t\n\x0b\x0c\r\x00 ':




More information about the bazaar-commits mailing list