Rev 2247: Use a different structure. in http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch

John Arbash Meinel john at arbash-meinel.com
Tue Jan 30 01:19:01 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch

------------------------------------------------------------
revno: 2247
revision-id: john at arbash-meinel.com-20070130011856-ifomln3u7jxv6yf8
parent: john at arbash-meinel.com-20070130001148-y92vo2prhnswmoz0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: list_patch
timestamp: Mon 2007-01-29 19:18:56 -0600
message:
  Use a different structure.
  The new structure just keeps a linked list of range references.
  This means that the large content lists are never modified.
  The only thing we have to modify is a dictionary of tuples.
  Which should mean we don't have to worry about collapsing the deltas,
  because in essence, that is what we are doing here.
modified:
  bzrlib/hybrid_linked_list.py   hybrid_linked_list.p-20070130001028-zlexwz74tzo56o2t-1
  bzrlib/tests/test_hybrid_linked_list.py test_hybrid_linked_l-20070130001028-zlexwz74tzo56o2t-2
-------------- next part --------------
=== modified file 'bzrlib/hybrid_linked_list.py'
--- a/bzrlib/hybrid_linked_list.py	2007-01-30 00:11:48 +0000
+++ b/bzrlib/hybrid_linked_list.py	2007-01-30 01:18:56 +0000
@@ -41,107 +41,136 @@
 
         :param starting: Initialize the structure with this list.
         """
-        # This is a single linked list
-        # We use simple integers for the ids.
-        # None is the marker for end of list.
-        self._counter = 0
+        self._reset(starting)
+
+    def _reset(self, starting):
+        """Reset the internal structures to hold a single list."""
+        self._data_counter = 0
+        self._ranges_counter = 0
+
+        # This uses 3 maps.
+        # one map is just from simple id => content
+        self._content_map = {0:starting}
+
+        # This is a map from range_id => (content_node_id, start, end)
+        # This allows us to avoid doing any work on the supplied content.
+        # Instead, we just keep updating the information about what content
+        # should be extracted at the right time
         self._head = 0
-        # id => content
-        self._nodes = {self._head:starting}
-        # id => next id
-        self._links = {self._head:None}
+        self._range_map = {self._head:(0, 0, len(starting))}
+        # This is a singly linked list map for range to next range
+        self._links = {0:None}
 
     def __iter__(self):
         """Iterate over the final content."""
-        cur_node = self._head
+        cur_range_id = self._head
 
-        while cur_node is not None:
-            for entry in self._nodes[cur_node]:
-                yield entry
-            cur_node = self._links[cur_node]
+        while cur_range_id is not None:
+            content_id, start, end = self._range_map[cur_range_id]
+            content = self._content_map[content_id]
+            for pos in xrange(start, end):
+                yield content[pos]
+            cur_range_id = self._links[cur_range_id]
 
     def __len__(self):
-        return sum(len(entries) for entries in self._nodes.itervalues())
+        cur_range_id = self._head
+        length = 0
+        while cur_range_id is not None:
+            content_id, start, end = self._range_map[cur_range_id]
+            length += (end - start)
+            cur_range_id = self._links[cur_range_id]
+        return length
+
+    def __getitem__(self, pos):
+        prev_range_id, cur_range_id, offset = self._find_nodes_for_pos(pos)
+        content_id, start, end = self._range_map[cur_range_id]
+        return self._content_map[content_id][start+offset]
 
     def flatten(self):
         """Flatten the linked structure into a single list."""
-        flattened = list(self)
-        self._head = 0
-        self._counter = 0
-        self._nodes = {self._head:flattened}
-        self._links = {self._head:None}
+        self._reset(list(self))
+
+    def _add_content(self, entries):
+        self._data_counter += 1
+        self._content_map[self._data_counter] = entries
+        return self._data_counter
 
     def _find_nodes_for_pos(self, pos):
         """Figure out what nodes correspond to the given position.
 
-        :return: (prev_node, cur_node, offset_in_cur_node)
-            Where prev_node is the node pointing to cur_node (used for
+        :return: (prev_range_id, cur_range_id, offset_from_cur_start)
+            Where prev_range_id is the node pointing to cur_range_id (used for
             maintaining the single-linked-list structure)
-            cur_node is the node where the position would be found
-            offset_in_cur_node is the offset into the current node
+            cur_range_id is the node where the position would be found
+            offset_in_cur_node is the offset into the current node (from start)
         """
-        prev_node = None
-        cur_node = self._head
+        # TODO: Pretty much everyone who calls _find_nodes_for_pos then needs
+        # to turn around and look up the cur_range_id in the _range_map. So
+        # extend the signature to return the extracted values.
+        prev_range_id = None
+        cur_range_id = self._head
         cur_pos = 0
 
-        while cur_node is not None:
-            cur_entries = self._nodes[cur_node]
-            next_pos = cur_pos + len(cur_entries)
+        while cur_range_id is not None:
+            content_id, start, end = self._range_map[cur_range_id]
+            next_pos = cur_pos + (end - start)
             if pos < next_pos:
                 break
-            prev_node = cur_node
-            cur_node = self._links[cur_node]
+            prev_range_id = cur_range_id
+            cur_range_id = self._links[cur_range_id]
             cur_pos = next_pos
-        return prev_node, cur_node, pos - cur_pos
+        return prev_range_id, cur_range_id, pos - cur_pos
 
-    def _new_node_id(self):
-        """Get a new node id."""
-        self._counter += 1
-        return self._counter
+    def _new_range_id(self, content_id, start, end):
+        """Get a new range id."""
+        self._ranges_counter += 1
+        self._range_map[self._ranges_counter] = (content_id, start, end)
+        return self._ranges_counter
 
     def insert(self, start, entries):
         """Insert a set of entries starting at the first position."""
-        new_node_id = self._new_node_id()
-        self._nodes[new_node_id] = entries
-
-        prev_node, cur_node, cur_pos = self._find_nodes_for_pos(start)
-
-        if cur_pos == 0:
+        new_content_id = self._add_content(entries)
+
+        new_range_id = self._new_range_id(new_content_id, 0, len(entries))
+
+        prev_range_id, cur_range_id, offset = self._find_nodes_for_pos(start)
+
+        if offset == 0:
             # We are inserting before an existing node, or at the exact end of
             # the list
-            if prev_node is None:
+            if prev_range_id is None:
                 # We are at _head, update its reference
-                self._links[new_node_id] = self._head
-                self._head = new_node_id
+                self._links[new_range_id] = self._head
+                self._head = new_range_id
             else:
-                self._links[new_node_id] = cur_node
-                self._links[prev_node] = new_node_id
+                self._links[new_range_id] = cur_range_id
+                self._links[prev_range_id] = new_range_id
         else:
             # We are inserting inside an existing node
-            if cur_node is None:
+            if cur_range_id is None:
                 # We are appending a node
                 # python lists have the property that x[BIG_NUM:BIGNUM] = [10]
                 # will always append 10 to the list, even though it is out of
                 # range
                 # TODO: For now, I'm duplicating that, but it seems to make
                 #   more sense to raise IndexError or something
-                self._links[new_node_id] = None
-                self._links[prev_node] = new_node_id
+                self._links[new_range_id] = None
+                self._links[prev_range_id] = new_range_id
             else:
                 # We need to break up the current node, and insert the
                 # requested one
-                split_node_id = self._new_node_id()
-
-                next_node_id = self._links[cur_node]
-
-                cur_entries = self._nodes[cur_node]
-                split_entries = cur_entries[cur_pos:]
-                cur_entries = cur_entries[:cur_pos]
-
-                self._nodes[split_node_id] = split_entries
-
-                self._links[split_node_id] = next_node_id
-                self._links[new_node_id] = split_node_id
-
-                self._nodes[cur_node] = cur_entries
-                self._links[cur_node] = new_node_id
+                content_id, start, end = self._range_map[cur_range_id]
+                break_point = start + offset
+                split_range_id = self._new_range_id(content_id, break_point,
+                                                    end)
+                next_range_id = self._links[cur_range_id]
+                self._links[split_range_id] = next_range_id
+                self._links[new_range_id] = split_range_id
+
+                # We could make this atomic by creating a new id for the
+                # initial range, and then the atomic step is
+                # self._links[prev_range_id] = new_head_range
+                # And then afterwards we would want to delete the cur_range_id
+                # entry in the map
+                self._range_map[cur_range_id] = (content_id, start, break_point)
+                self._links[cur_range_id] = new_range_id

=== modified file 'bzrlib/tests/test_hybrid_linked_list.py'
--- a/bzrlib/tests/test_hybrid_linked_list.py	2007-01-30 00:11:48 +0000
+++ b/bzrlib/tests/test_hybrid_linked_list.py	2007-01-30 01:18:56 +0000
@@ -36,11 +36,13 @@
 
     def test__find_nodes_for_pos(self):
         hll = hybrid_linked_list.HybridLinkedList([])
-        hll._nodes = {0:[0, 1],
-                      1:[2, 3, 4],
-                      2:[5],
-                     }
+        hll._range_map = {0:(0, 0, 2),
+                          1:(0, 3, 6),
+                          2:(0, 6, 7),
+                         }
         hll._links = {0:1, 1:2, 2:None}
+        hll._content_map = {0:[0, 1, 2, 3, 4, 5, 6, 7, 8]}
+        self.assertEqual([0, 1, 3, 4, 5, 6], list(hll))
         self.assertEqual((None, 0, 0), hll._find_nodes_for_pos(0))
         self.assertEqual((None, 0, 1), hll._find_nodes_for_pos(1))
         self.assertEqual((0, 1, 0), hll._find_nodes_for_pos(2))
@@ -50,6 +52,18 @@
         self.assertEqual((2, None, 0), hll._find_nodes_for_pos(6))
         self.assertEqual((2, None, 1), hll._find_nodes_for_pos(7))
 
+    def test__getitem__(self):
+        hll = hybrid_linked_list.HybridLinkedList([])
+        hll._range_map = {0:(0, 0, 2),
+                          1:(0, 3, 6),
+                          2:(0, 6, 7),
+                         }
+        hll._links = {0:1, 1:2, 2:None}
+        hll._content_map = {0:[0, 1, 2, 3, 4, 5, 6, 7, 8]}
+        self.assertEqual([0, 1, 3, 4, 5, 6], list(hll))
+        for expected, pos in zip([0, 1, 3, 4, 5, 6], xrange(0,5)):
+            self.assertEqual(expected, hll[pos])
+
     def test_insert(self):
         hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
         hll.insert(3, [5, 6, 7])



More information about the bazaar-commits mailing list