Rev 2251: Switch from using a separate content_id => content map in http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch

John Arbash Meinel john at arbash-meinel.com
Tue Jan 30 16:30:14 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch

------------------------------------------------------------
revno: 2251
revision-id: john at arbash-meinel.com-20070130163007-lb5g247vjygjhkzb
parent: john at arbash-meinel.com-20070130035040-sd7e63bpxs1xw4n6
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: list_patch
timestamp: Tue 2007-01-30 10:30:07 -0600
message:
  Switch from using a separate content_id => content map
  into just using the explicit content.
  Add tests to make sure the content isn't being copied.
  This should allow content ranges to be freed when they
  are no longer referenced.
modified:
  bzrlib/hybrid_linked_list.py   hybrid_linked_list.p-20070130001028-zlexwz74tzo56o2t-1
  bzrlib/tests/test_hybrid_linked_list.py test_hybrid_linked_l-20070130001028-zlexwz74tzo56o2t-2
-------------- next part --------------
=== modified file 'bzrlib/hybrid_linked_list.py'
--- a/bzrlib/hybrid_linked_list.py	2007-01-30 03:50:40 +0000
+++ b/bzrlib/hybrid_linked_list.py	2007-01-30 16:30:07 +0000
@@ -33,12 +33,6 @@
     nodes themselves are never modified.
     """
 
-    # TODO: jam 20060129 Right now this class holds onto all content, even
-    #       after it is no longer being referenced. This is mostly because of
-    #       the internal content_id => content mapping.
-    #       It should actually be possible to use a reference to the content
-    #       directly, though it makes testing a little bit clumsier.
-
     def __init__(self, starting=[]):
         """Create a new HybridLinkedList.
 
@@ -48,13 +42,9 @@
 
     def _reset(self, starting):
         """Reset the internal structures to hold a single list."""
-        self._data_counter = -1
         self._ranges_counter = -1
 
-        # This uses 3 maps.
-        # one map is just from simple id => content
-        self._content_map = {}
-
+        # This uses 2 maps.
         # This is a map from range_id => (content_node_id, start, end)
         # This allows us to avoid doing any work on the supplied content.
         # Instead, we just keep updating the information about what content
@@ -72,8 +62,7 @@
         cur_range_id = self._head
 
         while cur_range_id is not None:
-            content_id, start, end = self._range_map[cur_range_id]
-            content = self._content_map[content_id]
+            content, start, end = self._range_map[cur_range_id]
             for pos in xrange(start, end):
                 yield content[pos]
             cur_range_id = self._links[cur_range_id]
@@ -82,25 +71,20 @@
         cur_range_id = self._head
         length = 0
         while cur_range_id is not None:
-            content_id, start, end = self._range_map[cur_range_id]
+            content, start, end = self._range_map[cur_range_id]
             length += (end - start)
             cur_range_id = self._links[cur_range_id]
         return length
 
     def __getitem__(self, pos):
         (prev_range_id, cur_range_id, offset,
-         content_id, start, end) = self._find_nodes_for_pos(pos)
-        return self._content_map[content_id][start+offset]
+         content, start, end) = self._find_nodes_for_pos(pos)
+        return content[start+offset]
 
     def flatten(self):
         """Flatten the linked structure into a single list."""
         self._reset(list(self))
 
-    def _add_content(self, entries):
-        self._data_counter += 1
-        self._content_map[self._data_counter] = entries
-        return self._data_counter
-
     def _find_nodes_for_pos(self, pos):
         """Figure out what nodes correspond to the given position.
 
@@ -113,34 +97,32 @@
         prev_range_id = None
         cur_range_id = self._head
         cur_pos = 0
-        content_id = start = end = None
+        content = start = end = None
 
         while cur_range_id is not None:
-            content_id, start, end = self._range_map[cur_range_id]
+            content, start, end = self._range_map[cur_range_id]
             next_pos = cur_pos + (end - start)
             if pos < next_pos:
                 break
             prev_range_id = cur_range_id
             cur_range_id = self._links[cur_range_id]
             cur_pos = next_pos
-            content_id = start = end = None
+            content = start = end = None
         return (prev_range_id, cur_range_id, pos - cur_pos,
-                content_id, start, end)
+                content, start, end)
 
-    def _new_range_id(self, content_id, start, end):
+    def _new_range_id(self, content, start, end):
         """Get a new range id."""
         self._ranges_counter += 1
-        self._range_map[self._ranges_counter] = (content_id, start, end)
+        self._range_map[self._ranges_counter] = (content, start, end)
         return self._ranges_counter
 
-    def insert(self, start, entries):
+    def insert(self, start, content):
         """Insert a set of entries starting at the first position."""
-        new_content_id = self._add_content(entries)
-
-        new_range_id = self._new_range_id(new_content_id, 0, len(entries))
+        new_range_id = self._new_range_id(content, 0, len(content))
 
         (prev_range_id, cur_range_id, offset,
-         content_id, start, end) = self._find_nodes_for_pos(start)
+         content, start, end) = self._find_nodes_for_pos(start)
 
         if offset == 0:
             # We are inserting before an existing node, or at the exact end of
@@ -167,8 +149,7 @@
                 # We need to break up the current node, and insert the
                 # requested one
                 break_point = start + offset
-                split_range_id = self._new_range_id(content_id, break_point,
-                                                    end)
+                split_range_id = self._new_range_id(content, break_point, end)
                 next_range_id = self._links[cur_range_id]
                 self._links[split_range_id] = next_range_id
                 self._links[new_range_id] = split_range_id
@@ -178,7 +159,7 @@
                 # self._links[prev_range_id] = new_head_range
                 # And then afterwards we would want to delete the cur_range_id
                 # entry in the map
-                self._range_map[cur_range_id] = (content_id, start, break_point)
+                self._range_map[cur_range_id] = (content, start, break_point)
                 self._links[cur_range_id] = new_range_id
 
     def delete(self, start, end):
@@ -196,7 +177,7 @@
             return
 
         (prev_range_id, cur_range_id, offset,
-         content_id, cur_start, cur_end) = self._find_nodes_for_pos(start)
+         content, cur_start, cur_end) = self._find_nodes_for_pos(start)
 
         if cur_range_id is None:
             # We fell off the end, nothing to do
@@ -235,7 +216,7 @@
                 else:
                     # CASE 2: we save the beginning, but remove the tail
                     # This only requires a node entry update
-                    self._range_map[cur_range_id] = (content_id, cur_start,
+                    self._range_map[cur_range_id] = (content, cur_start,
                                                      cur_start + offset)
                     prev_range_id = cur_range_id
                 remove_length -= possible_remove_length
@@ -244,12 +225,12 @@
                 # step to next
                 offset = 0
                 cur_range_id = next_range_id
-                content_id, cur_start, cur_end = self._range_map[next_range_id]
+                content, cur_start, cur_end = self._range_map[next_range_id]
             else:
                 if offset == 0:
                     # CASE 3: We remove only the beginning of this node
                     # Only requires a node entry update
-                    self._range_map[cur_range_id] = (content_id,
+                    self._range_map[cur_range_id] = (content,
                             cur_start + remove_length, cur_end)
                 else:
                     # We have a beginning and a tail, we need to create a new
@@ -260,34 +241,34 @@
                     new_end = cur_start + offset
                     new_start = new_end + remove_length
 
-                    new_tail_range_id = self._new_range_id(content_id,
+                    new_tail_range_id = self._new_range_id(content,
                                                            new_start, cur_end)
                     next_range_id = self._links[cur_range_id]
                     self._links[new_tail_range_id] = next_range_id
 
                     self._links[cur_range_id] = new_tail_range_id
-                    self._range_map[cur_range_id] = (content_id, cur_start,
+                    self._range_map[cur_range_id] = (content, cur_start,
                                                      new_end)
                 # We officially removed enough, so we are done
                 break
                 # we are not removing the start of this node
                 prev_range_id = cur_range_id
 
-    def replace(self, start, end, entries):
+    def replace(self, start, end, content):
         """Replace a section of existing content, with new content.
 
-        This is equivalent to: list[start, end] = entries
+        This is equivalent to: list[start, end] = content
 
         if start == end then content will just be inserted at start. If
-        entries is empty, then the range [start,end) will just be removed.
+        content is empty, then the range [start,end) will just be removed.
 
         :param start: The starting location to be replaced
         :param end: The last position to be removed (non_inclusive)
-        :param entries: An array of new content to use. This will not be
+        :param content: An array of new content to use. This will not be
             modified, so it is safe to pass a list or a tuple.
         :return: None
         """
         if start != end:
             self.delete(start, end)
-        if entries:
-            self.insert(start, entries)
+        if content:
+            self.insert(start, content)

=== modified file 'bzrlib/tests/test_hybrid_linked_list.py'
--- a/bzrlib/tests/test_hybrid_linked_list.py	2007-01-30 03:50:40 +0000
+++ b/bzrlib/tests/test_hybrid_linked_list.py	2007-01-30 16:30:07 +0000
@@ -35,16 +35,17 @@
         self.assertEqual([], list(hll))
 
     def test__find_nodes_for_pos(self):
-        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4, 5, 6, 7, 8])
+        content = [0, 1, 2, 3, 4, 5, 6, 7, 8]
+        hll = hybrid_linked_list.HybridLinkedList(content)
         hll.delete(2, 3)
         hll.delete(5, 7)
         self.assertEqual([0, 1, 3, 4, 5, 8], list(hll))
-        self.assertEqual((None, 0, 0, 0, 0, 2), hll._find_nodes_for_pos(0))
-        self.assertEqual((None, 0, 1, 0, 0, 2), hll._find_nodes_for_pos(1))
-        self.assertEqual((0, 1, 0, 0, 3, 6), hll._find_nodes_for_pos(2))
-        self.assertEqual((0, 1, 1, 0, 3, 6), hll._find_nodes_for_pos(3))
-        self.assertEqual((0, 1, 2, 0, 3, 6), hll._find_nodes_for_pos(4))
-        self.assertEqual((1, 2, 0, 0, 8, 9), hll._find_nodes_for_pos(5))
+        self.assertEqual((None, 0, 0, content, 0, 2), hll._find_nodes_for_pos(0))
+        self.assertEqual((None, 0, 1, content, 0, 2), hll._find_nodes_for_pos(1))
+        self.assertEqual((0, 1, 0, content, 3, 6), hll._find_nodes_for_pos(2))
+        self.assertEqual((0, 1, 1, content, 3, 6), hll._find_nodes_for_pos(3))
+        self.assertEqual((0, 1, 2, content, 3, 6), hll._find_nodes_for_pos(4))
+        self.assertEqual((1, 2, 0, content, 8, 9), hll._find_nodes_for_pos(5))
         self.assertEqual((2, None, 0, None, None, None),
                          hll._find_nodes_for_pos(6))
         self.assertEqual((2, None, 1, None, None, None),
@@ -89,14 +90,14 @@
         hll = self.get_sectioned_hll()
         self.assertEqual(9, len(hll))
         self.assertEqual([0, 1, 2, 3, 4, 5, 6, 7, 8], list(hll))
-        self.assertEqual(3, hll._data_counter)
-        self.assertEqual({0:[0, 1, 2], 1:[3, 4], 2:[5, 6, 7], 3:[8]},
-                         hll._content_map)
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:1, 1:2, 2:3, 3:None}, hll._links)
-        self.assertEqual({0:(0, 0, 3), 1:(1, 0, 2), 2:(2, 0, 3), 3:(3, 0, 1)},
-                         hll._range_map)
+        self.assertEqual({0:([0, 1, 2], 0, 3),
+                          1:([3, 4], 0, 2),
+                          2:([5, 6, 7], 0, 3),
+                          3:([8], 0, 1),
+                         }, hll._range_map)
 
     def test_delete(self):
         hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
@@ -115,8 +116,10 @@
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(1, hll._head)
         self.assertEqual({1:2, 2:3, 3:None}, hll._links)
-        self.assertEqual({1:(1, 0, 2), 2:(2, 0, 3), 3:(3, 0, 1)},
-                         hll._range_map)
+        self.assertEqual({1:([3, 4], 0, 2),
+                          2:([5, 6, 7], 0, 3),
+                          3:([8], 0, 1),
+                         }, hll._range_map)
 
     def test_delete_middle_range(self):
         hll = self.get_sectioned_hll()
@@ -125,8 +128,10 @@
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:2, 2:3, 3:None}, hll._links)
-        self.assertEqual({0:(0, 0, 3), 2:(2, 0, 3), 3:(3, 0, 1)},
-                         hll._range_map)
+        self.assertEqual({0:([0, 1, 2], 0, 3),
+                          2:([5, 6, 7], 0, 3),
+                          3:([8], 0, 1),
+                         }, hll._range_map)
 
     def test_delete_last_range(self):
         hll = self.get_sectioned_hll()
@@ -135,8 +140,10 @@
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:1, 1:2, 2:None}, hll._links)
-        self.assertEqual({0:(0, 0, 3), 1:(1, 0, 2), 2:(2, 0, 3)},
-                         hll._range_map)
+        self.assertEqual({0:([0, 1, 2], 0, 3),
+                          1:([3, 4], 0, 2),
+                          2:([5, 6, 7], 0, 3),
+                         }, hll._range_map)
 
     def test_delete_partial_beginning(self):
         hll = self.get_sectioned_hll()
@@ -145,8 +152,11 @@
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:1, 1:2, 2:3, 3:None}, hll._links)
-        self.assertEqual({0:(0, 2, 3), 1:(1, 0, 2), 2:(2, 0, 3), 3:(3, 0, 1)},
-                         hll._range_map)
+        self.assertEqual({0:([0, 1, 2], 2, 3),
+                          1:([3, 4], 0, 2),
+                          2:([5, 6, 7], 0, 3),
+                          3:([8], 0, 1),
+                         }, hll._range_map)
 
     def test_delete_partial_ending(self):
         hll = self.get_sectioned_hll()
@@ -155,8 +165,11 @@
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:1, 1:2, 2:3, 3:None}, hll._links)
-        self.assertEqual({0:(0, 0, 1), 1:(1, 0, 2), 2:(2, 0, 3), 3:(3, 0, 1)},
-                         hll._range_map)
+        self.assertEqual({0:([0, 1, 2], 0, 1),
+                          1:([3, 4], 0, 2),
+                          2:([5, 6, 7], 0, 3),
+                          3:([8], 0, 1),
+                         }, hll._range_map)
 
     def test_delete_middle_section(self):
         hll = self.get_sectioned_hll()
@@ -165,9 +178,15 @@
         self.assertEqual(4, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:4, 1:2, 2:3, 3:None, 4:1}, hll._links)
-        self.assertEqual({0:(0, 0, 1), 1:(1, 0, 2), 2:(2, 0, 3), 3:(3, 0, 1),
-                          4:(0, 2, 3)
+        self.assertEqual({0:([0, 1, 2], 0, 1),
+                          1:([3, 4], 0, 2),
+                          2:([5, 6, 7], 0, 3),
+                          3:([8], 0, 1),
+                          4:([0, 1, 2], 2, 3),
                          }, hll._range_map)
+        # Make sure that even though we have 2 entries with the same content,
+        # it just copied a reference, not the actual content.
+        self.assertEqual(id(hll._range_map[0][0]), id(hll._range_map[4][0]))
 
     def test_delete_middle(self):
         hll = self.get_sectioned_hll()
@@ -176,8 +195,11 @@
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:1, 1:2, 2:3, 3:None}, hll._links)
-        self.assertEqual({0:(0, 0, 3), 1:(1, 1, 2), 2:(2, 0, 3), 3:(3, 0, 1)},
-                         hll._range_map)
+        self.assertEqual({0:([0, 1, 2], 0, 3),
+                          1:([3, 4], 1, 2),
+                          2:([5, 6, 7], 0, 3),
+                          3:([8], 0, 1),
+                         }, hll._range_map)
 
     def test_delete_across_sections(self):
         hll = self.get_sectioned_hll()
@@ -186,8 +208,10 @@
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:2, 2:3, 3:None}, hll._links)
-        self.assertEqual({0:(0, 0, 1), 2:(2, 2, 3), 3:(3, 0, 1)},
-                         hll._range_map)
+        self.assertEqual({0:([0, 1, 2], 0, 1),
+                          2:([5, 6, 7], 2, 3),
+                          3:([8], 0, 1),
+                         }, hll._range_map)
 
     def test_delete_off_end(self):
         hll = self.get_sectioned_hll()
@@ -196,7 +220,8 @@
         self.assertEqual(3, hll._ranges_counter)
         self.assertEqual(0, hll._head)
         self.assertEqual({0:None}, hll._links)
-        self.assertEqual({0:(0, 0, 1)}, hll._range_map)
+        self.assertEqual({0:([0, 1, 2], 0, 1),
+                         }, hll._range_map)
 
     def test_delete_all(self):
         hll = self.get_sectioned_hll()



More information about the bazaar-commits mailing list