Rev 2247: Use a different structure. in http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch
John Arbash Meinel
john at arbash-meinel.com
Tue Jan 30 01:19:01 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch
------------------------------------------------------------
revno: 2247
revision-id: john at arbash-meinel.com-20070130011856-ifomln3u7jxv6yf8
parent: john at arbash-meinel.com-20070130001148-y92vo2prhnswmoz0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: list_patch
timestamp: Mon 2007-01-29 19:18:56 -0600
message:
Use a different structure.
The new structure just keeps a linked list of range references.
This means that the large content lists are never modified.
The only thing we have to modify is a dictionary of tuples.
Which should mean we don't have to worry about collapsing the deltas,
because in essence, that is what we are doing here.
modified:
bzrlib/hybrid_linked_list.py hybrid_linked_list.p-20070130001028-zlexwz74tzo56o2t-1
bzrlib/tests/test_hybrid_linked_list.py test_hybrid_linked_l-20070130001028-zlexwz74tzo56o2t-2
-------------- next part --------------
=== modified file 'bzrlib/hybrid_linked_list.py'
--- a/bzrlib/hybrid_linked_list.py 2007-01-30 00:11:48 +0000
+++ b/bzrlib/hybrid_linked_list.py 2007-01-30 01:18:56 +0000
@@ -41,107 +41,136 @@
:param starting: Initialize the structure with this list.
"""
- # This is a single linked list
- # We use simple integers for the ids.
- # None is the marker for end of list.
- self._counter = 0
+ self._reset(starting)
+
+ def _reset(self, starting):
+ """Reset the internal structures to hold a single list."""
+ self._data_counter = 0
+ self._ranges_counter = 0
+
+ # This uses 3 maps.
+ # one map is just from simple id => content
+ self._content_map = {0:starting}
+
+ # This is a map from range_id => (content_node_id, start, end)
+ # This allows us to avoid doing any work on the supplied content.
+ # Instead, we just keep updating the information about what content
+ # should be extracted at the right time
self._head = 0
- # id => content
- self._nodes = {self._head:starting}
- # id => next id
- self._links = {self._head:None}
+ self._range_map = {self._head:(0, 0, len(starting))}
+ # This is a singly linked list map for range to next range
+ self._links = {0:None}
def __iter__(self):
"""Iterate over the final content."""
- cur_node = self._head
+ cur_range_id = self._head
- while cur_node is not None:
- for entry in self._nodes[cur_node]:
- yield entry
- cur_node = self._links[cur_node]
+ while cur_range_id is not None:
+ content_id, start, end = self._range_map[cur_range_id]
+ content = self._content_map[content_id]
+ for pos in xrange(start, end):
+ yield content[pos]
+ cur_range_id = self._links[cur_range_id]
def __len__(self):
- return sum(len(entries) for entries in self._nodes.itervalues())
+ cur_range_id = self._head
+ length = 0
+ while cur_range_id is not None:
+ content_id, start, end = self._range_map[cur_range_id]
+ length += (end - start)
+ cur_range_id = self._links[cur_range_id]
+ return length
+
+ def __getitem__(self, pos):
+ prev_range_id, cur_range_id, offset = self._find_nodes_for_pos(pos)
+ content_id, start, end = self._range_map[cur_range_id]
+ return self._content_map[content_id][start+offset]
def flatten(self):
"""Flatten the linked structure into a single list."""
- flattened = list(self)
- self._head = 0
- self._counter = 0
- self._nodes = {self._head:flattened}
- self._links = {self._head:None}
+ self._reset(list(self))
+
+ def _add_content(self, entries):
+ self._data_counter += 1
+ self._content_map[self._data_counter] = entries
+ return self._data_counter
def _find_nodes_for_pos(self, pos):
"""Figure out what nodes correspond to the given position.
- :return: (prev_node, cur_node, offset_in_cur_node)
- Where prev_node is the node pointing to cur_node (used for
+ :return: (prev_range_id, cur_range_id, offset_from_cur_start)
+ Where prev_range_id is the node pointing to cur_range_id (used for
maintaining the single-linked-list structure)
- cur_node is the node where the position would be found
- offset_in_cur_node is the offset into the current node
+ cur_range_id is the node where the position would be found
+ offset_in_cur_node is the offset into the current node (from start)
"""
- prev_node = None
- cur_node = self._head
+ # TODO: Pretty much everyone who calls _find_nodes_for_pos then needs
+ # to turn around and look up the cur_range_id in the _range_map. So
+ # extend the signature to return the extracted values.
+ prev_range_id = None
+ cur_range_id = self._head
cur_pos = 0
- while cur_node is not None:
- cur_entries = self._nodes[cur_node]
- next_pos = cur_pos + len(cur_entries)
+ while cur_range_id is not None:
+ content_id, start, end = self._range_map[cur_range_id]
+ next_pos = cur_pos + (end - start)
if pos < next_pos:
break
- prev_node = cur_node
- cur_node = self._links[cur_node]
+ prev_range_id = cur_range_id
+ cur_range_id = self._links[cur_range_id]
cur_pos = next_pos
- return prev_node, cur_node, pos - cur_pos
+ return prev_range_id, cur_range_id, pos - cur_pos
- def _new_node_id(self):
- """Get a new node id."""
- self._counter += 1
- return self._counter
+ def _new_range_id(self, content_id, start, end):
+ """Get a new range id."""
+ self._ranges_counter += 1
+ self._range_map[self._ranges_counter] = (content_id, start, end)
+ return self._ranges_counter
def insert(self, start, entries):
"""Insert a set of entries starting at the first position."""
- new_node_id = self._new_node_id()
- self._nodes[new_node_id] = entries
-
- prev_node, cur_node, cur_pos = self._find_nodes_for_pos(start)
-
- if cur_pos == 0:
+ new_content_id = self._add_content(entries)
+
+ new_range_id = self._new_range_id(new_content_id, 0, len(entries))
+
+ prev_range_id, cur_range_id, offset = self._find_nodes_for_pos(start)
+
+ if offset == 0:
# We are inserting before an existing node, or at the exact end of
# the list
- if prev_node is None:
+ if prev_range_id is None:
# We are at _head, update its reference
- self._links[new_node_id] = self._head
- self._head = new_node_id
+ self._links[new_range_id] = self._head
+ self._head = new_range_id
else:
- self._links[new_node_id] = cur_node
- self._links[prev_node] = new_node_id
+ self._links[new_range_id] = cur_range_id
+ self._links[prev_range_id] = new_range_id
else:
# We are inserting inside an existing node
- if cur_node is None:
+ if cur_range_id is None:
# We are appending a node
# python lists have the property that x[BIG_NUM:BIGNUM] = [10]
# will always append 10 to the list, even though it is out of
# range
# TODO: For now, I'm duplicating that, but it seems to make
# more sense to raise IndexError or something
- self._links[new_node_id] = None
- self._links[prev_node] = new_node_id
+ self._links[new_range_id] = None
+ self._links[prev_range_id] = new_range_id
else:
# We need to break up the current node, and insert the
# requested one
- split_node_id = self._new_node_id()
-
- next_node_id = self._links[cur_node]
-
- cur_entries = self._nodes[cur_node]
- split_entries = cur_entries[cur_pos:]
- cur_entries = cur_entries[:cur_pos]
-
- self._nodes[split_node_id] = split_entries
-
- self._links[split_node_id] = next_node_id
- self._links[new_node_id] = split_node_id
-
- self._nodes[cur_node] = cur_entries
- self._links[cur_node] = new_node_id
+ content_id, start, end = self._range_map[cur_range_id]
+ break_point = start + offset
+ split_range_id = self._new_range_id(content_id, break_point,
+ end)
+ next_range_id = self._links[cur_range_id]
+ self._links[split_range_id] = next_range_id
+ self._links[new_range_id] = split_range_id
+
+ # We could make this atomic by creating a new id for the
+ # initial range, and then the atomic step is
+ # self._links[prev_range_id] = new_head_range
+ # And then afterwards we would want to delete the cur_range_id
+ # entry in the map
+ self._range_map[cur_range_id] = (content_id, start, break_point)
+ self._links[cur_range_id] = new_range_id
=== modified file 'bzrlib/tests/test_hybrid_linked_list.py'
--- a/bzrlib/tests/test_hybrid_linked_list.py 2007-01-30 00:11:48 +0000
+++ b/bzrlib/tests/test_hybrid_linked_list.py 2007-01-30 01:18:56 +0000
@@ -36,11 +36,13 @@
def test__find_nodes_for_pos(self):
hll = hybrid_linked_list.HybridLinkedList([])
- hll._nodes = {0:[0, 1],
- 1:[2, 3, 4],
- 2:[5],
- }
+ hll._range_map = {0:(0, 0, 2),
+ 1:(0, 3, 6),
+ 2:(0, 6, 7),
+ }
hll._links = {0:1, 1:2, 2:None}
+ hll._content_map = {0:[0, 1, 2, 3, 4, 5, 6, 7, 8]}
+ self.assertEqual([0, 1, 3, 4, 5, 6], list(hll))
self.assertEqual((None, 0, 0), hll._find_nodes_for_pos(0))
self.assertEqual((None, 0, 1), hll._find_nodes_for_pos(1))
self.assertEqual((0, 1, 0), hll._find_nodes_for_pos(2))
@@ -50,6 +52,18 @@
self.assertEqual((2, None, 0), hll._find_nodes_for_pos(6))
self.assertEqual((2, None, 1), hll._find_nodes_for_pos(7))
+ def test__getitem__(self):
+ hll = hybrid_linked_list.HybridLinkedList([])
+ hll._range_map = {0:(0, 0, 2),
+ 1:(0, 3, 6),
+ 2:(0, 6, 7),
+ }
+ hll._links = {0:1, 1:2, 2:None}
+ hll._content_map = {0:[0, 1, 2, 3, 4, 5, 6, 7, 8]}
+ self.assertEqual([0, 1, 3, 4, 5, 6], list(hll))
+ for expected, pos in zip([0, 1, 3, 4, 5, 6], xrange(0,5)):
+ self.assertEqual(expected, hll[pos])
+
def test_insert(self):
hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
hll.insert(3, [5, 6, 7])
More information about the bazaar-commits
mailing list