Rev 2246: Create hybrid linked list structure with basic insert functionality. in http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch
John Arbash Meinel
john at arbash-meinel.com
Tue Jan 30 00:11:56 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch
------------------------------------------------------------
revno: 2246
revision-id: john at arbash-meinel.com-20070130001148-y92vo2prhnswmoz0
parent: john at arbash-meinel.com-20070127134240-nae88rt1lsm658qn
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: list_patch
timestamp: Mon 2007-01-29 18:11:48 -0600
message:
Create hybrid linked list structure with basic insert functionality.
This keeps a set of lists, and breaks them as new inserts are done.
However, I think it can be done more optimally by just handling a list
of references. I'll work on that next.
added:
bzrlib/hybrid_linked_list.py hybrid_linked_list.p-20070130001028-zlexwz74tzo56o2t-1
bzrlib/tests/test_hybrid_linked_list.py test_hybrid_linked_l-20070130001028-zlexwz74tzo56o2t-2
modified:
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
-------------- next part --------------
=== added file 'bzrlib/hybrid_linked_list.py'
--- a/bzrlib/hybrid_linked_list.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/hybrid_linked_list.py 2007-01-30 00:11:48 +0000
@@ -0,0 +1,147 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""A hybrid between linked lists and straight arrays."""
+
+
+class HybridLinkedList(object):
+ """A hybrid between linked lists and straight arrays.
+
+ The python "list" implementation is an array where nodes are stored in a
+ linear segment of memory. This makes it somewhat inefficient at inserting
+ and removing nodes.
+
+ On the other hand, you have a pure linked-list implementation, where each
+ node stores a pointer to the other nodes. However, this is inefficient at
+ random access, because you have to iterate over all of the nodes.
+
+ This class attempts to create a hybrid between them. Which creates nodes
+ during insert, update, delete, but generally stores things as arrays.
+
+ The current structure is optimized for being able to modify small sections
+ at a time. But not really move hunks around. (so you can remove a few
+ lines, and add a few lines, but not really move lines around).
+ """
+
+ def __init__(self, starting=[]):
+ """Create a new HybridLinkedList.
+
+ :param starting: Initialize the structure with this list.
+ """
+ # This is a single linked list
+ # We use simple integers for the ids.
+ # None is the marker for end of list.
+ self._counter = 0
+ self._head = 0
+ # id => content
+ self._nodes = {self._head:starting}
+ # id => next id
+ self._links = {self._head:None}
+
+ def __iter__(self):
+ """Iterate over the final content."""
+ cur_node = self._head
+
+ while cur_node is not None:
+ for entry in self._nodes[cur_node]:
+ yield entry
+ cur_node = self._links[cur_node]
+
+ def __len__(self):
+ return sum(len(entries) for entries in self._nodes.itervalues())
+
+ def flatten(self):
+ """Flatten the linked structure into a single list."""
+ flattened = list(self)
+ self._head = 0
+ self._counter = 0
+ self._nodes = {self._head:flattened}
+ self._links = {self._head:None}
+
+ def _find_nodes_for_pos(self, pos):
+ """Figure out what nodes correspond to the given position.
+
+ :return: (prev_node, cur_node, offset_in_cur_node)
+ Where prev_node is the node pointing to cur_node (used for
+ maintaining the single-linked-list structure)
+ cur_node is the node where the position would be found
+ offset_in_cur_node is the offset into the current node
+ """
+ prev_node = None
+ cur_node = self._head
+ cur_pos = 0
+
+ while cur_node is not None:
+ cur_entries = self._nodes[cur_node]
+ next_pos = cur_pos + len(cur_entries)
+ if pos < next_pos:
+ break
+ prev_node = cur_node
+ cur_node = self._links[cur_node]
+ cur_pos = next_pos
+ return prev_node, cur_node, pos - cur_pos
+
+ def _new_node_id(self):
+ """Get a new node id."""
+ self._counter += 1
+ return self._counter
+
+ def insert(self, start, entries):
+ """Insert a set of entries starting at the first position."""
+ new_node_id = self._new_node_id()
+ self._nodes[new_node_id] = entries
+
+ prev_node, cur_node, cur_pos = self._find_nodes_for_pos(start)
+
+ if cur_pos == 0:
+ # We are inserting before an existing node, or at the exact end of
+ # the list
+ if prev_node is None:
+ # We are at _head, update its reference
+ self._links[new_node_id] = self._head
+ self._head = new_node_id
+ else:
+ self._links[new_node_id] = cur_node
+ self._links[prev_node] = new_node_id
+ else:
+ # We are inserting inside an existing node
+ if cur_node is None:
+ # We are appending a node
+ # python lists have the property that x[BIG_NUM:BIGNUM] = [10]
+ # will always append 10 to the list, even though it is out of
+ # range
+ # TODO: For now, I'm duplicating that, but it seems to make
+ # more sense to raise IndexError or something
+ self._links[new_node_id] = None
+ self._links[prev_node] = new_node_id
+ else:
+ # We need to break up the current node, and insert the
+ # requested one
+ split_node_id = self._new_node_id()
+
+ next_node_id = self._links[cur_node]
+
+ cur_entries = self._nodes[cur_node]
+ split_entries = cur_entries[cur_pos:]
+ cur_entries = cur_entries[:cur_pos]
+
+ self._nodes[split_node_id] = split_entries
+
+ self._links[split_node_id] = next_node_id
+ self._links[new_node_id] = split_node_id
+
+ self._nodes[cur_node] = cur_entries
+ self._links[cur_node] = new_node_id
=== added file 'bzrlib/tests/test_hybrid_linked_list.py'
--- a/bzrlib/tests/test_hybrid_linked_list.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_hybrid_linked_list.py 2007-01-30 00:11:48 +0000
@@ -0,0 +1,104 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Test the specialized memory structure for patch application."""
+
+from bzrlib import (
+ hybrid_linked_list,
+ tests,
+ )
+
+
+class TestHybridLinkedList(tests.TestCase):
+
+ def test_create_and_iterate(self):
+ hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+ self.assertEqual([0, 1, 2, 3, 4], list(hll))
+ self.assertEqual(5, len(hll))
+
+ def test_empty(self):
+ hll = hybrid_linked_list.HybridLinkedList([])
+ self.assertEqual(0, len(hll))
+ self.assertEqual([], list(hll))
+
+ def test__find_nodes_for_pos(self):
+ hll = hybrid_linked_list.HybridLinkedList([])
+ hll._nodes = {0:[0, 1],
+ 1:[2, 3, 4],
+ 2:[5],
+ }
+ hll._links = {0:1, 1:2, 2:None}
+ self.assertEqual((None, 0, 0), hll._find_nodes_for_pos(0))
+ self.assertEqual((None, 0, 1), hll._find_nodes_for_pos(1))
+ self.assertEqual((0, 1, 0), hll._find_nodes_for_pos(2))
+ self.assertEqual((0, 1, 1), hll._find_nodes_for_pos(3))
+ self.assertEqual((0, 1, 2), hll._find_nodes_for_pos(4))
+ self.assertEqual((1, 2, 0), hll._find_nodes_for_pos(5))
+ self.assertEqual((2, None, 0), hll._find_nodes_for_pos(6))
+ self.assertEqual((2, None, 1), hll._find_nodes_for_pos(7))
+
+ def test_insert(self):
+ hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+ hll.insert(3, [5, 6, 7])
+ # self.assertEqual([[0, 1, 2], [5, 6, 7], [3, 4]], hll._nodes)
+ self.assertEqual([0, 1, 2, 5, 6, 7, 3, 4], list(hll))
+ self.assertEqual(8, len(hll))
+ hll.flatten()
+ # self.assertEqual([[0, 1, 2, 5, 6, 7, 3, 4]], hll._nodes)
+
+ def test_insert_beginning(self):
+ hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+ hll.insert(0, [5])
+ self.assertEqual([5, 0, 1, 2, 3, 4], list(hll))
+
+ def test_insert_end(self):
+ hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+ hll.insert(5, [5])
+ self.assertEqual([0, 1, 2, 3, 4, 5], list(hll))
+
+# def test_delete(self):
+# hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+# hll.delete(2, 3) # del list[2:3]
+# self.assertEqual([0, 1, 3, 4], list(hll))
+#
+# def test_delete_multi(self):
+# hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+# hll.delete(1, 3) # del list[2:3]
+# self.assertEqual([0, 3, 4], list(hll))
+#
+# def test_replace_insert_beginning(self):
+# """Test replacing when we are purely inserting."""
+# hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+# hll.replace(0, 0, [5, 6, 7]) # list[0:0] = [5, 6, 7]
+# self.assertEqual([5, 6, 7, 0, 1, 2, 3, 4], list(hll))
+#
+# def test_replace_insert_middle(self):
+# """Test replacing when we are purely inserting."""
+# hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+# hll.replace(3, 3, [5, 6, 7]) # list[3:3] = [5, 6, 7]
+# self.assertEqual([0, 1, 2, 5, 6, 7, 3, 4], list(hll))
+#
+# def test_replace_insert_end(self):
+# """Test replacing when we are purely inserting."""
+# hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+# hll.replace(5, 5, [5, 6, 7]) # list[5:5] = [5, 6, 7]
+# self.assertEqual([0, 1, 2, 3, 4, 5, 6, 7], list(hll))
+#
+# def test_replace_beginning(self):
+# hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+# hll.replace(0, 1, [5])
+# self.assertEqual([[5], [1, 2, 3, 4]], hll._nodes)
+# self.assertEqual([5, 1, 2, 3, 4], list(hll))
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2007-01-19 19:42:19 +0000
+++ b/bzrlib/tests/__init__.py 2007-01-30 00:11:48 +0000
@@ -1774,6 +1774,7 @@
'bzrlib.tests.test_hashcache',
'bzrlib.tests.test_http',
'bzrlib.tests.test_http_response',
+ 'bzrlib.tests.test_hybrid_linked_list',
'bzrlib.tests.test_identitymap',
'bzrlib.tests.test_ignores',
'bzrlib.tests.test_inv',
More information about the bazaar-commits
mailing list