Rev 2246: Create hybrid linked list structure with basic insert functionality. in http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch

John Arbash Meinel john at arbash-meinel.com
Tue Jan 30 00:11:56 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch

------------------------------------------------------------
revno: 2246
revision-id: john at arbash-meinel.com-20070130001148-y92vo2prhnswmoz0
parent: john at arbash-meinel.com-20070127134240-nae88rt1lsm658qn
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: list_patch
timestamp: Mon 2007-01-29 18:11:48 -0600
message:
  Create hybrid linked list structure with basic insert functionality.
  This keeps a set of lists, and breaks them as new inserts are done.
  However, I think it can be done more optimally by just handling a list
  of references. I'll work on that next.
added:
  bzrlib/hybrid_linked_list.py   hybrid_linked_list.p-20070130001028-zlexwz74tzo56o2t-1
  bzrlib/tests/test_hybrid_linked_list.py test_hybrid_linked_l-20070130001028-zlexwz74tzo56o2t-2
modified:
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
-------------- next part --------------
=== added file 'bzrlib/hybrid_linked_list.py'
--- a/bzrlib/hybrid_linked_list.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/hybrid_linked_list.py	2007-01-30 00:11:48 +0000
@@ -0,0 +1,147 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""A hybrid between linked lists and straight arrays."""
+
+
+class HybridLinkedList(object):
+    """A hybrid between linked lists and straight arrays.
+
+    The python "list" implementation is an array where nodes are stored in a
+    linear segment of memory. This makes it somewhat inefficient at inserting
+    and removing nodes.
+
+    On the other hand, you have a pure linked-list implementation, where each
+    node stores a pointer to the other nodes. However, this is inefficient at
+    random access, because you have to iterate over all of the nodes.
+
+    This class attempts to create a hybrid between them. Which creates nodes
+    during insert, update, delete, but generally stores things as arrays.
+
+    The current structure is optimized for being able to modify small sections
+    at a time. But not really move hunks around. (so you can remove a few
+    lines, and add a few lines, but not really move lines around).
+    """
+
+    def __init__(self, starting=[]):
+        """Create a new HybridLinkedList.
+
+        :param starting: Initialize the structure with this list.
+        """
+        # This is a single linked list
+        # We use simple integers for the ids.
+        # None is the marker for end of list.
+        self._counter = 0
+        self._head = 0
+        # id => content
+        self._nodes = {self._head:starting}
+        # id => next id
+        self._links = {self._head:None}
+
+    def __iter__(self):
+        """Iterate over the final content."""
+        cur_node = self._head
+
+        while cur_node is not None:
+            for entry in self._nodes[cur_node]:
+                yield entry
+            cur_node = self._links[cur_node]
+
+    def __len__(self):
+        return sum(len(entries) for entries in self._nodes.itervalues())
+
+    def flatten(self):
+        """Flatten the linked structure into a single list."""
+        flattened = list(self)
+        self._head = 0
+        self._counter = 0
+        self._nodes = {self._head:flattened}
+        self._links = {self._head:None}
+
+    def _find_nodes_for_pos(self, pos):
+        """Figure out what nodes correspond to the given position.
+
+        :return: (prev_node, cur_node, offset_in_cur_node)
+            Where prev_node is the node pointing to cur_node (used for
+            maintaining the single-linked-list structure)
+            cur_node is the node where the position would be found
+            offset_in_cur_node is the offset into the current node
+        """
+        prev_node = None
+        cur_node = self._head
+        cur_pos = 0
+
+        while cur_node is not None:
+            cur_entries = self._nodes[cur_node]
+            next_pos = cur_pos + len(cur_entries)
+            if pos < next_pos:
+                break
+            prev_node = cur_node
+            cur_node = self._links[cur_node]
+            cur_pos = next_pos
+        return prev_node, cur_node, pos - cur_pos
+
+    def _new_node_id(self):
+        """Get a new node id."""
+        self._counter += 1
+        return self._counter
+
+    def insert(self, start, entries):
+        """Insert a set of entries starting at the first position."""
+        new_node_id = self._new_node_id()
+        self._nodes[new_node_id] = entries
+
+        prev_node, cur_node, cur_pos = self._find_nodes_for_pos(start)
+
+        if cur_pos == 0:
+            # We are inserting before an existing node, or at the exact end of
+            # the list
+            if prev_node is None:
+                # We are at _head, update its reference
+                self._links[new_node_id] = self._head
+                self._head = new_node_id
+            else:
+                self._links[new_node_id] = cur_node
+                self._links[prev_node] = new_node_id
+        else:
+            # We are inserting inside an existing node
+            if cur_node is None:
+                # We are appending a node
+                # python lists have the property that x[BIG_NUM:BIGNUM] = [10]
+                # will always append 10 to the list, even though it is out of
+                # range
+                # TODO: For now, I'm duplicating that, but it seems to make
+                #   more sense to raise IndexError or something
+                self._links[new_node_id] = None
+                self._links[prev_node] = new_node_id
+            else:
+                # We need to break up the current node, and insert the
+                # requested one
+                split_node_id = self._new_node_id()
+
+                next_node_id = self._links[cur_node]
+
+                cur_entries = self._nodes[cur_node]
+                split_entries = cur_entries[cur_pos:]
+                cur_entries = cur_entries[:cur_pos]
+
+                self._nodes[split_node_id] = split_entries
+
+                self._links[split_node_id] = next_node_id
+                self._links[new_node_id] = split_node_id
+
+                self._nodes[cur_node] = cur_entries
+                self._links[cur_node] = new_node_id

=== added file 'bzrlib/tests/test_hybrid_linked_list.py'
--- a/bzrlib/tests/test_hybrid_linked_list.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_hybrid_linked_list.py	2007-01-30 00:11:48 +0000
@@ -0,0 +1,104 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Test the specialized memory structure for patch application."""
+
+from bzrlib import (
+    hybrid_linked_list,
+    tests,
+    )
+
+
+class TestHybridLinkedList(tests.TestCase):
+
+    def test_create_and_iterate(self):
+        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+        self.assertEqual([0, 1, 2, 3, 4], list(hll))
+        self.assertEqual(5, len(hll))
+
+    def test_empty(self):
+        hll = hybrid_linked_list.HybridLinkedList([])
+        self.assertEqual(0, len(hll))
+        self.assertEqual([], list(hll))
+
+    def test__find_nodes_for_pos(self):
+        hll = hybrid_linked_list.HybridLinkedList([])
+        hll._nodes = {0:[0, 1],
+                      1:[2, 3, 4],
+                      2:[5],
+                     }
+        hll._links = {0:1, 1:2, 2:None}
+        self.assertEqual((None, 0, 0), hll._find_nodes_for_pos(0))
+        self.assertEqual((None, 0, 1), hll._find_nodes_for_pos(1))
+        self.assertEqual((0, 1, 0), hll._find_nodes_for_pos(2))
+        self.assertEqual((0, 1, 1), hll._find_nodes_for_pos(3))
+        self.assertEqual((0, 1, 2), hll._find_nodes_for_pos(4))
+        self.assertEqual((1, 2, 0), hll._find_nodes_for_pos(5))
+        self.assertEqual((2, None, 0), hll._find_nodes_for_pos(6))
+        self.assertEqual((2, None, 1), hll._find_nodes_for_pos(7))
+
+    def test_insert(self):
+        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+        hll.insert(3, [5, 6, 7])
+        # self.assertEqual([[0, 1, 2], [5, 6, 7], [3, 4]], hll._nodes)
+        self.assertEqual([0, 1, 2, 5, 6, 7, 3, 4], list(hll))
+        self.assertEqual(8, len(hll))
+        hll.flatten()
+        # self.assertEqual([[0, 1, 2, 5, 6, 7, 3, 4]], hll._nodes)
+
+    def test_insert_beginning(self):
+        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+        hll.insert(0, [5])
+        self.assertEqual([5, 0, 1, 2, 3, 4], list(hll))
+
+    def test_insert_end(self):
+        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+        hll.insert(5, [5])
+        self.assertEqual([0, 1, 2, 3, 4, 5], list(hll))
+
+#    def test_delete(self):
+#        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+#        hll.delete(2, 3) # del list[2:3]
+#        self.assertEqual([0, 1, 3, 4], list(hll))
+#
+#    def test_delete_multi(self):
+#        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+#        hll.delete(1, 3) # del list[2:3]
+#        self.assertEqual([0, 3, 4], list(hll))
+#
+#    def test_replace_insert_beginning(self):
+#        """Test replacing when we are purely inserting."""
+#        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+#        hll.replace(0, 0, [5, 6, 7]) # list[0:0] = [5, 6, 7]
+#        self.assertEqual([5, 6, 7, 0, 1, 2, 3, 4], list(hll))
+#
+#    def test_replace_insert_middle(self):
+#        """Test replacing when we are purely inserting."""
+#        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+#        hll.replace(3, 3, [5, 6, 7]) # list[3:3] = [5, 6, 7]
+#        self.assertEqual([0, 1, 2, 5, 6, 7, 3, 4], list(hll))
+#
+#    def test_replace_insert_end(self):
+#        """Test replacing when we are purely inserting."""
+#        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+#        hll.replace(5, 5, [5, 6, 7]) # list[5:5] = [5, 6, 7]
+#        self.assertEqual([0, 1, 2, 3, 4, 5, 6, 7], list(hll))
+#
+#    def test_replace_beginning(self):
+#        hll = hybrid_linked_list.HybridLinkedList([0, 1, 2, 3, 4])
+#        hll.replace(0, 1, [5])
+#        self.assertEqual([[5], [1, 2, 3, 4]], hll._nodes)
+#        self.assertEqual([5, 1, 2, 3, 4], list(hll))

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2007-01-19 19:42:19 +0000
+++ b/bzrlib/tests/__init__.py	2007-01-30 00:11:48 +0000
@@ -1774,6 +1774,7 @@
                    'bzrlib.tests.test_hashcache',
                    'bzrlib.tests.test_http',
                    'bzrlib.tests.test_http_response',
+                   'bzrlib.tests.test_hybrid_linked_list',
                    'bzrlib.tests.test_identitymap',
                    'bzrlib.tests.test_ignores',
                    'bzrlib.tests.test_inv',



More information about the bazaar-commits mailing list