Rev 2259: Keep track of the current length of the HybridLinkedList. in http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch

John Arbash Meinel john at arbash-meinel.com
Tue Jan 30 20:19:53 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.15-dev/list_patch

------------------------------------------------------------
revno: 2259
revision-id: john at arbash-meinel.com-20070130201946-5jsmey4eftrb4s6a
parent: john at arbash-meinel.com-20070130190506-vurde2hqemerst4x
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: list_patch
timestamp: Tue 2007-01-30 14:19:46 -0600
message:
  Keep track of the current length of the HybridLinkedList.
  Profiling when actually using the HLL showed that __len__ was taking
  a measurable amount of time.
modified:
  bzrlib/hybrid_linked_list.py   hybrid_linked_list.p-20070130001028-zlexwz74tzo56o2t-1
-------------- next part --------------
=== modified file 'bzrlib/hybrid_linked_list.py'
--- a/bzrlib/hybrid_linked_list.py	2007-01-30 19:05:06 +0000
+++ b/bzrlib/hybrid_linked_list.py	2007-01-30 20:19:46 +0000
@@ -40,9 +40,10 @@
     :ivar _links: This maps each range to the next range. It could technically
         be stored with _ranges, but it makes it easier to update links if they
         are in a separate dict. (less tuple churn)
+    :ivar _length: The current length of the output content
     """
 
-    __slots__ = ['_counter', '_head', '_ranges', '_links']
+    __slots__ = ['_counter', '_head', '_ranges', '_links', '_length']
 
     def __init__(self, starting=[]):
         """Create a new HybridLinkedList.
@@ -64,6 +65,7 @@
         self._ranges = {}
         # This is a singly linked list map for range to next range
         self._links = {}
+        self._length = 0
 
         if starting:
             self.insert(0, starting)
@@ -79,24 +81,16 @@
             cur_range_id = self._links[cur_range_id]
 
     def __len__(self):
-        # TODO: jam 20070130 Should we fast path __len__? We could always just
-        #       keep a reference of the current length, and updated it when we
-        #       add or delete entries. list() uses it, as does __getitem__
-        cur_range_id = self._head
-        length = 0
-        while cur_range_id is not None:
-            start, end, content = self._ranges[cur_range_id]
-            length += (end - start)
-            cur_range_id = self._links[cur_range_id]
-        return length
+        # Profiling shows __len__ to be adding a lot of overhead, so we use a
+        # fast-path
+        return self._length
 
     def __eq__(self, other):
         """Check for equality"""
         if isinstance(other, (list, tuple, HybridLinkedList)):
             # Equality is determined based on content, not on exact definitions
             other_length = len(other)
-            self_length = len(self)
-            if other_length != self_length:
+            if other_length != self._length:
                 return False
             for self_val, other_val in izip(self, other):
                 if self_val != other_val:
@@ -213,10 +207,11 @@
 
     def insert(self, start, content):
         """Insert a set of entries starting at the first position."""
-        new_range_id = self._new_range_id(0, len(content), content)
+        content_len = len(content)
+        new_range_id = self._new_range_id(0, content_len, content)
 
         (prev_range_id, cur_range_id, offset,
-         start, end, content) = self._find_nodes_for_pos(start)
+         start, end, cur_content) = self._find_nodes_for_pos(start)
 
         if offset == 0:
             # We are inserting before an existing node, or at the exact end of
@@ -233,7 +228,7 @@
             # We need to break up the current node, and insert the
             # requested one
             break_point = start + offset
-            split_range_id = self._new_range_id(break_point, end, content)
+            split_range_id = self._new_range_id(break_point, end, cur_content)
             next_range_id = self._links[cur_range_id]
             self._links[split_range_id] = next_range_id
             self._links[new_range_id] = split_range_id
@@ -243,7 +238,7 @@
             # self._links[prev_range_id] = new_head_range
             # And then afterwards we would want to delete the cur_range_id
             # entry in the map
-            self._ranges[cur_range_id] = (start, break_point, content)
+            self._ranges[cur_range_id] = (start, break_point, cur_content)
             self._links[cur_range_id] = new_range_id
         else:
             # We are appending a node
@@ -254,6 +249,7 @@
             #   more sense to raise IndexError or something
             self._links[new_range_id] = None
             self._links[prev_range_id] = new_range_id
+        self._length += content_len
 
     def delete(self, start, end):
         """Remove a section from the content.
@@ -314,6 +310,7 @@
                                                      content)
                     prev_range_id = cur_range_id
                 remove_length -= possible_remove_length
+                self._length -= possible_remove_length
                 if remove_length == 0:
                     break
                 # step to next
@@ -344,6 +341,7 @@
                     self._ranges[cur_range_id] = (cur_start, new_end,
                                                      content)
                 # We officially removed enough, so we are done
+                self._length -= remove_length
                 break
                 # we are not removing the start of this node
                 prev_range_id = cur_range_id
@@ -379,4 +377,5 @@
         new_hll._head = self._head
         new_hll._ranges = self._ranges.copy()
         new_hll._links = self._links.copy()
+        new_hll._length = self._length
         return new_hll



More information about the bazaar-commits mailing list