Rev 3821: Play around with not using prefix extraction. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack

John Arbash Meinel john at arbash-meinel.com
Tue Dec 23 23:51:09 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack

------------------------------------------------------------
revno: 3821
revision-id: john at arbash-meinel.com-20081223235053-2zcki62ikvtadozq
parent: john at arbash-meinel.com-20081223221609-62qykvgj154t892r
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hack
timestamp: Tue 2008-12-23 17:50:53 -0600
message:
  Play around with not using prefix extraction.
  
  Fix up the Packer code to handle the fact that chk pages are delta-compressed.
  This happens during 'autopack'.
  
  Change pack.finish() to use transport.move() instead of transport.rename().
  Also allow using the cached bytes as a parent text, rather than asking
  the store to extract it again.
  
  Oh, and use intern() in place of _get_cached_ascii(), because it seems
  to save a decent amount of memory when doing large conversions.
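
To make the prefix-extraction experiment concrete: with extraction on,
the common serialised prefix of a page's entries is written once on a
header line and stripped from every entry line; forcing prefix_len to 0
(the commented-out lines in the patch below) writes the full key on
every line. A rough standalone sketch of the size difference, in the
Python 2 of this era; this is not bzrlib code, and the real page format
carries more header fields:

import os

def serialised_size(items, use_prefix_extraction=True):
    """Approximate byte count for a leaf page's entry lines."""
    lines = ['%s\x00%s\n' % (key, value)
             for key, value in sorted(items.items())]
    raw_size = sum(len(line) for line in lines)
    if use_prefix_extraction:
        common = os.path.commonprefix(lines)
        # The shared prefix is stored once on its own line, then
        # stripped from each entry line.
        return len(common) + 1 + raw_size - len(common) * len(lines)
    # Empty prefix line; every entry carries its full key.
    return 1 + raw_size

items = {'sha1:aa11': 'x', 'sha1:aa12': 'y', 'sha1:aa13': 'z'}
print serialised_size(items, True)   # 21: prefix stored once
print serialised_size(items, False)  # 37: prefix repeated per entry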
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2008-12-23 21:51:51 +0000
+++ b/bzrlib/chk_map.py	2008-12-23 23:50:53 +0000
@@ -43,7 +43,7 @@
 
 from bzrlib import lazy_import
 lazy_import.lazy_import(globals(), """
-from bzrlib import errors, trace, versionedfile
+from bzrlib import errors, knit, trace, versionedfile
 """)
 from bzrlib import osutils
 from bzrlib.lru_cache import LRUCache
@@ -506,6 +506,7 @@
             # And then that common prefix will not be stored in any of the
             # entry lines
             prefix_len = len(self._common_serialised_prefix)
+            # prefix_len = 0
             bytes_for_items = (self._raw_size - (prefix_len * self._len))
         return (9 # 'chkleaf:\n'
             + len(str(self._maximum_size)) + 1
@@ -682,6 +683,8 @@
         if self._common_serialised_prefix is None:
             lines.append('\n')
         else:
+            # lines.append('\n')
+            # prefix_len = 0
             lines.append('%s\n' % (self._common_serialised_prefix,))
             prefix_len = len(self._common_serialised_prefix)
         for key, value in sorted(self._items.items()):
@@ -1000,6 +1003,8 @@
         lines.append("%d\n" % self._key_width)
         lines.append("%d\n" % self._len)
         assert self._lookup_prefix is not None
+        # lines.append('\n')
+        # prefix_len = 0
         lines.append('%s\n' % (self._lookup_prefix,))
         prefix_len = len(self._lookup_prefix)
         for prefix, node in sorted(self._items.items()):
@@ -1014,11 +1019,20 @@
         key = ('sha1:' + sha1,)
         if self._orig_key is None:
             parents = ()
+            parent_texts = None
         else:
             parents = (self._orig_key,)
+            orig_bytes = _page_cache.get(self._orig_key, None)
+            if orig_bytes is None:
+                parent_texts = None
+            else:
+                parent_lines = osutils.chunks_to_lines([orig_bytes])
+                parent_content = knit.PlainKnitContent(parent_lines, self._orig_key)
+                parent_texts = {self._orig_key: parent_content}
         if not store.has_key(key):
             # We know the key doesn't exist, because we just checked
-            store.add_lines(key, parents, lines, random_id=True)
+            store.add_lines(key, parents, lines, random_id=True,
+                            parent_texts=parent_texts)
         self._key = key
         self._orig_key = self._key
         _page_cache.add(self._key, ''.join(lines))
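
The pattern in that last hunk, condensed: delta compression needs the
parent's text, and the just-written parent page is usually still in
_page_cache, so it can be handed to add_lines() directly instead of
being fetched and extracted from the store a second time. A hedged
restatement follows; the helper name and the store/page_cache arguments
are stand-ins, but the knit and osutils calls are the ones the patch
itself uses:

from bzrlib import knit, osutils

def add_page(store, page_cache, key, parent_key, lines):
    # Hypothetical helper mirroring the save logic in the hunk above.
    if parent_key is None:
        parents = ()
        parent_texts = None
    else:
        parents = (parent_key,)
        cached = page_cache.get(parent_key, None)
        if cached is None:
            # Cache miss: let the store extract the parent itself.
            parent_texts = None
        else:
            # Wrap the cached serialised page so add_lines() can
            # delta-compress against it without re-extracting it.
            parent_lines = osutils.chunks_to_lines([cached])
            content = knit.PlainKnitContent(parent_lines, parent_key)
            parent_texts = {parent_key: content}
    if not store.has_key(key):
        store.add_lines(key, parents, lines, random_id=True,
                        parent_texts=parent_texts)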

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2008-12-23 22:16:09 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2008-12-23 23:50:53 +0000
@@ -416,7 +416,7 @@
         #  - try for HASH.pack
         #  - try for temporary-name
         #  - refresh the pack-list to see if the pack is now absent
-        self.upload_transport.rename(self.random_name,
+        self.upload_transport.move(self.random_name,
                 '../packs/' + self.name + '.pack')
         self._state = 'finished'
         if 'pack' in debug.debug_flags:
@@ -886,23 +886,15 @@
         chk_nodes = self._index_contents(chk_indices, refs)
         new_refs = set()
         total_items, readv_group_iter = self._least_readv_node_readv(chk_nodes)
-        lines = []
-        last_key = None
         for line, key in self._copy_nodes_graph(chk_index_map,
             self.new_pack._writer, self.new_pack.chk_index,
             readv_group_iter, total_items, output_lines=True):
-            if last_key is None:
-                last_key = key
-            if last_key != key:
-                last_key = key
-                bytes = ''.join(lines)
-                node = chk_map._deserialise(bytes, last_key)
-                new_refs.update(node.refs())
-                lines = []
-        if lines:
-            bytes = ''.join(lines)
-            node = chk_map._deserialise(bytes, last_key)
-            new_refs.update(node.refs())
+            if line.startswith('version'):
+                continue
+            pos = line.find('sha1:')
+            if pos != -1:
+                new_key = (line[pos:pos+45],)
+                new_refs.add(new_key)
         return new_refs
 
     def _copy_nodes(self, nodes, index_map, writer, write_index,
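
Since chk page keys are 'sha1:' plus 40 hex digits (45 bytes), the
rewritten loop above pulls child references straight out of the
serialised lines as they stream past, instead of buffering each page
and deserialising it just to call node.refs(). A standalone sketch of
the same scan; the function name is hypothetical, and the 'version'
header line appears to be skipped because it names the record itself
rather than a child:

def scan_chk_refs(lines):
    refs = set()
    for line in lines:
        if line.startswith('version'):
            # Record header; carries the page's own key.
            continue
        pos = line.find('sha1:')
        if pos != -1:
            # Slice out 'sha1:' + 40 hex digits as a key tuple.
            refs.add((line[pos:pos + 45],))
    return refs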

=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py	2008-12-23 17:09:18 +0000
+++ b/bzrlib/xml8.py	2008-12-23 23:50:53 +0000
@@ -131,7 +131,7 @@
     if a_str.__class__ == unicode:
         return _encode_utf8(a_str)
     else:
-        return _get_cached_ascii(a_str)
+        return intern(a_str)
 
 
 def _clear_cache():
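
The intern() swap leans on CPython keeping one shared object per
interned string: the same file ids and revision ids recur constantly
during a large conversion, and interning makes every occurrence share
a single allocation, with no private cache dict to maintain. A small
illustration (Python 2, where intern() is a builtin):

a = ''.join(['file', '-', 'id'])
b = ''.join(['file', '-', 'id'])
assert a == b
assert a is not b              # equal, but two distinct objects
assert intern(a) is intern(b)  # one shared object once interned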


