Rev 2675: When adding file content, change the code to use add_lines() so that we in http://bzr.arbash-meinel.com/plugins/DEACTIVATED/svn/add_lines_cache
John Arbash Meinel
john at arbash-meinel.com
Wed Feb 25 16:30:28 GMT 2009
At http://bzr.arbash-meinel.com/plugins/DEACTIVATED/svn/add_lines_cache
------------------------------------------------------------
revno: 2675
revision-id: john at arbash-meinel.com-20090225163023-3c50ro91cbnki9ab
parent: jelmer at samba.org-20090225021057-i3hl3izwyy4rt5kl
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: add_lines_cache
timestamp: Wed 2009-02-25 10:30:23 -0600
message:
When adding file content, change the code to use add_lines() so that we
can provide the parent_texts from cache.
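
The pattern this revision moves to is sketched below. This is not the fetch.py code itself, just a minimal illustration assuming a bzrlib VersionedFiles-like `texts` object: add_lines() returns an opaque "content" object that can be handed back via parent_texts on the next call, so the repository does not have to re-extract the parent fulltext. The helper name and cache dict here are illustrative stand-ins, not names from the branch.

    def add_file_text(texts, file_id, revid, parent_revids, lines,
                      parent_text_cache):
        """Add one file text, reusing a cached parent content if we have one."""
        key = (file_id, revid)
        parent_keys = [(file_id, p) for p in parent_revids]
        parent_texts = {}
        if parent_keys and parent_keys[0] in parent_text_cache:
            # Feed the opaque content object from the previous add_lines()
            # back in, so delta computation can skip extracting the parent.
            parent_texts[parent_keys[0]] = parent_text_cache[parent_keys[0]]
        sha1, size, content = texts.add_lines(key, parent_keys, lines,
                                              parent_texts=parent_texts)
        if content is not None:
            parent_text_cache[key] = content
        return sha1, size
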
-------------- next part --------------
=== modified file 'fetch.py'
--- a/fetch.py 2009-02-25 02:10:57 +0000
+++ b/fetch.py 2009-02-25 16:30:23 +0000
@@ -129,6 +129,13 @@
file_ids.add(file_id)
+def md5_strings(lines):
+ """Return the MD5sum of a list of lines."""
+ s = osutils.md5()
+ map(s.update, lines)
+ return s.hexdigest()
+
+
def md5_string(string):
"""Return the MD5sum of a string.
@@ -507,7 +514,7 @@
file_data = self.editor._text_cache.get((base_file_id, base_revid))
if file_data is None: # Not present in cache
record = self._get_record_stream(base_file_id, base_revid)
- file_data = record.get_bytes_as('fulltext')
+ file_data = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
if file_id == base_file_id:
file_parents = [base_revid]
old_path = path
@@ -527,6 +534,7 @@
super(FileRevisionBuildEditor, self).__init__(editor, path)
self.old_path = old_path
self.file_id = file_id
+ # This should be the *lines* of the file
self.file_data = data
self.is_symlink = is_symlink
self.file_parents = file_parents
@@ -534,42 +542,58 @@
self.parent_file_id = parent_file_id
def _apply_textdelta(self, base_checksum=None):
- actual_checksum = osutils.md5(self.file_data).hexdigest()
+ actual_checksum = md5_strings(self.file_data)
assert base_checksum is None or base_checksum == actual_checksum, \
"base checksum mismatch: %r != %r" % (base_checksum,
actual_checksum)
self.file_stream = StringIO()
- return apply_txdelta_handler(self.file_data, self.file_stream)
+ return apply_txdelta_handler(''.join(self.file_data), self.file_stream)
def _close(self, checksum=None):
if self.file_stream is not None:
self.file_stream.seek(0)
- fulltext = self.file_stream.read()
+ lines = self.file_stream.readlines()
else:
# Data didn't change or file is new
- fulltext = self.file_data
+ lines = self.file_data
- actual_checksum = md5_string(fulltext)
+ actual_checksum = md5_strings(lines)
assert checksum is None or checksum == actual_checksum
- text_revision = (self.editor._get_text_revid(self.path) or
+ text_revision = (self.editor._get_text_revid(self.path) or
self.editor.revid)
text_parents = self.editor._get_text_parents(self.path)
if text_parents is None:
text_parents = self.file_parents
- text_sha1 = osutils.sha_string(fulltext)
- self.editor.texts.insert_record_stream([
- FulltextContentFactory((self.file_id, text_revision),
- [(self.file_id, revid) for revid in text_parents],
- text_sha1,
- fulltext)])
- self.editor._text_cache[self.file_id, text_revision] = fulltext
+ parent_keys = [(self.file_id, revid) for revid in text_parents]
+ parent_texts = {}
+ if parent_keys:
+ parent_text = self.editor._parent_text_cache.get(parent_keys[0],
+ None)
+ if parent_text is not None:
+ parent_texts[parent_keys[0]] = parent_text
+ file_key = (self.file_id, text_revision)
+ # add_lines_with_ghosts?
+ text_sha1, text_size, parent_content = self.editor.texts.add_lines(
+ file_key, parent_keys, lines,
+ parent_texts=parent_texts,
+ # random_id=True, # This avoids an index lookup, can we do it?
+ # check_content=False, # Can we assume we are always line-safe?
+ )
+ self.editor._text_cache[file_key] = lines
+ if parent_content is not None:
+ # TODO: parent_content is meant to be an opaque structure. However
+ # if we know the target is a knit or pack repo, we could
+ # share the _text_cache, rather than creating a new one here.
+ self.editor._parent_text_cache[file_key] = parent_content
+ content_starts_with_link = False
+ if lines and lines[0].startswith('link '):
+ content_starts_with_link = True
if self.is_special is not None:
- self.is_symlink = (self.is_special and
- fulltext.startswith("link "))
- elif (fulltext.startswith("link ")):
- # This file just might be a file that is svn:special but didn't
+ self.is_symlink = (self.is_special and content_starts_with_link)
+ elif content_starts_with_link:
+ # This file just might be a file that is svn:special but didn't
# contain a symlink but does now
if not self.is_symlink:
pass # FIXME: Query whether this file has svn:special set.
@@ -584,6 +608,7 @@
if self.is_symlink:
ie = InventoryLink(self.file_id, urlutils.basename(self.path), self.parent_file_id)
+ fulltext = ''.join(lines)
ie.symlink_target = fulltext[len("link "):]
if "\n" in ie.symlink_target:
raise AssertionError("bzr doesn't support newlines in symlink targets yet")
@@ -594,7 +619,7 @@
ie = InventoryFile(self.file_id, urlutils.basename(self.path), self.parent_file_id)
ie.symlink_target = None
ie.text_sha1 = text_sha1
- ie.text_size = len(fulltext)
+ ie.text_size = text_size
assert ie.text_size is not None
ie.executable = self.is_executable
ie.revision = text_revision
@@ -614,7 +639,13 @@
self.revid = revid
self._text_revids = None
self._text_parents = None
- self._text_cache = lru_cache.LRUSizeCache(TEXT_CACHE_SIZE )
+ def lines_to_size(lines):
+ return sum(map(len, lines))
+ self._text_cache = lru_cache.LRUSizeCache(TEXT_CACHE_SIZE,
+ compute_size=lines_to_size)
+ # TODO: it would be nice to get rid of this extra cache
+ self._parent_text_cache = lru_cache.LRUCache(TEXT_CACHE_SIZE /
+ (1*1024*1024))
self.old_inventory = prev_inventory
self._inv_delta = []
self._deleted = set()
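
A small standalone sketch of the compute_size change in the last hunk, assuming bzrlib's lru_cache module. Because the cache now stores lists of lines rather than a single fulltext string, the default len()-based sizing would count entries by number of lines; summing the line lengths keeps the cache bounded by bytes instead. TEXT_CACHE_SIZE below is an illustrative value, not taken from the diff.

    from bzrlib import lru_cache

    TEXT_CACHE_SIZE = 4 * 1024 * 1024  # illustrative limit in bytes

    def lines_to_size(lines):
        """Approximate the byte size of a list of text lines."""
        return sum(map(len, lines))

    text_cache = lru_cache.LRUSizeCache(TEXT_CACHE_SIZE,
                                        compute_size=lines_to_size)
    # Entries are sized by total line length, not line count.
    text_cache[('file-id', 'rev-id')] = ['line one\n', 'line two\n']
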