Rev 2678: Lots of cleanup, including implementing get_build_chain for the new index format. in http://bzr.arbash-meinel.com/branches/bzr/0.19-dev/pyrex_knit_extract
John Arbash Meinel
john at arbash-meinel.com
Fri Aug 3 01:01:09 BST 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.19-dev/pyrex_knit_extract
------------------------------------------------------------
revno: 2678
revision-id: john at arbash-meinel.com-20070803000035-8lrg5av9vw3zw89j
parent: john at arbash-meinel.com-20070802233253-jksnt66mv2ti5mhi
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: pyrex_knit_extract
timestamp: Thu 2007-08-02 19:00:35 -0500
message:
Lots of cleanup, including implementing get_build_chain for the new index format.
Remove a bunch of unnecessary functions.
modified:
bzrlib/_knit_helpers_c.pyx knit_c.pyx-20070509143944-u42gy8w387a10m0j-1
bzrlib/_knit_helpers_py.py _knit_load_data_py.p-20070629000948-9a0nh4s118bi5y8n-1
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
-------------- next part --------------
=== modified file 'bzrlib/_knit_helpers_c.pyx'
--- a/bzrlib/_knit_helpers_c.pyx 2007-08-02 23:26:06 +0000
+++ b/bzrlib/_knit_helpers_c.pyx 2007-08-03 00:00:35 +0000
@@ -386,52 +386,6 @@
return sio.readlines()
-def _extract_lines_from_gzip_c(data):
- """Convert a gzip stream into a set of lines.
-
- :param data: A string of gzip data
- :return: A list of lines extracted from the decompressed data.
- """
- cdef Bytef *c_buf
- cdef uLong buf_size
- cdef Bytef *c_data
- cdef uLong data_size
- cdef int retcode
- cdef PyObject *pyobj_buf
- cdef z_stream strm
-
- c_data = <Bytef *>PyString_AsString(data)
- data_size = PyString_Size(data)
-
- buf_size = 1000000
- # A PyString is used to allow us to avoid malloc
- # We could use a global as long as we didn't care about thread safety
- buf = PyString_FromStringAndSize(NULL, buf_size)
-
- # Is this super ugly?
- c_buf = <Bytef *>PyString_AsString(buf)
-
- memset(&strm, 0, sizeof(z_stream))
- strm.next_in = c_data
- strm.avail_in = data_size
- strm.total_in = 0
- strm.next_out = c_buf
- strm.avail_out = buf_size
- strm.total_out = 0
-
- # windowBits can be +16 to indicate we are decompressing a gzip stream, or
- # +32 for it to auto determine whether this is a gzip or zlib stream.
- inflateInit2(&strm, 15+16)
-
- retcode = inflate(&strm, Z_FINISH)
- assert retcode == Z_STREAM_END, (
- "Expected Z_STREAM_END (%d) got %d while decompressing"
- % (Z_STREAM_END, retcode))
-
- # Now that the data is decompressed, resize our buffer
- return _convert_bytes_to_lines(<char*>c_buf, buf_size-strm.avail_out)
-
-
# 1MB for the decompression buffer
cdef int _decompress_buffer_size
@@ -916,39 +870,6 @@
return rec
-def _extract_knit_lines_from_gzip(version_id, data, kd_name):
- """Extract the lines from a gzip chunk.
-
- :param version_id: The chunk should correspond to this version id, verify
- this.
- :param data: The gzip data
- :param kd_name: The KnitData filename. This is used when raising
- KnitCorrupt errors
- :return: sha1digest, [lines]
- """
- try:
- record_contents = _extract_lines_from_gzip_c(data)
- except Exception, e:
- raise errors.KnitCorrupt(kd_name,
- "While reading {%s} got %s(%s)"
- % (version_id, e.__class__.__name__, str(e)))
- header = record_contents.pop(0)
- rec = _check_header(version_id, header, kd_name)
-
- last_line = record_contents.pop()
- if len(record_contents) != int(rec[2]):
- raise errors.KnitCorrupt(kd_name,
- 'incorrect number of lines %s != %s'
- ' for version {%s}'
- % (len(record_contents), int(rec[2]),
- version_id))
- if last_line != 'end %s\n' % rec[1]:
- raise errors.KnitCorrupt(kd_name,
- 'unexpected version end line %r, wanted %r'
- % (last_line, version_id))
- return rec[3], record_contents
-
-
def _extract_knit_fulltext_from_gzip_c(version_id, data, kd_name,
is_annotated):
"""Extract the unannotated fulltext lines from a gzip hunk.
=== modified file 'bzrlib/_knit_helpers_py.py'
--- a/bzrlib/_knit_helpers_py.py 2007-07-25 00:28:51 +0000
+++ b/bzrlib/_knit_helpers_py.py 2007-08-03 00:00:35 +0000
@@ -104,7 +104,10 @@
:return: A list of lines extracted from the decompressed data.
"""
df = GzipFile(mode='rb', fileobj=StringIO(data))
- return df.readlines()
+ try:
+ return df.readlines()
+ finally:
+ df.close()
def _check_header(version_id, line, kd_name):
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-08-02 23:32:53 +0000
+++ b/bzrlib/knit.py 2007-08-03 00:00:35 +0000
@@ -1626,6 +1626,24 @@
options.append('no-eol')
return ','.join(options)
+ def get_build_chain(self, version_id):
+ """Get the chain that we need to get a fulltext for this version.
+
+ :return: [(version_id, start, size)] needed to extract this text.
+ """
+ chain = []
+ node = self._get_node(version_id)
+ while node is not None:
+ start, size = self.get_position(node[0])
+ chain.append((node[0], start, size))
+ if self._parent_compression(node[2][1]) == 'line-delta':
+ node = self._get_node(node[2][1][0]) # parent
+ else:
+ break
+ # Put the fulltext first
+ chain.reverse()
+ return chain
+
def get_parents(self, version_id):
"""Return parents of specified version ignoring ghosts."""
parents = list(self.iter_parents([version_id]))
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py 2007-07-27 20:11:47 +0000
+++ b/bzrlib/tests/test_knit.py 2007-08-03 00:00:35 +0000
@@ -148,12 +148,20 @@
class LowLevelKnitDataTests(TestCase):
def get_knit_data(self, *args, **kwargs):
- orig = knit._extract_lines_from_gzip
+ orig_fulltext = knit._extract_knit_fulltext_from_gzip
+ orig_linedelta = knit._extract_knit_linedelta_from_gzip
def reset():
- knit._extract_lines_from_gzip = orig
+ knit._extract_knit_fulltext_from_gzip = orig_fulltext
+ knit._extract_knit_linedelta_from_gzip = orig_linedelta
self.addCleanup(reset)
- from bzrlib._knit_helpers_py import _extract_lines_from_gzip_py
- knit._extract_lines_from_gzip = _extract_lines_from_gzip_py
+ from bzrlib._knit_helpers_py import (
+ _extract_knit_fulltext_from_gzip_py,
+ _extract_knit_linedelta_from_gzip_py,
+ )
+ knit._extract_knit_fulltext_from_gzip = \
+ _extract_knit_fulltext_from_gzip_py
+ knit._extract_knit_linedelta_from_gzip = \
+ _extract_knit_linedelta_from_gzip_py
return _KnitData(*args, **kwargs)
def create_gz_content(self, text):
@@ -273,12 +281,20 @@
_test_needs_features = [CompiledKnitFeature]
def get_knit_data(self, *args, **kwargs):
- orig = knit._extract_lines_from_gzip
+ orig_fulltext = knit._extract_knit_fulltext_from_gzip
+ orig_linedelta = knit._extract_knit_linedelta_from_gzip
def reset():
- knit._extract_lines_from_gzip = orig
+ knit._extract_knit_fulltext_from_gzip = orig_fulltext
+ knit._extract_knit_linedelta_from_gzip = orig_linedelta
self.addCleanup(reset)
- from bzrlib._knit_helpers_c import _extract_lines_from_gzip_c
- knit._extract_lines_from_gzip = _extract_lines_from_gzip_c
+ from bzrlib._knit_helpers_c import (
+ _extract_knit_fulltext_from_gzip_c,
+ _extract_knit_linedelta_from_gzip_c,
+ )
+ knit._extract_knit_fulltext_from_gzip = \
+ _extract_knit_fulltext_from_gzip_c
+ knit._extract_knit_linedelta_from_gzip = \
+ _extract_knit_linedelta_from_gzip_c
return _KnitData(*args, **kwargs)
More information about the bazaar-commits mailing list