Rev 73: We are now able to add multiple sources to the delta generator (in http://bzr.arbash-meinel.com/plugins/groupcompress_rabin)
John Arbash Meinel
john at arbash-meinel.com
Mon Mar 2 19:15:41 GMT 2009
At http://bzr.arbash-meinel.com/plugins/groupcompress_rabin
------------------------------------------------------------
revno: 73
revision-id: john at arbash-meinel.com-20090302191537-7mvjwk2042fvj9gg
parent: john at arbash-meinel.com-20090302185236-gm5ckgaic13q6vvs
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: groupcompress_rabin
timestamp: Mon 2009-03-02 13:15:37 -0600
message:
We are now able to add multiple sources to the delta generator.
-------------- next part --------------
=== modified file '_groupcompress_pyx.pyx'
--- a/_groupcompress_pyx.pyx 2009-03-02 18:52:36 +0000
+++ b/_groupcompress_pyx.pyx 2009-03-02 19:15:37 +0000
@@ -51,28 +51,28 @@
object PyString_FromStringAndSize(char *, Py_ssize_t)
-# cdef void *safe_malloc(size_t count) except NULL:
-# cdef void *result
-# result = malloc(count)
-# if result == NULL:
-# raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
-# return result
-#
-#
-# cdef void *safe_realloc(void * old, size_t count) except NULL:
-# cdef void *result
-# result = realloc(old, count)
-# if result == NULL:
-# raise MemoryError('Failed to reallocate to %d bytes of memory'
-# % (count,))
-# return result
-#
-#
-# cdef int safe_free(void **val) except -1:
-# assert val != NULL
-# if val[0] != NULL:
-# free(val[0])
-# val[0] = NULL
+cdef void *safe_malloc(size_t count) except NULL:
+ cdef void *result
+ result = malloc(count)
+ if result == NULL:
+ raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
+ return result
+
+
+cdef void *safe_realloc(void * old, size_t count) except NULL:
+ cdef void *result
+ result = realloc(old, count)
+ if result == NULL:
+ raise MemoryError('Failed to reallocate to %d bytes of memory'
+ % (count,))
+ return result
+
+
+cdef int safe_free(void **val) except -1:
+ assert val != NULL
+ if val[0] != NULL:
+ free(val[0])
+ val[0] = NULL
def make_delta_index(source):
return DeltaIndex(source)
@@ -80,28 +80,42 @@
cdef class DeltaIndex:
- cdef object _source
- cdef delta_index *_index
+ #cdef list _sources
+ cdef readonly object _sources
+ cdef delta_index **_indexes
+ cdef readonly unsigned int _num_indexes
+ cdef readonly unsigned int _max_num_indexes
+ cdef readonly unsigned long _source_offset
def __repr__(self):
- if self._index == NULL:
- return '%s(NULL)' % (self.__class__.__name__,)
- return '%s(%d)' % (self.__class__.__name__,
- len(self._source))
-
- def __init__(self, source):
- self._source = None
- self._index = NULL
- self._create_delta_index(source)
-
- def _create_delta_index(self, source):
+ return '%s(%d, %d, %d)' % (self.__class__.__name__,
+ len(self._sources), self._source_offset,
+ self._num_indexes)
+
+ def __init__(self, source=None):
+ self._sources = []
+ self._max_num_indexes = 1024
+ self._indexes = <delta_index**>safe_malloc(sizeof(delta_index*)
+ * self._max_num_indexes)
+ self._num_indexes = 0
+ self._source_offset = 0
+
+ if source is not None:
+ self.add_source(source)
+
+ def __dealloc__(self):
+ self._ensure_no_indexes()
+
+ def add_source(self, source):
cdef char *c_source
cdef Py_ssize_t c_source_size
+ cdef delta_index *index
+ cdef unsigned int num_indexes
if not PyString_CheckExact(source):
raise TypeError('source is not a str')
- self._source = source
+ self._sources.append(source)
c_source = PyString_AS_STRING(source)
c_source_size = PyString_GET_SIZE(source)
@@ -111,16 +125,32 @@
# fit just fine into the structure. But for now, we just wrap
# create_delta_index (For example, we could always reserve enough
# space to hash a 4MB string, etc.)
- self._index = create_delta_index(c_source, c_source_size, 0)
- # TODO: Handle if _index == NULL
-
- cdef _ensure_no_index(self):
- if self._index != NULL:
- free_delta_index(self._index)
- self._index = NULL
-
- def __dealloc__(self):
- self._ensure_no_index()
+ index = create_delta_index(c_source, c_source_size, self._source_offset)
+ self._source_offset += c_source_size
+ if index != NULL:
+ num_indexes = self._num_indexes + 1
+ if num_indexes >= self._max_num_indexes:
+ self._expand_indexes()
+ self._indexes[self._num_indexes] = index
+ self._num_indexes = num_indexes
+
+ cdef _expand_indexes(self):
+ self._max_num_indexes = self._max_num_indexes * 2
+ self._indexes = <delta_index **>safe_realloc(self._indexes,
+ sizeof(delta_index *)
+ * self._max_num_indexes)
+
+ cdef _ensure_no_indexes(self):
+ cdef int i
+
+ if self._indexes != NULL:
+ for i from 0 <= i < self._num_indexes:
+ free_delta_index(self._indexes[i])
+ self._indexes[i] = NULL
+ free(self._indexes)
+ self._indexes = NULL
+ self._max_num_indexes = 0
+ self._num_indexes = 0
def make_delta(self, target_bytes, max_delta_size=0):
"""Create a delta from the current source to the target bytes."""
@@ -129,7 +159,7 @@
cdef void * delta
cdef unsigned long delta_size
- if self._index == NULL:
+ if self._num_indexes == 0:
return None
if not PyString_CheckExact(target_bytes):
@@ -141,7 +171,8 @@
# TODO: inline some of create_delta so we at least don't have to double
# malloc, and can instead use PyString_FromStringAndSize, to
# allocate the bytes into the final string
- delta = create_delta(&self._index, 1, target, target_size,
+ delta = create_delta(self._indexes, self._num_indexes,
+ target, target_size,
&delta_size, max_delta_size)
result = None
if delta:
=== modified file 'tests/test__groupcompress_pyx.py'
--- a/tests/test__groupcompress_pyx.py 2009-03-02 17:05:33 +0000
+++ b/tests/test__groupcompress_pyx.py 2009-03-02 19:15:37 +0000
@@ -132,9 +132,39 @@
def test_repr(self):
di = self._gc_module.DeltaIndex('test text\n')
- self.assertEqual('DeltaIndex(10)', repr(di))
+ self.assertEqual('DeltaIndex(1, 10, 1)', repr(di))
def test_make_delta(self):
di = self._gc_module.DeltaIndex(_text1)
delta = di.make_delta(_text2)
self.assertEqual('MN\x90/\x1fdiffer from\nagainst other text\n', delta)
+
+ def test_delta_against_multiple_sources(self):
+ di = self._gc_module.DeltaIndex()
+ first_text = ('a bit of text, that\n'
+ 'does not have much in\n'
+ 'common with the next text\n'
+ )
+ di.add_source(first_text)
+ self.assertEqual(1, di._num_indexes)
+ self.assertEqual(1024, di._max_num_indexes)
+ self.assertEqual(len(first_text), di._source_offset)
+ second_text = ('some more bits of text\n'
+ 'which does have a little bit in\n'
+ 'common with the previous text\n'
+ )
+ di.add_source(second_text)
+ self.assertEqual(2, di._num_indexes)
+ self.assertEqual(1024, di._max_num_indexes)
+ self.assertEqual(len(first_text) + len(second_text), di._source_offset)
+ third_text = ('a bit of text, that\n'
+ 'has some in common with the previous text\n'
+ 'and not much in\n'
+ 'common with the next text\n'
+ )
+ delta = di.make_delta(third_text)
+ result = self._gc_module.apply_delta(first_text + second_text, delta)
+ self.assertEqualDiff(third_text, result)
+ self.assertEqual('\x99\x01h\x90\x14\x0chas some in '
+ '\x91{\x1e\x07and not\x91!#', delta)
+
More information about the bazaar-commits mailing list