Rev 73: We are now able to add multiple sources to the delta generator. in http://bzr.arbash-meinel.com/plugins/groupcompress_rabin

John Arbash Meinel john at arbash-meinel.com
Mon Mar 2 19:15:41 GMT 2009


At http://bzr.arbash-meinel.com/plugins/groupcompress_rabin

------------------------------------------------------------
revno: 73
revision-id: john@arbash-meinel.com-20090302191537-7mvjwk2042fvj9gg
parent: john@arbash-meinel.com-20090302185236-gm5ckgaic13q6vvs
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: groupcompress_rabin
timestamp: Mon 2009-03-02 13:15:37 -0600
message:
  We are now able to add multiple sources to the delta generator.
-------------- next part --------------
=== modified file '_groupcompress_pyx.pyx'
--- a/_groupcompress_pyx.pyx	2009-03-02 18:52:36 +0000
+++ b/_groupcompress_pyx.pyx	2009-03-02 19:15:37 +0000
@@ -51,28 +51,28 @@
     object PyString_FromStringAndSize(char *, Py_ssize_t)
 
 
-# cdef void *safe_malloc(size_t count) except NULL:
-#     cdef void *result
-#     result = malloc(count)
-#     if result == NULL:
-#         raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
-#     return result
-# 
-# 
-# cdef void *safe_realloc(void * old, size_t count) except NULL:
-#     cdef void *result
-#     result = realloc(old, count)
-#     if result == NULL:
-#         raise MemoryError('Failed to reallocate to %d bytes of memory'
-#                           % (count,))
-#     return result
-# 
-# 
-# cdef int safe_free(void **val) except -1:
-#     assert val != NULL
-#     if val[0] != NULL:
-#         free(val[0])
-#         val[0] = NULL
+cdef void *safe_malloc(size_t count) except NULL:
+    cdef void *result
+    result = malloc(count)
+    if result == NULL:
+        raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
+    return result
+
+
+cdef void *safe_realloc(void * old, size_t count) except NULL:
+    cdef void *result
+    result = realloc(old, count)
+    if result == NULL:
+        raise MemoryError('Failed to reallocate to %d bytes of memory'
+                          % (count,))
+    return result
+
+
+cdef int safe_free(void **val) except -1:
+    assert val != NULL
+    if val[0] != NULL:
+        free(val[0])
+        val[0] = NULL
 
 def make_delta_index(source):
     return DeltaIndex(source)
@@ -80,28 +80,42 @@
 
 cdef class DeltaIndex:
 
-    cdef object _source
-    cdef delta_index *_index
+    #cdef list _sources
+    cdef readonly object _sources
+    cdef delta_index **_indexes
+    cdef readonly unsigned int _num_indexes
+    cdef readonly unsigned int _max_num_indexes
+    cdef readonly unsigned long _source_offset
 
     def __repr__(self):
-        if self._index == NULL:
-            return '%s(NULL)' % (self.__class__.__name__,)
-        return '%s(%d)' % (self.__class__.__name__,
-            len(self._source))
-
-    def __init__(self, source):
-        self._source = None
-        self._index = NULL
-        self._create_delta_index(source)
-
-    def _create_delta_index(self, source):
+        return '%s(%d, %d, %d)' % (self.__class__.__name__,
+            len(self._sources), self._source_offset,
+            self._num_indexes)
+
+    def __init__(self, source=None):
+        self._sources = []
+        self._max_num_indexes = 1024
+        self._indexes = <delta_index**>safe_malloc(sizeof(delta_index*)
+                                                   * self._max_num_indexes)
+        self._num_indexes = 0
+        self._source_offset = 0
+
+        if source is not None:
+            self.add_source(source)
+
+    def __dealloc__(self):
+        self._ensure_no_indexes()
+
+    def add_source(self, source):
         cdef char *c_source
         cdef Py_ssize_t c_source_size
+        cdef delta_index *index
+        cdef unsigned int num_indexes
 
         if not PyString_CheckExact(source):
             raise TypeError('source is not a str')
 
-        self._source = source
+        self._sources.append(source)
         c_source = PyString_AS_STRING(source)
         c_source_size = PyString_GET_SIZE(source)
 
@@ -111,16 +125,32 @@
         #       fit just fine into the structure. But for now, we just wrap
         #       create_delta_index (For example, we could always reserve enough
         #       space to hash a 4MB string, etc.)
-        self._index = create_delta_index(c_source, c_source_size, 0)
-        # TODO: Handle if _index == NULL
-
-    cdef _ensure_no_index(self):
-        if self._index != NULL:
-            free_delta_index(self._index)
-            self._index = NULL
-
-    def __dealloc__(self):
-        self._ensure_no_index()
+        index = create_delta_index(c_source, c_source_size, self._source_offset)
+        self._source_offset += c_source_size
+        if index != NULL:
+            num_indexes = self._num_indexes + 1
+            if num_indexes >= self._max_num_indexes:
+                self._expand_indexes()
+            self._indexes[self._num_indexes] = index
+            self._num_indexes = num_indexes
+
+    cdef _expand_indexes(self):
+        self._max_num_indexes = self._max_num_indexes * 2
+        self._indexes = <delta_index **>safe_realloc(self._indexes,
+                                                sizeof(delta_index *)
+                                                * self._max_num_indexes)
+
+    cdef _ensure_no_indexes(self):
+        cdef int i
+
+        if self._indexes != NULL:
+            for i from 0 <= i < self._num_indexes:
+                free_delta_index(self._indexes[i])
+                self._indexes[i] = NULL
+            free(self._indexes)
+            self._indexes = NULL
+            self._max_num_indexes = 0
+            self._num_indexes = 0
 
     def make_delta(self, target_bytes, max_delta_size=0):
         """Create a delta from the current source to the target bytes."""
@@ -129,7 +159,7 @@
         cdef void * delta
         cdef unsigned long delta_size
 
-        if self._index == NULL:
+        if self._num_indexes == 0:
             return None
 
         if not PyString_CheckExact(target_bytes):
@@ -141,7 +171,8 @@
         # TODO: inline some of create_delta so we at least don't have to double
         #       malloc, and can instead use PyString_FromStringAndSize, to
         #       allocate the bytes into the final string
-        delta = create_delta(&self._index, 1, target, target_size,
+        delta = create_delta(self._indexes, self._num_indexes,
+                             target, target_size,
                              &delta_size, max_delta_size)
         result = None
         if delta:

=== modified file 'tests/test__groupcompress_pyx.py'
--- a/tests/test__groupcompress_pyx.py	2009-03-02 17:05:33 +0000
+++ b/tests/test__groupcompress_pyx.py	2009-03-02 19:15:37 +0000
@@ -132,9 +132,39 @@
 
     def test_repr(self):
         di = self._gc_module.DeltaIndex('test text\n')
-        self.assertEqual('DeltaIndex(10)', repr(di))
+        self.assertEqual('DeltaIndex(1, 10, 1)', repr(di))
 
     def test_make_delta(self):
         di = self._gc_module.DeltaIndex(_text1)
         delta = di.make_delta(_text2)
         self.assertEqual('MN\x90/\x1fdiffer from\nagainst other text\n', delta)
+
+    def test_delta_against_multiple_sources(self):
+        di = self._gc_module.DeltaIndex()
+        first_text = ('a bit of text, that\n'
+                      'does not have much in\n'
+                      'common with the next text\n'
+                     )
+        di.add_source(first_text)
+        self.assertEqual(1, di._num_indexes)
+        self.assertEqual(1024, di._max_num_indexes)
+        self.assertEqual(len(first_text), di._source_offset)
+        second_text = ('some more bits of text\n'
+                       'which does have a little bit in\n'
+                       'common with the previous text\n'
+                      )
+        di.add_source(second_text)
+        self.assertEqual(2, di._num_indexes)
+        self.assertEqual(1024, di._max_num_indexes)
+        self.assertEqual(len(first_text) + len(second_text), di._source_offset)
+        third_text = ('a bit of text, that\n'
+                      'has some in common with the previous text\n'
+                      'and not much in\n'
+                      'common with the next text\n'
+                     )
+        delta = di.make_delta(third_text)
+        result = self._gc_module.apply_delta(first_text + second_text, delta)
+        self.assertEqualDiff(third_text, result)
+        self.assertEqual('\x99\x01h\x90\x14\x0chas some in '
+                         '\x91{\x1e\x07and not\x91!#', delta)
+



More information about the bazaar-commits mailing list