Rev 5731: (gz) Change diff-delta.c code so MemoryError can be raised from OOM rather in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Mar 22 16:39:44 UTC 2011


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5731 [merge]
revision-id: pqm at pqm.ubuntu.com-20110322163939-lju83ajjj2eh7n60
parent: pqm at pqm.ubuntu.com-20110322122953-gsrls0jajhfnobt5
parent: gzlist at googlemail.com-20110322155809-4mr9nj9zi25yw32y
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2011-03-22 16:39:39 +0000
message:
  (gz) Change diff-delta.c code so MemoryError can be raised from OOM rather
   than AssertionError (Martin [gz])
modified:
  bzrlib/_groupcompress_pyx.pyx  _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
  bzrlib/delta.h                 delta.h-20090227173129-qsu3u43vowf1q3ay-1
  bzrlib/diff-delta.c            diffdelta.c-20090226042143-l9wzxynyuxnb5hus-1
  doc/en/release-notes/bzr-2.4.txt bzr2.4.txt-20110114053217-k7ym9jfz243fddjm-1
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx	2010-08-02 17:08:29 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx	2011-03-22 15:58:09 +0000
@@ -46,13 +46,26 @@
         unsigned long agg_offset
     struct delta_index:
         pass
-    delta_index * create_delta_index(source_info *src, delta_index *old) nogil
-    delta_index * create_delta_index_from_delta(source_info *delta,
-                                                delta_index *old) nogil
+    ctypedef enum delta_result:
+        DELTA_OK
+        DELTA_OUT_OF_MEMORY
+        DELTA_INDEX_NEEDED
+        DELTA_SOURCE_EMPTY
+        DELTA_SOURCE_BAD
+        DELTA_BUFFER_EMPTY
+        DELTA_SIZE_TOO_BIG
+    delta_result create_delta_index(source_info *src,
+                                    delta_index *old,
+                                    delta_index **fresh) nogil
+    delta_result create_delta_index_from_delta(source_info *delta,
+                                               delta_index *old,
+                                               delta_index **fresh) nogil
     void free_delta_index(delta_index *index) nogil
-    void *create_delta(delta_index *indexes,
-             void *buf, unsigned long bufsize,
-             unsigned long *delta_size, unsigned long max_delta_size) nogil
+    delta_result create_delta(delta_index *indexes,
+                              void *buf, unsigned long bufsize,
+                              unsigned long *delta_size,
+                              unsigned long max_delta_size,
+                              void **delta_data) nogil
     unsigned long get_delta_hdr_size(unsigned char **datap,
                                      unsigned char *top) nogil
     unsigned long sizeof_delta_index(delta_index *index)
@@ -86,6 +99,20 @@
     return DeltaIndex(source)
 
 
+cdef object _translate_delta_failure(delta_result result):
+    if result == DELTA_OUT_OF_MEMORY:
+        return MemoryError("Delta function failed to allocate memory")
+    elif result == DELTA_INDEX_NEEDED:
+        return ValueError("Delta function requires delta_index param")
+    elif result == DELTA_SOURCE_EMPTY:
+        return ValueError("Delta function given empty source_info param")
+    elif result == DELTA_SOURCE_BAD:
+        return RuntimeError("Delta function given invalid source_info param")
+    elif result == DELTA_BUFFER_EMPTY:
+        return ValueError("Delta function given empty buffer params")
+    return AssertionError("Unrecognised delta result code: %d" % result)
+
+
 cdef class DeltaIndex:
 
     # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
@@ -147,6 +174,7 @@
         cdef char *c_delta
         cdef Py_ssize_t c_delta_size
         cdef delta_index *index
+        cdef delta_result res
         cdef unsigned int source_location
         cdef source_info *src
         cdef unsigned int num_indexes
@@ -165,9 +193,11 @@
         src.size = c_delta_size
         src.agg_offset = self._source_offset + unadded_bytes
         with nogil:
-            index = create_delta_index_from_delta(src, self._index)
+            res = create_delta_index_from_delta(src, self._index, &index)
+        if res != DELTA_OK:
+            raise _translate_delta_failure(res)
         self._source_offset = src.agg_offset + src.size
-        if index != NULL:
+        if index != self._index:
             free_delta_index(self._index)
             self._index = index
 
@@ -181,6 +211,7 @@
         cdef char *c_source
         cdef Py_ssize_t c_source_size
         cdef delta_index *index
+        cdef delta_result res
         cdef unsigned int source_location
         cdef source_info *src
         cdef unsigned int num_indexes
@@ -206,22 +237,27 @@
         # We delay creating the index on the first insert
         if source_location != 0:
             with nogil:
-                index = create_delta_index(src, self._index)
-            if index != NULL:
+                res = create_delta_index(src, self._index, &index)
+            if res != DELTA_OK:
+                raise _translate_delta_failure(res)
+            if index != self._index:
                 free_delta_index(self._index)
                 self._index = index
 
     cdef _populate_first_index(self):
         cdef delta_index *index
+        cdef delta_result res
         if len(self._sources) != 1 or self._index != NULL:
             raise AssertionError('_populate_first_index should only be'
                 ' called when we have a single source and no index yet')
 
-        # We know that self._index is already NULL, so whatever
-        # create_delta_index returns is fine
+        # We know that self._index is already NULL, so create_delta_index
+        # will always create a new index unless there's a malloc failure
         with nogil:
-            self._index = create_delta_index(&self._source_infos[0], NULL)
-        assert self._index != NULL
+            res = create_delta_index(&self._source_infos[0], NULL, &index)
+        if res != DELTA_OK:
+            raise _translate_delta_failure(res)
+        self._index = index
 
     cdef _expand_sources(self):
         raise RuntimeError('if we move self._source_infos, then we need to'
@@ -238,6 +274,7 @@
         cdef void * delta
         cdef unsigned long delta_size
         cdef unsigned long c_max_delta_size
+        cdef delta_result res
 
         if self._index == NULL:
             if len(self._sources) == 0:
@@ -256,13 +293,14 @@
         #       allocate the bytes into the final string
         c_max_delta_size = max_delta_size
         with nogil:
-            delta = create_delta(self._index,
-                                 target, target_size,
-                                 &delta_size, c_max_delta_size)
+            res = create_delta(self._index, target, target_size,
+                               &delta_size, c_max_delta_size, &delta)
         result = None
-        if delta:
+        if res == DELTA_OK:
             result = PyString_FromStringAndSize(<char *>delta, delta_size)
             free(delta)
+        elif res != DELTA_SIZE_TOO_BIG:
+            raise _translate_delta_failure(res)
         return result
 
 
@@ -369,8 +407,8 @@
                 # Copy instruction
                 data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
                 if (cp_off + cp_size < cp_size or
-                    cp_off + cp_size > source_size or
-                    cp_size > size):
+                    cp_off + cp_size > <unsigned int>source_size or
+                    cp_size > <unsigned int>size):
                     failed = 1
                     break
                 memcpy(out, source + cp_off, cp_size)

=== modified file 'bzrlib/delta.h'
--- a/bzrlib/delta.h	2009-04-09 20:23:07 +0000
+++ b/bzrlib/delta.h	2011-03-22 00:33:21 +0000
@@ -21,33 +21,48 @@
                                  aggregate source */
 };
 
+
+/* result type for functions that have multiple failure modes */
+typedef enum {
+    DELTA_OK,             /* Success */
+    DELTA_OUT_OF_MEMORY,  /* Could not allocate required memory */
+    DELTA_INDEX_NEEDED,   /* A delta_index must be passed */
+    DELTA_SOURCE_EMPTY,   /* A source_info had no content */
+    DELTA_SOURCE_BAD,     /* A source_info had invalid or corrupt content */
+    DELTA_BUFFER_EMPTY,   /* A buffer pointer and size */
+    DELTA_SIZE_TOO_BIG,   /* Delta data is larger than the max requested */
+} delta_result;
+
+
 /*
  * create_delta_index: compute index data from given buffer
  *
- * This returns a pointer to a struct delta_index that should be passed to
- * subsequent create_delta() calls, or to free_delta_index().  A NULL pointer
- * is returned on failure.  The given buffer must not be freed nor altered
- * before free_delta_index() is called.  The returned pointer must be freed
- * using free_delta_index().
+ * Returns a delta_result status, when DELTA_OK then *fresh is set to a struct
+ * delta_index that should be passed to subsequent create_delta() calls, or to
+ * free_delta_index().  Other values are a failure, and *fresh is unset.
+ * The given buffer must not be freed nor altered before free_delta_index() is
+ * called. The resultant struct must be freed using free_delta_index().
  */
-extern struct delta_index *
+extern delta_result
 create_delta_index(const struct source_info *src,
-                   struct delta_index *old);
+                   struct delta_index *old,
+                   struct delta_index **fresh);
 
 
 /*
  * create_delta_index_from_delta: compute index data from given buffer
  *
- * This returns a pointer to a struct delta_index that should be passed to
- * subsequent create_delta() calls, or to free_delta_index().  A NULL pointer
- * is returned on failure.
+ * Returns a delta_result status, when DELTA_OK then *fresh is set to a struct
+ * delta_index that should be passed to subsequent create_delta() calls, or to
+ * free_delta_index().  Other values are a failure, and *fresh is unset.
  * The bytes must be in the form of a delta structure, as generated by
  * create_delta(). The generated index will only index the insert bytes, and
  * not any of the control structures.
  */
-extern struct delta_index *
+extern delta_result
 create_delta_index_from_delta(const struct source_info *delta,
-                              struct delta_index *old);
+                              struct delta_index *old,
+                              struct delta_index **fresh);
 /*
  * free_delta_index: free the index created by create_delta_index()
  *
@@ -67,15 +82,16 @@
  *
  * This function may be called multiple times with different buffers using
  * the same delta_index pointer.  If max_delta_size is non-zero and the
- * resulting delta is to be larger than max_delta_size then NULL is returned.
- * On success, a non-NULL pointer to the buffer with the delta data is
- * returned and *delta_size is updated with its size.  The returned buffer
- * must be freed by the caller.
+ * resulting delta is to be larger than max_delta_size then DELTA_SIZE_TOO_BIG
+ * is returned.  Otherwise on success, DELTA_OK is returned and *delta_data is
+ * set to a new buffer with the delta data and *delta_size is updated with its
+ * size.  That buffer must be freed by the caller.
  */
-extern void *
+extern delta_result
 create_delta(const struct delta_index *index,
-         const void *buf, unsigned long bufsize,
-         unsigned long *delta_size, unsigned long max_delta_size);
+             const void *buf, unsigned long bufsize,
+             unsigned long *delta_size, unsigned long max_delta_size,
+             void **delta_data);
 
 /* the smallest possible delta size is 3 bytes
  * Target size, Copy command, Copy length

=== modified file 'bzrlib/diff-delta.c'
--- a/bzrlib/diff-delta.c	2010-08-02 16:35:11 +0000
+++ b/bzrlib/diff-delta.c	2011-03-20 18:58:43 +0000
@@ -4,7 +4,7 @@
  * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
  * http://www.xmailserver.org/xdiff-lib.html
  *
- * Rewritten for GIT by Nicolas Pitre <nico at cam.org>, (C) 2005-2007
+ * Rewritten for GIT by Nicolas Pitre <nico at fluxnic.net>, (C) 2005-2007
  * Adapted for Bazaar by John Arbash Meinel <john at arbash-meinel.com> (C) 2009
  *
  * This program is free software; you can redistribute it and/or modify
@@ -280,8 +280,11 @@
         if (fit_in_old) {
             // fprintf(stderr, "Fit all %d entries into old index\n",
             //                 copied_count);
-            /* No need to allocate a new buffer */
-            return NULL;
+            /*
+             * No need to allocate a new buffer, but return old_index ptr so
+             * callers can distinguish this from an OOM failure.
+             */
+            return old_index;
         } else {
             // fprintf(stderr, "Fit only %d entries into old index,"
             //                 " reallocating\n", copied_count);
@@ -370,9 +373,10 @@
 }
 
 
-struct delta_index *
+delta_result
 create_delta_index(const struct source_info *src,
-                   struct delta_index *old)
+                   struct delta_index *old,
+                   struct delta_index **fresh)
 {
     unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
     unsigned int total_num_entries;
@@ -383,7 +387,7 @@
     unsigned long memsize;
 
     if (!src->buf || !src->size)
-        return NULL;
+        return DELTA_SOURCE_EMPTY;
     buffer = src->buf;
 
     /* Determine index hash size.  Note that indexing skips the
@@ -408,7 +412,7 @@
           sizeof(*entry) * total_num_entries;
     mem = malloc(memsize);
     if (!mem)
-        return NULL;
+        return DELTA_OUT_OF_MEMORY;
     hash = mem;
     mem = hash + hsize;
     entry = mem;
@@ -419,7 +423,7 @@
     hash_count = calloc(hsize, sizeof(*hash_count));
     if (!hash_count) {
         free(hash);
-        return NULL;
+        return DELTA_OUT_OF_MEMORY;
     }
 
     /* then populate the index for the new data */
@@ -450,16 +454,15 @@
     total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
                                            total_num_entries);
     free(hash_count);
-    if (old) {
-        old->last_src = src;
-    }
     index = pack_delta_index(hash, hsize, total_num_entries, old);
     free(hash);
+    /* pack_delta_index only returns NULL on malloc failure */
     if (!index) {
-        return NULL;
+        return DELTA_OUT_OF_MEMORY;
     }
     index->last_src = src;
-    return index;
+    *fresh = index;
+    return DELTA_OK;
 }
 
 /* Take some entries, and put them into a custom hash.
@@ -679,9 +682,10 @@
     }
 }
 
-struct delta_index *
+delta_result
 create_delta_index_from_delta(const struct source_info *src,
-                              struct delta_index *old_index)
+                              struct delta_index *old_index,
+                              struct delta_index **fresh)
 {
     unsigned int i, num_entries, max_num_entries, prev_val, num_inserted;
     unsigned int hash_offset;
@@ -690,8 +694,10 @@
     struct delta_index *new_index;
     struct index_entry *entry, *entries;
 
+    if (!old_index)
+        return DELTA_INDEX_NEEDED;
     if (!src->buf || !src->size)
-        return NULL;
+        return DELTA_SOURCE_EMPTY;
     buffer = src->buf;
     top = buffer + src->size;
 
@@ -707,7 +713,7 @@
     /* allocate an array to hold whatever entries we find */
     entries = malloc(sizeof(*entry) * max_num_entries);
     if (!entries) /* malloc failure */
-        return NULL;
+        return DELTA_OUT_OF_MEMORY;
 
     /* then populate the index for the new data */
     prev_val = ~0;
@@ -774,16 +780,16 @@
         }
     }
     if (data != top) {
-        /* Something was wrong with this delta */
+        /* The source_info data passed was corrupted or otherwise invalid */
         free(entries);
-        return NULL;
+        return DELTA_SOURCE_BAD;
     }
     if (num_entries == 0) {
         /** Nothing to index **/
         free(entries);
-        return NULL;
+        *fresh = old_index;
+        return DELTA_OK;
     }
-    assert(old_index != NULL);
     old_index->last_src = src;
     /* See if we can fill in these values into the holes in the array */
     entry = entries;
@@ -841,11 +847,15 @@
         new_index = create_index_from_old_and_new_entries(old_index,
             entry, num_entries);
     } else {
-        new_index = NULL;
+        new_index = old_index;
         // fprintf(stderr, "inserted %d without resizing\n", num_inserted);
     }
     free(entries);
-    return new_index;
+    /* create_index_from_old_and_new_entries returns NULL on malloc failure */
+    if (!new_index)
+        return DELTA_OUT_OF_MEMORY;
+    *fresh = new_index;
+    return DELTA_OK;
 }
 
 void free_delta_index(struct delta_index *index)
@@ -868,10 +878,11 @@
  */
 #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
 
-void *
+delta_result
 create_delta(const struct delta_index *index,
              const void *trg_buf, unsigned long trg_size,
-             unsigned long *delta_size, unsigned long max_size)
+             unsigned long *delta_size, unsigned long max_size,
+             void **delta_data)
 {
     unsigned int i, outpos, outsize, moff, val;
     int msize;
@@ -882,9 +893,9 @@
     unsigned long source_size;
 
     if (!trg_buf || !trg_size)
-        return NULL;
+        return DELTA_BUFFER_EMPTY;
     if (index == NULL)
-        return NULL;
+        return DELTA_INDEX_NEEDED;
 
     outpos = 0;
     outsize = 8192;
@@ -892,7 +903,7 @@
         outsize = max_size + MAX_OP_SIZE + 1;
     out = malloc(outsize);
     if (!out)
-        return NULL;
+        return DELTA_OUT_OF_MEMORY;
 
     source_size = index->last_src->size + index->last_src->agg_offset;
 
@@ -1071,7 +1082,7 @@
             out = realloc(out, outsize);
             if (!out) {
                 free(tmp);
-                return NULL;
+                return DELTA_OUT_OF_MEMORY;
             }
         }
     }
@@ -1081,11 +1092,12 @@
 
     if (max_size && outpos > max_size) {
         free(out);
-        return NULL;
+        return DELTA_SIZE_TOO_BIG;
     }
 
     *delta_size = outpos;
-    return out;
+    *delta_data = out;
+    return DELTA_OK;
 }
 
 /* vim: et ts=4 sw=4 sts=4

=== modified file 'doc/en/release-notes/bzr-2.4.txt'
--- a/doc/en/release-notes/bzr-2.4.txt	2011-03-22 12:29:53 +0000
+++ b/doc/en/release-notes/bzr-2.4.txt	2011-03-22 16:39:39 +0000
@@ -291,6 +291,10 @@
   by cacthing them so they can be re-raised in the controlling thread. It's
   available in the ``bzrlib.cethread`` module.  (Vincent Ladeuil)
 
+* Correctly propogate malloc failures from diff-delta.c code as MemoryError
+  so OOM conditions during groupcompress are clearly reported. This entailed a
+  change to several function signatures. (Martin [gz], #633336)
+
 * ``HookPoint.lazy_hook`` and ``Hooks.install_named_lazy_hook`` can install 
   hooks for which the callable is loaded lazily.  (Jelmer Vernooij)
 




More information about the bazaar-commits mailing list