Rev 5369: (jam) Add __sizeof__ to our extension types. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Fri Aug 6 04:24:43 BST 2010


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5369 [merge]
revision-id: pqm at pqm.ubuntu.com-20100806032440-cetyanzrh3jcxjlf
parent: pqm at pqm.ubuntu.com-20100805083007-b14pqhgoyerngrh6
parent: john at arbash-meinel.com-20100802192030-p5ofsyvopbog4xj3
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2010-08-06 04:24:40 +0100
message:
  (jam) Add __sizeof__ to our extension types.
modified:
  bzrlib/_groupcompress_pyx.pyx  _groupcompress_c.pyx-20080724041824-yelg6ii7c7zxt4z0-1
  bzrlib/_simple_set_pyx.pyx     _static_tuple_intern-20091002053806-sid67p8spedru51w-1
  bzrlib/diff-delta.c            diffdelta.c-20090226042143-l9wzxynyuxnb5hus-1
  bzrlib/tests/test__simple_set.py test__static_tuple_i-20091002053806-sid67p8spedru51w-2
=== modified file 'bzrlib/_groupcompress_pyx.pyx'
--- a/bzrlib/_groupcompress_pyx.pyx	2010-02-17 17:11:16 +0000
+++ b/bzrlib/_groupcompress_pyx.pyx	2010-08-02 17:08:29 +0000
@@ -22,6 +22,8 @@
 
 
 cdef extern from "Python.h":
+    ctypedef struct PyObject:
+        pass
     ctypedef int Py_ssize_t # Required for older pyrex versions
     int PyString_CheckExact(object)
     char * PyString_AS_STRING(object)
@@ -53,6 +55,7 @@
              unsigned long *delta_size, unsigned long max_delta_size) nogil
     unsigned long get_delta_hdr_size(unsigned char **datap,
                                      unsigned char *top) nogil
+    unsigned long sizeof_delta_index(delta_index *index)
     Py_ssize_t DELTA_SIZE_MIN
 
 
@@ -91,8 +94,8 @@
     cdef readonly object _sources
     cdef source_info *_source_infos
     cdef delta_index *_index
+    cdef public unsigned long _source_offset
     cdef readonly unsigned int _max_num_sources
-    cdef public unsigned long _source_offset
 
     def __init__(self, source=None):
         self._sources = []
@@ -105,6 +108,22 @@
         if source is not None:
             self.add_source(source, 0)
 
+    def __sizeof__(self):
+        # We want to track the _source_infos allocations, but the referenced
+        # void* are actually tracked in _sources itself.
+        # XXX: Cython is capable of doing sizeof(class) and returning the size
+        #      of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need
+        #      to do it manually. *sigh* Note that we might get it wrong
+        #      because of alignment issues.
+        cdef Py_ssize_t size
+        # PyObject start, vtable *, 3 pointers, 1 unsigned long, 1 unsigned int
+        size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*)
+                 + sizeof(unsigned long)
+                 + sizeof(unsigned int))
+                + (sizeof(source_info) * self._max_num_sources)
+                + sizeof_delta_index(self._index))
+        return size
+
     def __repr__(self):
         return '%s(%d, %d)' % (self.__class__.__name__,
             len(self._sources), self._source_offset)

=== modified file 'bzrlib/_simple_set_pyx.pyx'
--- a/bzrlib/_simple_set_pyx.pyx	2010-02-17 17:11:16 +0000
+++ b/bzrlib/_simple_set_pyx.pyx	2010-08-02 17:16:12 +0000
@@ -115,6 +115,20 @@
             raise MemoryError()
         memset(self._table, 0, n_bytes)
 
+    def __sizeof__(self):
+        # Note: Pyrex doesn't allow sizeof(class) so we re-implement it here.
+        # Bits are:
+        #   1: PyObject
+        #   2: vtable *
+        #   3: 3 Py_ssize_t
+        #   4: PyObject**
+        # Note that we might get alignment, etc, wrong, but at least this is
+        # better than no estimate at all
+        # return sizeof(SimpleSet) + (self._mask + 1) * (sizeof(PyObject*))
+        return (sizeof(PyObject) + sizeof(void*)
+                + 3*sizeof(Py_ssize_t) + sizeof(PyObject**)
+                + (self._mask + 1) * sizeof(PyObject*))
+
     def __dealloc__(self):
         if self._table != NULL:
             PyMem_Free(self._table)

=== modified file 'bzrlib/diff-delta.c'
--- a/bzrlib/diff-delta.c	2009-12-14 15:54:42 +0000
+++ b/bzrlib/diff-delta.c	2010-08-02 16:35:11 +0000
@@ -853,7 +853,8 @@
     free(index);
 }
 
-unsigned long sizeof_delta_index(struct delta_index *index)
+unsigned long
+sizeof_delta_index(struct delta_index *index)
 {
     if (index)
         return index->memsize;
@@ -872,7 +873,8 @@
              const void *trg_buf, unsigned long trg_size,
              unsigned long *delta_size, unsigned long max_size)
 {
-    unsigned int i, outpos, outsize, moff, msize, val;
+    unsigned int i, outpos, outsize, moff, val;
+    int msize;
     const struct source_info *msource;
     int inscnt;
     const unsigned char *ref_data, *ref_top, *data, *top;
@@ -943,7 +945,7 @@
                  entry++) {
                 const unsigned char *ref;
                 const unsigned char *src;
-                unsigned int ref_size;
+                int ref_size;
                 if (entry->val != val)
                     continue;
                 ref = entry->ptr;
@@ -956,14 +958,14 @@
                  * match more bytes with this location that we have already
                  * matched.
                  */
-                if (ref_size > top - src)
+                if (ref_size > (top - src))
                     ref_size = top - src;
                 if (ref_size <= msize)
                     break;
                 /* See how many bytes actually match at this location. */
                 while (ref_size-- && *src++ == *ref)
                     ref++;
-                if (msize < ref - entry->ptr) {
+                if (msize < (ref - entry->ptr)) {
                     /* this is our best match so far */
                     msize = ref - entry->ptr;
                     msource = entry->src;

=== modified file 'bzrlib/tests/test__simple_set.py'
--- a/bzrlib/tests/test__simple_set.py	2010-02-17 17:11:16 +0000
+++ b/bzrlib/tests/test__simple_set.py	2010-08-02 19:20:30 +0000
@@ -379,3 +379,13 @@
         # And even removing an item still causes it to fail
         obj.discard(k2)
         self.assertRaises(RuntimeError, iterator.next)
+
+    def test__sizeof__(self):
+        # SimpleSet needs a custom sizeof implementation, because it allocates
+        # memory that Python cannot directly see (_table).
+        # Too much variability in platform sizes for us to give a fixed size
+        # here. However without a custom implementation, __sizeof__ would give
+        # us only the size of the object, and not its table. We know the table
+        # is at least 4 bytes * 1024 entries in size.
+        obj = self.module.SimpleSet()
+        self.assertTrue(obj.__sizeof__() > 4096)




More information about the bazaar-commits mailing list