Rev 2674: Implement the inner loop to retry for more data. in http://bzr.arbash-meinel.com/branches/bzr/0.19-dev/pyrex_knit_extract

John Arbash Meinel john at arbash-meinel.com
Thu Aug 2 23:12:35 BST 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.19-dev/pyrex_knit_extract

------------------------------------------------------------
revno: 2674
revision-id: john at arbash-meinel.com-20070802221201-fq0ze6hinmbs5j9q
parent: john at arbash-meinel.com-20070802215505-hahicpf9014hhe3o
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: pyrex_knit_extract
timestamp: Thu 2007-08-02 17:12:01 -0500
message:
  Implement the inner loop to retry for more data.
  Now we can handle any data that fits inside our 1MB extraction buffer.
  But if a single line is >1MB, we will fail.
  Need to figure out how to handle such oversized lines in a better manner.
modified:
  bzrlib/_knit_helpers_c.pyx     knit_c.pyx-20070509143944-u42gy8w387a10m0j-1
-------------- next part --------------
=== modified file 'bzrlib/_knit_helpers_c.pyx'
--- a/bzrlib/_knit_helpers_c.pyx	2007-08-02 21:55:05 +0000
+++ b/bzrlib/_knit_helpers_c.pyx	2007-08-02 22:12:01 +0000
@@ -121,6 +121,7 @@
 cdef extern from "string.h":
     void *memchr(void *s, int c, size_t n)
     int memcmp(void *b1, void *b2, size_t len)
+    void *memmove(void *dest, void *src, size_t len)
 
 
 cdef int string_to_int_safe(char *s, char *end, int *out) except -1:
@@ -540,6 +541,17 @@
         # to the start of the buffer, and do another decompression from the
         # tail.
         avail_out = self.strm.avail_out
+        if avail_out == 0: # Reset the buffer
+            # Move the current data to the front of the decompress buffer
+            # and open up more data at the tail.
+            memmove(self.decompress_buffer, self.cur_available,
+                    self.bytes_available)
+            self.cur_available = self.decompress_buffer
+            self.strm.next_out = <Bytef*>(self.cur_available
+                                          + self.bytes_available)
+            self.strm.avail_out = (self.decompress_buf_size
+                                   - self.bytes_available)
+            avail_out = self.strm.avail_out
         retval = inflate(&self.strm, Z_NO_FLUSH)
         if retval == Z_STREAM_END:
             self.stream_finished = 1 # True
@@ -580,7 +592,9 @@
         # self.strm.next_in should point to the complete gzip'd hunk.
 
         cdef char *end_of_line
+        cdef int bytes_processed
 
+        bytes_processed = 0
         # TODO: jam 2007-07-24 Figure out how to make this code support lines
         #       of "unlimited" length. Such as a versioned ISO where a single
         #       line is unlikely to fit in the decompression buffer
@@ -595,7 +609,20 @@
             if end_of_line == NULL:
                 # We reached the end of the buffer without finding a newline
                 # we need to extract more bytes, and try again
-                assert False, 'not implemented yet'
+                if self.stream_finished:
+                    raise errors.KnitCorrupt(self.knit_data_name,
+                        "Missing a trailing newline")
+                # Extract a bit more data out of the stream
+                bytes_processed = self.bytes_available
+                self._extract_from_stream()
+                assert self.bytes_available > bytes_processed
+                end_of_line = <char *>memchr(
+                        self.cur_available+bytes_processed,
+                        c'\n', self.bytes_available-bytes_processed)
+                if end_of_line == NULL:
+                    # This seems like we have a single line which
+                    # is longer than our buffer :(
+                    raise AssertionError('Not implemented')
             self.cur_line = self.cur_available
             self.cur_available = end_of_line + 1
             self.cur_line_size = self.cur_available - self.cur_line



More information about the bazaar-commits mailing list