Rev 92: Remove the multi-index handling now that we have index combining instead. in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin

John Arbash Meinel john at arbash-meinel.com
Tue Mar 3 18:11:24 GMT 2009


At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin

------------------------------------------------------------
revno: 92
revision-id: john at arbash-meinel.com-20090303181057-i1239vipqi27fxbs
parent: john at arbash-meinel.com-20090303180544-mfgw9jsndwiwj047
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: rabin
timestamp: Tue 2009-03-03 12:10:57 -0600
message:
  Remove the multi-index handling now that we have index combining instead.
-------------- next part --------------
=== modified file '_groupcompress_pyx.pyx'
--- a/_groupcompress_pyx.pyx	2009-03-03 18:05:44 +0000
+++ b/_groupcompress_pyx.pyx	2009-03-03 18:10:57 +0000
@@ -36,8 +36,7 @@
         # struct index_entry *hash[]
     delta_index * create_delta_index(source_info *src, delta_index *old)
     void free_delta_index(delta_index *index)
-    void *create_delta(delta_index **indexes,
-             unsigned int num_indexes,
+    void *create_delta(delta_index *indexes,
              void *buf, unsigned long bufsize,
              unsigned long *delta_size, unsigned long max_delta_size)
     unsigned long get_delta_hdr_size(unsigned char **datap,
@@ -180,7 +179,7 @@
         # TODO: inline some of create_delta so we at least don't have to double
         #       malloc, and can instead use PyString_FromStringAndSize, to
         #       allocate the bytes into the final string
-        delta = create_delta(&self._index, 1,
+        delta = create_delta(self._index,
                              target, target_size,
                              &delta_size, max_delta_size)
         result = None

=== modified file 'delta.h'
--- a/delta.h	2009-03-03 18:05:44 +0000
+++ b/delta.h	2009-03-03 18:10:57 +0000
@@ -51,8 +51,7 @@
  * must be freed by the caller.
  */
 extern void *
-create_delta(struct delta_index **indexes,
-		 unsigned int num_indexes,
+create_delta(const struct delta_index *index,
 		 const void *buf, unsigned long bufsize,
 		 unsigned long *delta_size, unsigned long max_delta_size);
 

=== modified file 'diff-delta.c'
--- a/diff-delta.c	2009-03-03 18:05:44 +0000
+++ b/diff-delta.c	2009-03-03 18:10:57 +0000
@@ -125,7 +125,7 @@
 
 struct delta_index {
 	unsigned long memsize; /* Total bytes pointed to by this index */
-	struct source_info *src; /* Information about the referenced source */
+	const struct source_info *last_src; /* Information about the referenced source */
 	unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
 							   entry */
 	unsigned int num_entries; /* The total number of entries in this index */
@@ -348,7 +348,7 @@
 										   total_num_entries);
 	free(hash_count);
 	index = pack_delta_index(hash, hsize, total_num_entries);
-	index->src = src;
+	index->last_src = src;
 	free(hash);
 	if (!index) {
 		return NULL;
@@ -376,13 +376,11 @@
 #define MAX_OP_SIZE	(5 + 5 + 1 + RABIN_WINDOW + 7)
 
 void *
-create_delta(struct delta_index **indexes,
-			 unsigned int num_indexes,
+create_delta(const struct delta_index *index,
 			 const void *trg_buf, unsigned long trg_size,
 			 unsigned long *delta_size, unsigned long max_size)
 {
-	unsigned int i, j, outpos, outsize, moff, msize, val;
-	const struct delta_index *index;
+	unsigned int i, outpos, outsize, moff, msize, val;
 	const struct source_info *msource;
 	int inscnt;
 	const unsigned char *ref_data, *ref_top, *data, *top;
@@ -391,7 +389,7 @@
 
 	if (!trg_buf || !trg_size)
 		return NULL;
-	if (num_indexes == 0)
+	if (index == NULL)
 		return NULL;
 
 	outpos = 0;
@@ -403,15 +401,8 @@
 		return NULL;
 
 	/* store reference buffer size */
-	i = 0;
-	index = indexes[0];
-	for (j = 0; j < num_indexes; ++j) {
-		index = indexes[j];
-		i += index->src->size;
-	}
-	assert(i <= index->src->size + index->src->agg_offset);
-	i = index->src->size + index->src->agg_offset;
-	source_size = i;
+	source_size = index->last_src->size + index->last_src->agg_offset;
+	i = source_size;
 	while (i >= 0x80) {
 		out[outpos++] = i | 0x80;
 		i >>= 7;
@@ -456,46 +447,43 @@
 			/* Shift the window by one byte. */
 			val ^= U[data[-RABIN_WINDOW]];
 			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
-			for (j = 0; j < num_indexes; ++j) {
-				index = indexes[j];
-				i = val & index->hash_mask;
-				/* TODO: When using multiple indexes like this, the hash tables
-				 *		 mapping val => index_entry become less efficient.
-				 *		 You end up getting a lot more collisions in the hash,
-				 *		 which doesn't actually lead to a entry->val match.
+			i = val & index->hash_mask;
+			/* TODO: When using multiple indexes like this, the hash tables
+			 *		 mapping val => index_entry become less efficient.
+			 *		 You end up getting a lot more collisions in the hash,
+			 *		 which doesn't actually lead to an entry->val match.
+			 */
+			for (entry = index->hash[i]; entry < index->hash[i+1];
+				 entry++) {
+				const unsigned char *ref;
+				const unsigned char *src;
+				unsigned int ref_size;
+				if (entry->val != val)
+					continue;
+				ref = entry->ptr;
+				src = data;
+				ref_data = entry->src->buf;
+				ref_top = ref_data + entry->src->size;
+				ref_size = ref_top - ref;
+				/* ref_size is the longest possible match that we could make
+				 * here. If ref_size <= msize, then we know that we cannot
+				 * match more bytes with this location than we have already
+				 * matched.
 				 */
-				for (entry = index->hash[i]; entry < index->hash[i+1];
-					 entry++) {
-					const unsigned char *ref;
-					const unsigned char *src;
-					unsigned int ref_size;
-					if (entry->val != val)
-						continue;
-					ref = entry->ptr;
-					src = data;
-					ref_data = entry->src->buf;
-					ref_top = ref_data + entry->src->size;
-					ref_size = ref_top - ref;
-					/* ref_size is the longest possible match that we could make
-					 * here. If ref_size <= msize, then we know that we cannot
-					 * match more bytes with this location that we have already
-					 * matched.
-					 */
-					if (ref_size > top - src)
-						ref_size = top - src;
-					if (ref_size <= msize)
+				if (ref_size > top - src)
+					ref_size = top - src;
+				if (ref_size <= msize)
+					break;
+				/* See how many bytes actually match at this location. */
+				while (ref_size-- && *src++ == *ref)
+					ref++;
+				if (msize < ref - entry->ptr) {
+					/* this is our best match so far */
+					msize = ref - entry->ptr;
+					msource = entry->src;
+					moff = entry->ptr - ref_data;
+					if (msize >= 4096) /* good enough */
 						break;
-					/* See how many bytes actually match at this location. */
-					while (ref_size-- && *src++ == *ref)
-						ref++;
-					if (msize < ref - entry->ptr) {
-						/* this is our best match so far */
-						msize = ref - entry->ptr;
-						msource = entry->src;
-						moff = entry->ptr - ref_data;
-						if (msize >= 4096) /* good enough */
-							break;
-					}
 				}
 			}
 		}



More information about the bazaar-commits mailing list