Rev 89: Start moving the information about source buffers into the actual index_entry. in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin
John Arbash Meinel
john at arbash-meinel.com
Tue Mar 3 16:02:46 GMT 2009
At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/rabin
------------------------------------------------------------
revno: 89
revision-id: john at arbash-meinel.com-20090303160222-4bkou2s65s60h75a
parent: john at arbash-meinel.com-20090303150939-93yexh0v5hmvkwdo
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: rabin
timestamp: Tue 2009-03-03 10:02:22 -0600
message:
Start moving the information about source buffers into the actual index_entry.
This leads the way for combining indexes for multiple sources together.
-------------- next part --------------
=== modified file 'diff-delta.c'
--- a/diff-delta.c 2009-03-03 14:59:31 +0000
+++ b/diff-delta.c 2009-03-03 16:02:22 +0000
@@ -112,8 +112,16 @@
0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
};
+struct source_info {
+ const void *src_buf; /* Pointer to the beginning of source data */
+ unsigned long src_size; /* Total length of source data */
+ unsigned long agg_src_offset; /* Start of source data as part of the
+ aggregate source */
+};
+
struct index_entry {
const unsigned char *ptr;
+ const struct source_info *src;
unsigned int val;
};
@@ -124,12 +132,10 @@
struct delta_index {
unsigned long memsize; /* Total bytes pointed to by this index */
- const void *src_buf; /* Pointer to the beginning of source data */
- unsigned long src_size; /* Total length of source data */
- unsigned long agg_src_offset; /* Start of source data as part of the
- aggregate source */
+ struct source_info src; /* Information about the referenced source */
unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
entry */
+ unsigned int num_entries; /* The total number of entries in this index */
struct index_entry *hash[];
};
@@ -216,6 +222,7 @@
index = mem;
index->memsize = memsize;
index->hash_mask = hsize - 1;
+ index->num_entries = entries;
mem = index->hash;
packed_hash = mem;
@@ -228,8 +235,10 @@
* into consecutive array entries.
*/
packed_hash[i] = packed_entry;
- for (entry = hash[i]; entry; entry = entry->next)
- *packed_entry++ = entry->entry;
+ for (entry = hash[i]; entry; entry = entry->next, ++packed_entry) {
+ *packed_entry = entry->entry;
+ packed_entry->src = &index->src;
+ }
}
/* Sentinel value to indicate the length of the last hash bucket */
@@ -311,9 +320,9 @@
if (!index) {
return NULL;
}
- index->src_buf = buf;
- index->src_size = bufsize;
- index->agg_src_offset = agg_src_offset;
+ index->src.src_buf = buf;
+ index->src.src_size = bufsize;
+ index->src.agg_src_offset = agg_src_offset;
return index;
}
@@ -343,7 +352,8 @@
unsigned long *delta_size, unsigned long max_size)
{
unsigned int i, j, outpos, outsize, moff, msize, val;
- const struct delta_index *index, *mindex;
+ const struct delta_index *index;
+ const struct source_info *msource;
int inscnt;
const unsigned char *ref_data, *ref_top, *data, *top;
unsigned char *out;
@@ -366,10 +376,10 @@
index = indexes[0];
for (j = 0; j < num_indexes; ++j) {
index = indexes[j];
- i += index->src_size;
+ i += index->src.src_size;
}
- assert(i <= index->src_size + index->agg_src_offset);
- i = index->src_size + index->agg_src_offset;
+ assert(i <= index->src.src_size + index->src.agg_src_offset);
+ i = index->src.src_size + index->src.agg_src_offset;
while (i >= 0x80) {
out[outpos++] = i | 0x80;
i >>= 7;
@@ -404,7 +414,7 @@
moff = 0;
msize = 0;
- mindex = NULL;
+ msource = NULL;
while (data < top) {
if (msize < 4096) {
/* we don't have a 'worthy enough' match yet, so let's look for
@@ -417,8 +427,6 @@
for (j = 0; j < num_indexes; ++j) {
index = indexes[j];
i = val & index->hash_mask;
- ref_top = index->src_buf + index->src_size;
- ref_data = index->src_buf;
/* TODO: When using multiple indexes like this, the hash tables
* mapping val => index_entry become less efficient.
* You end up getting a lot more collisions in the hash,
@@ -426,11 +434,16 @@
*/
for (entry = index->hash[i]; entry < index->hash[i+1];
entry++) {
- const unsigned char *ref = entry->ptr;
- const unsigned char *src = data;
- unsigned int ref_size = ref_top - ref;
+ const unsigned char *ref;
+ const unsigned char *src;
+ unsigned int ref_size;
if (entry->val != val)
continue;
+ ref = entry->ptr;
+ src = data;
+ ref_data = entry->src->src_buf;
+ ref_top = ref_data + entry->src->src_size;
+ ref_size = ref_top - ref;
/* ref_size is the longest possible match that we could make
* here. If ref_size <= msize, then we know that we cannot
* match more bytes with this location that we have already
@@ -446,7 +459,7 @@
if (msize < ref - entry->ptr) {
/* this is our best match so far */
msize = ref - entry->ptr;
- mindex = index;
+ msource = entry->src;
moff = entry->ptr - ref_data;
if (msize >= 4096) /* good enough */
break;
@@ -477,7 +490,7 @@
unsigned char *op;
if (inscnt) {
- ref_data = mindex->src_buf;
+ ref_data = msource->src_buf;
while (moff && ref_data[moff-1] == data[-1]) {
/* we can match one byte back */
msize++;
@@ -504,7 +517,7 @@
/* moff is the offset in the local structure, for encoding, we need
* to push it into the global offset
*/
- moff += mindex->agg_src_offset;
+ moff += msource->agg_src_offset;
if (moff & 0x000000ff)
out[outpos++] = moff >> 0, i |= 0x01;
if (moff & 0x0000ff00)
@@ -516,7 +529,7 @@
/* Put it back into local coordinates, in case we have multiple
* copies in a row.
*/
- moff -= mindex->agg_src_offset;
+ moff -= msource->agg_src_offset;
if (msize & 0x00ff)
out[outpos++] = msize >> 0, i |= 0x10;
More information about the bazaar-commits
mailing list