Rev 3900: Now we are able to weave 'add_source' into the existing index. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/gc_delta_index_room
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 19 06:02:00 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/gc_delta_index_room
------------------------------------------------------------
revno: 3900
revision-id: john at arbash-meinel.com-20090319060153-p0e7qedqdq9vd8iq
parent: john at arbash-meinel.com-20090319044457-py567xqq3nyzwau4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: gc_delta_index_room
timestamp: Thu 2009-03-19 01:01:53 -0500
message:
Now we are able to weave 'add_source' into the existing index.
This brings 'bzr pack' time down to ~23.6s (with debugging on).
According to lsprof time for 'add_delta_source' overall dropped from ~5s down to
about 300ms, and now the time for 'add_source' dropped 5s->3.3s->1.6s.
Next thing is to probably bump the number of free slots.
-------------- next part --------------
=== modified file 'bzrlib/delta.h'
--- a/bzrlib/delta.h 2009-03-18 21:50:56 +0000
+++ b/bzrlib/delta.h 2009-03-19 06:01:53 +0000
@@ -32,7 +32,7 @@
*/
extern struct delta_index *
create_delta_index(const struct source_info *src,
- const struct delta_index *old);
+ struct delta_index *old);
/*
=== modified file 'bzrlib/diff-delta.c'
--- a/bzrlib/diff-delta.c 2009-03-19 04:44:57 +0000
+++ b/bzrlib/diff-delta.c 2009-03-19 06:01:53 +0000
@@ -209,14 +209,77 @@
static struct delta_index *
pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
- unsigned int num_entries, const struct delta_index *old_index)
+ unsigned int num_entries, struct delta_index *old_index)
{
- unsigned int i, j, hmask, memsize;
+ unsigned int i, j, hmask, memsize, to_copy, fit_in_old, copied_count;
struct unpacked_index_entry *entry;
struct delta_index *index;
- struct index_entry *packed_entry, **packed_hash, *old_entry;
+ struct index_entry *packed_entry, **packed_hash, *old_entry, *copy_from;
struct index_entry null_entry = {0};
void *mem;
+
+ hmask = hsize - 1;
+
+ if (old_index) {
+ fprintf(stderr, "Packing %d entries into %d for total of %d entries"
+ " %x => %x\n",
+ num_entries - old_index->num_entries,
+ old_index->num_entries, num_entries,
+ old_index->hash_mask, hmask);
+ } else {
+ fprintf(stderr, "Packing %d entries into a new index\n",
+ num_entries);
+ }
+ /* First, see if we can squeeze the new items into the existing structure.
+ */
+ fit_in_old = 0;
+ copied_count = 0;
+ if (old_index && old_index->hash_mask == hmask) {
+ fit_in_old = 1;
+ for (i = 0; i < hsize; ++i) {
+ packed_entry = NULL;
+ for (entry = hash[i]; entry; entry = entry->next) {
+ if (packed_entry == NULL) {
+ /* Find the last open spot */
+ packed_entry = old_index->hash[i + 1];
+ --packed_entry;
+ while (packed_entry >= old_index->hash[i]
+ && packed_entry->ptr == NULL) {
+ --packed_entry;
+ }
+ ++packed_entry;
+ }
+ if (packed_entry >= old_index->hash[i+1]
+ || packed_entry->ptr != NULL) {
+ /* There are no free spots here :( */
+ fit_in_old = 0;
+ break;
+ }
+ /* We found an empty spot to put this entry
+ * Copy it over, and remove it from the linked list, just in
+ * case we end up running out of room later.
+ */
+ *packed_entry++ = entry->entry;
+ assert(entry == hash[i]);
+ hash[i] = entry->next;
+ copied_count += 1;
+ old_index->num_entries++;
+ }
+ if (!fit_in_old) {
+ break;
+ }
+ }
+ }
+ if (old_index) {
+ if (fit_in_old) {
+ fprintf(stderr, "Fit all %d entries into old index\n",
+ copied_count);
+ return NULL;
+ } else {
+ fprintf(stderr, "Fit only %d entries into old index,"
+ " reallocating\n", copied_count);
+ }
+ }
/*
* Now create the packed index in array form
* rather than linked lists.
@@ -230,7 +293,6 @@
return NULL;
}
- hmask = hsize - 1;
index = mem;
index->memsize = memsize;
index->hash_mask = hmask;
@@ -258,13 +320,33 @@
* old_index->hash_mask? Would it make any real difference?
*/
j = i & old_index->hash_mask;
+ copy_from = old_index->hash[j];
+ to_copy = 0;
for (old_entry = old_index->hash[j];
old_entry < old_index->hash[j + 1] && old_entry->ptr != NULL;
old_entry++) {
if ((old_entry->val & hmask) == i) {
- *packed_entry++ = *old_entry;
+ to_copy += 1;
+ } else {
+ /* We reached the end of a string of entries that should
+ * be copied. Copy the group, and then move the pointers.
+ */
+ if (to_copy > 0) {
+ memcpy(packed_entry, copy_from,
+ sizeof(*old_entry)*to_copy);
+ packed_entry += to_copy;
+ to_copy = 0;
+ }
+ /* Don't copy *this* entry, and start the copy after this */
+ copy_from = old_entry + 1;
}
}
+ if (to_copy > 0) {
+ memcpy(packed_entry, copy_from,
+ sizeof(*old_entry)*to_copy);
+ packed_entry += to_copy;
+ to_copy = 0;
+ }
}
for (entry = hash[i]; entry; entry = entry->next) {
*packed_entry++ = entry->entry;
@@ -292,135 +374,8 @@
struct delta_index *
-create_index_from_old_and_hash(const struct delta_index *old,
- struct unpacked_index_entry **hash,
- unsigned int hsize,
- unsigned int num_entries)
-{
- unsigned int i, memsize, total_num_entries, num_nulls;
- struct unpacked_index_entry *entry;
- struct delta_index *index;
- struct index_entry *packed_entry, **packed_hash, null_entry = {0};
- struct index_entry *copy_from_start, *copy_to_start, *old_entry;
- size_t total_copy, to_copy;
- unsigned int num_ops;
- unsigned int bytes_copied;
- void *mem;
- /*
- * Now create the packed index in array form
- * rather than linked lists.
- */
- total_num_entries = num_entries + old->num_entries;
- memsize = sizeof(*index)
- + sizeof(*packed_hash) * (hsize+1)
- + sizeof(*packed_entry) * (total_num_entries + hsize * EXTRA_NULLS);
- mem = malloc(memsize);
- if (!mem) {
- return NULL;
- }
-
- index = mem;
- index->memsize = memsize;
- index->hash_mask = hsize - 1;
- index->num_entries = total_num_entries;
-
- mem = index->hash;
- packed_hash = mem;
- mem = packed_hash + (hsize+1);
- packed_entry = mem;
-
- total_copy = 0;
- bytes_copied = 0;
- num_ops = 0;
- copy_from_start = copy_to_start = NULL;
- num_nulls = 0;
- for (i = 0; i < hsize; i++) {
- /*
- * Coalesce all entries belonging to one linked list
- * into consecutive array entries.
- * The entries in old all come before the entries in hash.
- */
- packed_hash[i] = packed_entry;
- old_entry = old->hash[i+1];
- /* Find how many NULL items are in this hash section */
- old_entry--;
- num_nulls = 0;
- while (old_entry >= old->hash[i] && old_entry->ptr == NULL) {
- old_entry--;
- num_nulls++;
- }
- old_entry++;
- to_copy = (old_entry - old->hash[i]);
- if (to_copy > 0) {
- /* This next range can be copied wholesale. However, we will wait
- * until we have to insert something from the new data before we
- * copy everything.
- * So for now, just reserve space for the copy.
- */
- if (total_copy == 0) {
- copy_from_start = old->hash[i];
- copy_to_start = packed_entry;
- }
- packed_entry += to_copy;
- total_copy += to_copy;
- assert((packed_entry - copy_to_start) == total_copy);
- assert((old_entry - copy_from_start) == total_copy);
- }
- for (entry = hash[i]; entry; entry = entry->next) {
- /* We have an entry to insert, so flush the copy buffer */
- if (total_copy > 0) {
- assert(copy_to_start != NULL);
- assert(copy_from_start != NULL);
- memcpy(copy_to_start, copy_from_start,
- total_copy * sizeof(struct index_entry));
- bytes_copied += (total_copy * sizeof(struct index_entry));
- total_copy = 0;
- copy_from_start = copy_to_start = NULL;
- num_ops += 1;
- }
- *packed_entry++ = entry->entry;
- }
- if (total_copy == 0) {
- /* We inserted some entries, copy the null entries to the end */
- for (; num_nulls > 0; --num_nulls) {
- *packed_entry++ = null_entry;
- }
- } else {
- /* We haven't done an insert yet, so just queue the null entries
- * for copying.
- */
- total_copy += num_nulls;
- packed_entry += num_nulls;
- assert((packed_entry - copy_to_start) == total_copy);
- assert((old->hash[i+1] - copy_from_start) == total_copy);
- num_nulls = 0;
- }
- }
- if (total_copy > 0) {
- assert(copy_to_start != NULL);
- assert(copy_from_start != NULL);
- memcpy(copy_to_start, copy_from_start,
- total_copy * sizeof(struct index_entry));
- bytes_copied += (total_copy * sizeof(struct index_entry));
- num_ops += 1;
- for (; num_nulls > 0; --num_nulls) {
- *packed_entry++ = null_entry;
- }
- }
-
- /* Sentinel value to indicate the length of the last hash bucket */
- packed_hash[hsize] = packed_entry;
-
- assert(packed_entry - (struct index_entry *)mem
- == total_num_entries + hsize * EXTRA_NULLS);
- index->last_entry = (packed_entry - 1);
- return index;
-}
-
-
-struct delta_index *
create_delta_index(const struct source_info *src,
- const struct delta_index *old)
+ struct delta_index *old)
{
unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
unsigned int total_num_entries;
@@ -494,12 +449,15 @@
total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
total_num_entries);
free(hash_count);
+ if (old) {
+ old->last_src = src;
+ }
index = pack_delta_index(hash, hsize, total_num_entries, old);
- index->last_src = src;
free(hash);
if (!index) {
return NULL;
}
+ index->last_src = src;
return index;
}
More information about the bazaar-commits
mailing list