Rev 3075: Change the C PatienceDiff implementation to support arbitrary objects. in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/patience_tuples
John Arbash Meinel
john at arbash-meinel.com
Tue Dec 4 15:24:59 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/patience_tuples
------------------------------------------------------------
revno: 3075
revision-id:john at arbash-meinel.com-20071204152439-hlj4y6a8a2fb6nkz
parent: pqm at pqm.ubuntu.com-20071204035213-2kot5u403spjchen
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: patience_tuples
timestamp: Tue 2007-12-04 09:24:39 -0600
message:
Change the C PatienceDiff implementation to support arbitrary objects.
modified:
bzrlib/_patiencediff_c.c _patiencediff_c.c-20070721205602-q3imkipwlgagp3cy-1
bzrlib/tests/test_diff.py testdiff.py-20050727164403-d1a3496ebb12e339
-------------- next part --------------
=== modified file 'bzrlib/_patiencediff_c.c'
--- a/bzrlib/_patiencediff_c.c 2007-09-04 09:10:35 +0000
+++ b/bzrlib/_patiencediff_c.c 2007-12-04 15:24:39 +0000
@@ -70,11 +70,11 @@
struct line {
- int hash; /* hash code of the string */
+ long hash; /* hash code of the string/object */
Py_ssize_t next; /* next line from the same equivalence class */
Py_ssize_t equiv; /* equivalence class */
Py_ssize_t len;
- const char *data;
+ const PyObject *data;
};
@@ -152,7 +152,8 @@
compare_lines(struct line *a, struct line *b)
{
return ((a->hash != b->hash) || (a->len != b->len) ||
- memcmp(a->data, b->data, a->len));
+ PyObject_Compare((PyObject *)a->data, (PyObject *)b->data));
+ //memcmp(a->data, b->data, a->len));
}
@@ -545,9 +546,10 @@
static Py_ssize_t
load_lines(PyObject *orig, struct line **lines)
{
- Py_ssize_t size, i, j;
- int h;
- char *p;
+ Py_ssize_t size, i, j, tuple_len;
+ Py_ssize_t total_len;
+ // int h;
+ // char *p;
struct line *line;
PyObject *seq, *item;
@@ -571,25 +573,39 @@
for (i = 0; i < size; i++) {
item = PySequence_Fast_GET_ITEM(seq, i);
- if (!PyString_Check(item)){
- PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected string,"
- " %.80s found",
- i, item->ob_type->tp_name);
- Py_DECREF(seq);
- return -1;
+ // if (!PyString_Check(item)){
+ // PyErr_Format(PyExc_TypeError,
+ // "sequence item %zd: expected string,"
+ // " %.80s found",
+ // i, item->ob_type->tp_name);
+ // Py_DECREF(seq);
+ // return -1;
+ // }
+ if (PyString_Check(item)) {
+ // we could use the 'djb2' hash here if we find it is better than
+ // PyObject_Hash
+ line->len = PyString_GET_SIZE(item);
+ } else if (PyTuple_Check(item)) {
+ total_len = 0;
+ tuple_len = PyObject_Length(item);
+ for (j = 0; j < tuple_len; ++j) {
+ total_len += PyObject_Length(PySequence_Fast_GET_ITEM(item, j));
+ }
+ line->len = total_len;
+ } else {
+ // Generic length
+ line->len = PyObject_Length(item);
}
- line->len = PyString_GET_SIZE(item);
- line->data = p = PyString_AS_STRING(item);
- /* 'djb2' hash. This gives us a nice compromise between fast hash
- function and a hash with less collisions. The algorithm doesn't
- use the hash for actual lookups, only for building the table
- so a better hash function wouldn't bring us much benefit, but
- would make this loading code slower. */
- h = 5381;
- for (j = 0; j < line->len; j++)
- h = ((h << 5) + h) + *p++;
- line->hash = h;
+ line->data = item; //p = PyString_AS_STRING(item);
+ // /* 'djb2' hash. This gives us a nice compromise between fast hash
+ // function and a hash with less collisions. The algorithm doesn't
+ // use the hash for actual lookups, only for building the table
+ // so a better hash function wouldn't bring us much benefit, but
+ // would make this loading code slower. */
+ // h = 5381;
+ // for (j = 0; j < line->len; j++)
+ // h = ((h << 5) + h) + *p++;
+ line->hash = PyObject_Hash(item);
line->next = SENTINEL;
line++;
}
=== modified file 'bzrlib/tests/test_diff.py'
--- a/bzrlib/tests/test_diff.py 2007-11-25 21:52:24 +0000
+++ b/bzrlib/tests/test_diff.py 2007-12-04 15:24:39 +0000
@@ -800,6 +800,38 @@
chk_blocks('abbabbbb', 'cabbabbc', [])
chk_blocks('bbbbbbbb', 'cbbbbbbc', [])
+ def test_matching_blocks_tuples(self):
+ def chk_blocks(a, b, expected_blocks):
+ # difflib always adds a signature of the total
+ # length, with no matching entries at the end
+ s = self._PatienceSequenceMatcher(None, a, b)
+ blocks = s.get_matching_blocks()
+ self.assertEquals((len(a), len(b), 0), blocks[-1])
+ self.assertEquals(expected_blocks, blocks[:-1])
+
+ # Some basic matching tests
+ chk_blocks([], [], [])
+ chk_blocks([('a',), ('b',), ('c,')], [], [])
+ chk_blocks([], [('a',), ('b',), ('c,')], [])
+ chk_blocks([('a',), ('b',), ('c,')],
+ [('a',), ('b',), ('c,')],
+ [(0, 0, 3)])
+ chk_blocks([('a',), ('b',), ('c,')],
+ [('a',), ('b',), ('d,')],
+ [(0, 0, 2)])
+ chk_blocks([('d',), ('b',), ('c,')],
+ [('a',), ('b',), ('c,')],
+ [(1, 1, 2)])
+ chk_blocks([('d',), ('a',), ('b',), ('c,')],
+ [('a',), ('b',), ('c,')],
+ [(1, 0, 3)])
+ chk_blocks([('a', 'b'), ('c', 'd'), ('e', 'f')],
+ [('a', 'b'), ('c', 'X'), ('e', 'f')],
+ [(0, 0, 1), (2, 2, 1)])
+ chk_blocks([('a', 'b'), ('c', 'd'), ('e', 'f')],
+ [('a', 'b'), ('c', 'dX'), ('e', 'f')],
+ [(0, 0, 1), (2, 2, 1)])
+
def test_opcodes(self):
def chk_ops(a, b, expected_codes):
s = self._PatienceSequenceMatcher(None, a, b)
More information about the bazaar-commits
mailing list