Rev 74: Add support for nested docs. in http://bazaar.launchpad.net/+branch/u1db

John Arbash Meinel john at arbash-meinel.com
Fri Oct 14 09:14:24 UTC 2011


At http://bazaar.launchpad.net/+branch/u1db

------------------------------------------------------------
revno: 74 [merge]
revision-id: john at arbash-meinel.com-20111014091356-b9031b3gk4njjpac
parent: john at arbash-meinel.com-20111013140048-defyihyn7qf6l2f4
parent: john at arbash-meinel.com-20111014091334-zy341floothmw5um
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: u1db
timestamp: Fri 2011-10-14 11:13:56 +0200
message:
  Add support for nested docs.
modified:
  u1db/backends/sqlite_backend.py sqlite_backend.py-20110908122026-u745g3ftpndrgjl0-1
  u1db/tests/test_backends.py    test_client.py-20110907100834-dekceojbjm2ken0c-6
  u1db/tests/test_sqlite_backend.py test_sqlite_backend.-20110908122026-u745g3ftpndrgjl0-2
-------------- next part --------------
=== modified file 'u1db/backends/sqlite_backend.py'
--- a/u1db/backends/sqlite_backend.py	2011-10-13 12:44:22 +0000
+++ b/u1db/backends/sqlite_backend.py	2011-10-14 09:13:34 +0000
@@ -367,7 +367,8 @@
             c.execute("INSERT INTO document VALUES (?, ?, ?)",
                       (doc_id, new_rev, doc))
         values = [(doc_id, field_name, value) for field_name, value in
-                  raw_doc.iteritems()]
+                  raw_doc.iteritems()
+                  if isinstance(value, (int, float, basestring))]
         c.executemany("INSERT INTO document_fields VALUES (?, ?, ?)",
                       values)
         c.execute("INSERT INTO transaction_log(doc_id) VALUES (?)",
@@ -454,6 +455,32 @@
     def _extra_schema_init(self, c):
         c.execute("ALTER TABLE document_fields ADD COLUMN offset INT")
 
+    def _convert_to_fields(self, doc_id, base_field, raw_doc):
+        """Convert a dict representation into named fields.
+
+        So something like: {'key1': 'val1', 'key2': 'val2'}
+        gets converted into: [(doc_id, 'key1', 'val1', 0),
+                              (doc_id, 'key2', 'val2', 1)]
+        :param doc_id: Just added to every record.
+        :param base_field: if set, these are nested keys, so each field should
+            be appropriately prefixed.
+        :param raw_doc: The python dictionary.
+        """
+        # TODO: Handle lists
+        values = []
+        for field_name, value in raw_doc.iteritems():
+            if base_field:
+                full_name = base_field + '.' + field_name
+            else:
+                full_name = field_name
+            if isinstance(value, (int, float, basestring)):
+                values.append((doc_id, full_name, value, len(values)))
+            else:
+                subvalues = self._convert_to_fields(doc_id, full_name, value)
+                for _, subfield_name, val, _ in subvalues:
+                    values.append((doc_id, subfield_name, val, len(values)))
+        return values
+
     def _put_and_update_indexes(self, doc_id, old_doc, new_rev, doc):
         c = self._db_handle.cursor()
         if doc:
@@ -471,9 +498,7 @@
         else:
             c.execute("INSERT INTO document VALUES (?, ?, ?)",
                       (doc_id, new_rev, doc_content))
-        values = [(doc_id, field_name, value, idx)
-                  for idx, (field_name, value)
-                  in enumerate(raw_doc.iteritems())]
+        values = self._convert_to_fields(doc_id, None, raw_doc)
         c.executemany("INSERT INTO document_fields VALUES (?, ?, ?, ?)",
                       values)
         c.execute("INSERT INTO transaction_log(doc_id) VALUES (?)",
@@ -499,7 +524,16 @@
         # TODO: What about nested docs?
         raw_doc = compat.OrderedDict()
         for field, value in c.fetchall():
-            raw_doc[field] = value
+            if '.' in field: # A nested document
+                split = field.split('.')
+                cur = raw_doc
+                for subfield in split[:-1]:
+                    if subfield not in cur:
+                        cur[subfield] = {}
+                    cur = cur[subfield]
+                cur[split[-1]] = value
+            else:
+                raw_doc[field] = value
         doc = simplejson.dumps(raw_doc)
         return doc_rev, doc
 

=== modified file 'u1db/tests/test_backends.py'
--- a/u1db/tests/test_backends.py	2011-10-13 14:00:48 +0000
+++ b/u1db/tests/test_backends.py	2011-10-13 14:16:06 +0000
@@ -26,6 +26,7 @@
 
 
 simple_doc = '{"key": "value"}'
+nested_doc = '{"key": "value", "sub": {"doc": "underneath"}}'
 
 
 def create_memory_database(machine_id):
@@ -166,6 +167,10 @@
         self.assertEqual((2, set([doc_id])), self.c.whats_changed())
         self.assertEqual((2, set()), self.c.whats_changed(2))
 
+    def test_handles_nested_content(self):
+        doc_id, new_rev = self.c.create_doc(nested_doc)
+        self.assertEqual((new_rev, nested_doc, False), self.c.get_doc(doc_id))
+
     def test__get_sync_info(self):
         self.assertEqual(('test', 0, 0), self.c._get_sync_info('other'))
 

=== modified file 'u1db/tests/test_sqlite_backend.py'
--- a/u1db/tests/test_sqlite_backend.py	2011-10-13 12:44:22 +0000
+++ b/u1db/tests/test_sqlite_backend.py	2011-10-14 09:13:34 +0000
@@ -207,3 +207,12 @@
         self.assertEqual((doc1_rev, '{}', False), self.db.get_doc(doc1_id))
         doc1_rev2 = self.db.delete_doc(doc1_id, doc1_rev)
         self.assertEqual((doc1_rev2, None, False), self.db.get_doc(doc1_id))
+
+    def test_deeply_nested(self):
+        doc1_id, doc1_rev = self.db.create_doc(
+            '{"a": {"b": {"c": {"d": "x"}}}}')
+        c = self.db._get_sqlite_handle().cursor()
+        c.execute("SELECT doc_id, field_name, value FROM document_fields"
+                  " ORDER BY doc_id, field_name")
+        self.assertEqual([(doc1_id, 'a.b.c.d', 'x'),
+                         ], c.fetchall())



More information about the bazaar-commits mailing list