1"""
2NoSQL Document Store with SQLite JSON
3
4Demonstrates document-oriented database concepts using SQLite's JSON functions:
5- Schema-less storage (flexible documents)
6- CRUD operations on JSON documents
7- Nested queries and array operations
8- Indexing JSON fields
9- Comparison with relational approach
10
11Theory:
12- Document stores organize data as semi-structured documents (JSON, BSON, XML)
13- Schema flexibility: different documents can have different structures
14- Trade-offs: flexibility vs. data integrity
15- SQLite supports JSON1 extension for JSON operations
16- Useful for: hierarchical data, variable schemas, rapid development
17
18CAP theorem consideration:
19- Most NoSQL systems sacrifice strong consistency for availability/partition tolerance
20- SQLite is single-writer, so it maintains strong consistency
21"""
22
23import sqlite3
24import json
25from typing import List, Dict, Any
26from datetime import datetime
27
28
29def demonstrate_document_storage():
30 """Demonstrate storing and querying JSON documents."""
31 print("=" * 60)
32 print("DOCUMENT STORAGE (Schema-less)")
33 print("=" * 60)
34 print()
35
36 conn = sqlite3.connect(':memory:')
37 cursor = conn.cursor()
38
39 # Create simple document store
40 cursor.execute('''
41 CREATE TABLE documents (
42 id INTEGER PRIMARY KEY AUTOINCREMENT,
43 collection TEXT NOT NULL,
44 data TEXT NOT NULL, -- JSON document
45 created_at REAL DEFAULT (julianday('now'))
46 )
47 ''')
48
49 print("1. Inserting documents with different schemas")
50 print("-" * 60)
51
52 # User documents with varying fields
53 users = [
54 {
55 "name": "Alice",
56 "email": "alice@example.com",
57 "age": 30,
58 "address": {
59 "city": "New York",
60 "country": "USA"
61 }
62 },
63 {
64 "name": "Bob",
65 "email": "bob@example.com",
66 "phone": "555-1234", # Different field!
67 "preferences": {
68 "theme": "dark",
69 "notifications": True
70 }
71 },
72 {
73 "name": "Charlie",
74 "age": 25,
75 "skills": ["Python", "SQL", "Docker"], # Array field!
76 "address": {
77 "city": "London",
78 "country": "UK",
79 "postal_code": "SW1A 1AA" # Extra nested field
80 }
81 }
82 ]
83
84 for user in users:
85 cursor.execute(
86 "INSERT INTO documents (collection, data) VALUES (?, ?)",
87 ("users", json.dumps(user))
88 )
89 print(f"✓ Inserted {len(users)} user documents with different schemas")
90
91 # Retrieve documents
92 print("\n2. Retrieving all user documents")
93 print("-" * 60)
94 cursor.execute("SELECT id, data FROM documents WHERE collection = 'users'")
95 for doc_id, data in cursor.fetchall():
96 doc = json.loads(data)
97 print(f" Doc {doc_id}: {doc.get('name')} - {json.dumps(doc, indent=4)}")
98
99 print("\n✓ Schema flexibility: each document can have different fields")
100 print()
101
102
103def demonstrate_json_queries():
104 """Demonstrate JSON query operations."""
105 print("=" * 60)
106 print("JSON QUERY OPERATIONS")
107 print("=" * 60)
108 print()
109
110 conn = sqlite3.connect(':memory:')
111 cursor = conn.cursor()
112
113 cursor.execute('''
114 CREATE TABLE products (
115 id INTEGER PRIMARY KEY AUTOINCREMENT,
116 data TEXT NOT NULL
117 )
118 ''')
119
120 # Insert product documents
121 products = [
122 {
123 "name": "Laptop",
124 "price": 999.99,
125 "specs": {"cpu": "i7", "ram": 16, "storage": 512},
126 "tags": ["electronics", "computers"]
127 },
128 {
129 "name": "Mouse",
130 "price": 29.99,
131 "specs": {"type": "wireless", "dpi": 1600},
132 "tags": ["electronics", "accessories"]
133 },
134 {
135 "name": "Book",
136 "price": 19.99,
137 "author": "John Doe",
138 "tags": ["books", "fiction"]
139 }
140 ]
141
142 for product in products:
143 cursor.execute("INSERT INTO products (data) VALUES (?)", (json.dumps(product),))
144
145 # Extract specific fields
146 print("1. Extract specific JSON fields (json_extract)")
147 print("-" * 60)
148 cursor.execute("""
149 SELECT
150 json_extract(data, '$.name') as name,
151 json_extract(data, '$.price') as price
152 FROM products
153 ORDER BY price DESC
154 """)
155 for row in cursor.fetchall():
156 print(f" {row[0]:15} ${row[1]:.2f}")
157
158 # Filter by nested field
159 print("\n2. Filter by nested field (specs.ram >= 16)")
160 print("-" * 60)
161 cursor.execute("""
162 SELECT json_extract(data, '$.name') as name
163 FROM products
164 WHERE json_extract(data, '$.specs.ram') >= 16
165 """)
166 for row in cursor.fetchall():
167 print(f" {row[0]}")
168
169 # Array operations
170 print("\n3. Find products with 'electronics' tag (array search)")
171 print("-" * 60)
172 cursor.execute("""
173 SELECT json_extract(data, '$.name') as name
174 FROM products, json_each(json_extract(data, '$.tags'))
175 WHERE json_each.value = 'electronics'
176 """)
177 for row in cursor.fetchall():
178 print(f" {row[0]}")
179
180 # Full-text search in JSON
181 print("\n4. Search for 'wireless' anywhere in document")
182 print("-" * 60)
183 cursor.execute("""
184 SELECT json_extract(data, '$.name') as name
185 FROM products
186 WHERE json_extract(data, '$.specs.type') = 'wireless'
187 OR data LIKE '%wireless%'
188 """)
189 for row in cursor.fetchall():
190 print(f" {row[0]}")
191
192 conn.close()
193 print()
194
195
196def demonstrate_json_indexing():
197 """Demonstrate indexing JSON fields for performance."""
198 print("=" * 60)
199 print("INDEXING JSON FIELDS")
200 print("=" * 60)
201 print()
202
203 conn = sqlite3.connect(':memory:')
204 cursor = conn.cursor()
205
206 cursor.execute('''
207 CREATE TABLE events (
208 id INTEGER PRIMARY KEY AUTOINCREMENT,
209 data TEXT NOT NULL
210 )
211 ''')
212
213 # Insert many events
214 print("Inserting 10,000 event documents...")
215 import random
216 event_types = ["login", "logout", "purchase", "view", "click"]
217 for i in range(10000):
218 event = {
219 "type": random.choice(event_types),
220 "user_id": random.randint(1, 1000),
221 "timestamp": datetime.now().isoformat(),
222 "metadata": {
223 "ip": f"192.168.1.{random.randint(1, 255)}",
224 "session_id": f"sess_{random.randint(1000, 9999)}"
225 }
226 }
227 cursor.execute("INSERT INTO events (data) VALUES (?)", (json.dumps(event),))
228 conn.commit()
229 print("✓ Inserted 10,000 events\n")
230
231 # Query without index
232 print("1. Query without index (count by event type)")
233 print("-" * 60)
234 import time
235 start = time.time()
236 cursor.execute("""
237 SELECT
238 json_extract(data, '$.type') as event_type,
239 COUNT(*) as count
240 FROM events
241 WHERE json_extract(data, '$.type') = 'purchase'
242 """)
243 result = cursor.fetchone()
244 elapsed_no_idx = time.time() - start
245 print(f" Found {result[1]} purchases in {elapsed_no_idx*1000:.2f} ms")
246
247 # Create index on extracted JSON field
248 print("\n2. Creating index on json_extract(data, '$.type')")
249 print("-" * 60)
250 cursor.execute("""
251 CREATE INDEX idx_event_type
252 ON events(json_extract(data, '$.type'))
253 """)
254 print("✓ Index created")
255
256 # Query with index
257 print("\n3. Query with index (same query)")
258 print("-" * 60)
259 start = time.time()
260 cursor.execute("""
261 SELECT
262 json_extract(data, '$.type') as event_type,
263 COUNT(*) as count
264 FROM events
265 WHERE json_extract(data, '$.type') = 'purchase'
266 """)
267 result = cursor.fetchone()
268 elapsed_with_idx = time.time() - start
269 print(f" Found {result[1]} purchases in {elapsed_with_idx*1000:.2f} ms")
270
271 if elapsed_no_idx > elapsed_with_idx:
272 speedup = elapsed_no_idx / elapsed_with_idx
273 print(f"\n✓ Speedup: {speedup:.1f}x faster with index")
274
275 # Show query plan
276 print("\n4. Query plan analysis")
277 print("-" * 60)
278 cursor.execute("""
279 EXPLAIN QUERY PLAN
280 SELECT * FROM events
281 WHERE json_extract(data, '$.type') = 'purchase'
282 """)
283 for row in cursor.fetchall():
284 print(f" {row}")
285
286 conn.close()
287 print()
288
289
290def demonstrate_relational_vs_document():
291 """Compare relational and document approaches."""
292 print("=" * 60)
293 print("RELATIONAL VS DOCUMENT APPROACH")
294 print("=" * 60)
295 print()
296
297 conn = sqlite3.connect(':memory:')
298 cursor = conn.cursor()
299
300 # Relational approach: normalized tables
301 print("RELATIONAL APPROACH (Normalized)")
302 print("-" * 60)
303 cursor.execute('''
304 CREATE TABLE blog_posts (
305 id INTEGER PRIMARY KEY,
306 title TEXT,
307 author TEXT,
308 content TEXT,
309 created_at TEXT
310 )
311 ''')
312 cursor.execute('''
313 CREATE TABLE comments (
314 id INTEGER PRIMARY KEY,
315 post_id INTEGER,
316 author TEXT,
317 text TEXT,
318 created_at TEXT,
319 FOREIGN KEY (post_id) REFERENCES blog_posts(id)
320 )
321 ''')
322
323 cursor.execute("""
324 INSERT INTO blog_posts VALUES
325 (1, 'First Post', 'Alice', 'Hello world!', '2024-01-01')
326 """)
327 cursor.execute("""
328 INSERT INTO comments VALUES
329 (1, 1, 'Bob', 'Great post!', '2024-01-02'),
330 (2, 1, 'Charlie', 'Thanks for sharing', '2024-01-03')
331 """)
332
333 print("Query: Get post with all comments (requires JOIN)")
334 cursor.execute("""
335 SELECT
336 p.title,
337 p.author as post_author,
338 c.author as comment_author,
339 c.text as comment_text
340 FROM blog_posts p
341 LEFT JOIN comments c ON p.id = c.post_id
342 WHERE p.id = 1
343 """)
344 print("Results:")
345 for row in cursor.fetchall():
346 print(f" {row}")
347
348 # Document approach: embedded documents
349 print("\n\nDOCUMENT APPROACH (Denormalized)")
350 print("-" * 60)
351 cursor.execute('''
352 CREATE TABLE blog_docs (
353 id INTEGER PRIMARY KEY,
354 data TEXT NOT NULL
355 )
356 ''')
357
358 blog_post = {
359 "title": "First Post",
360 "author": "Alice",
361 "content": "Hello world!",
362 "created_at": "2024-01-01",
363 "comments": [
364 {"author": "Bob", "text": "Great post!", "created_at": "2024-01-02"},
365 {"author": "Charlie", "text": "Thanks for sharing", "created_at": "2024-01-03"}
366 ]
367 }
368
369 cursor.execute("INSERT INTO blog_docs VALUES (1, ?)", (json.dumps(blog_post),))
370
371 print("Query: Get post with all comments (single document, no JOIN)")
372 cursor.execute("SELECT data FROM blog_docs WHERE id = 1")
373 doc = json.loads(cursor.fetchone()[0])
374 print(f"Post: {doc['title']} by {doc['author']}")
375 print("Comments:")
376 for comment in doc['comments']:
377 print(f" - {comment['author']}: {comment['text']}")
378
379 print("\n\nTRADE-OFFS:")
380 print("-" * 60)
381 print("Relational:")
382 print(" ✓ No data duplication")
383 print(" ✓ Easy to update (update in one place)")
384 print(" ✓ Strong schema validation")
385 print(" ✗ Requires JOINs (slower for reads)")
386 print()
387 print("Document:")
388 print(" ✓ Fast reads (no JOINs needed)")
389 print(" ✓ Schema flexibility")
390 print(" ✓ Natural hierarchical data representation")
391 print(" ✗ Data duplication possible")
392 print(" ✗ Updates may require updating multiple documents")
393 print(" ✗ Weaker data integrity")
394
395 conn.close()
396 print()
397
398
399def demonstrate_update_operations():
400 """Demonstrate updating JSON documents."""
401 print("=" * 60)
402 print("UPDATING JSON DOCUMENTS")
403 print("=" * 60)
404 print()
405
406 conn = sqlite3.connect(':memory:')
407 cursor = conn.cursor()
408
409 cursor.execute('''
410 CREATE TABLE user_profiles (
411 id INTEGER PRIMARY KEY,
412 data TEXT NOT NULL
413 )
414 ''')
415
416 user = {
417 "name": "Alice",
418 "email": "alice@example.com",
419 "settings": {
420 "theme": "light",
421 "notifications": True
422 }
423 }
424 cursor.execute("INSERT INTO user_profiles VALUES (1, ?)", (json.dumps(user),))
425
426 print("Original document:")
427 print("-" * 60)
428 cursor.execute("SELECT data FROM user_profiles WHERE id = 1")
429 print(json.dumps(json.loads(cursor.fetchone()[0]), indent=2))
430
431 # Update nested field
432 print("\n1. Update nested field (theme: light → dark)")
433 print("-" * 60)
434 cursor.execute("""
435 UPDATE user_profiles
436 SET data = json_set(data, '$.settings.theme', 'dark')
437 WHERE id = 1
438 """)
439
440 cursor.execute("SELECT data FROM user_profiles WHERE id = 1")
441 print(json.dumps(json.loads(cursor.fetchone()[0]), indent=2))
442
443 # Add new field
444 print("\n2. Add new field (age: 30)")
445 print("-" * 60)
446 cursor.execute("""
447 UPDATE user_profiles
448 SET data = json_set(data, '$.age', 30)
449 WHERE id = 1
450 """)
451
452 cursor.execute("SELECT data FROM user_profiles WHERE id = 1")
453 print(json.dumps(json.loads(cursor.fetchone()[0]), indent=2))
454
455 # Remove field
456 print("\n3. Remove field (email)")
457 print("-" * 60)
458 cursor.execute("""
459 UPDATE user_profiles
460 SET data = json_remove(data, '$.email')
461 WHERE id = 1
462 """)
463
464 cursor.execute("SELECT data FROM user_profiles WHERE id = 1")
465 print(json.dumps(json.loads(cursor.fetchone()[0]), indent=2))
466
467 conn.close()
468 print()
469
470
471if __name__ == "__main__":
472 print("""
473╔══════════════════════════════════════════════════════════════╗
474║ NoSQL DOCUMENT STORE (SQLite JSON) ║
475║ Schema-less Storage, JSON Queries, Indexing ║
476╚══════════════════════════════════════════════════════════════╝
477""")
478
479 demonstrate_document_storage()
480 demonstrate_json_queries()
481 demonstrate_json_indexing()
482 demonstrate_relational_vs_document()
483 demonstrate_update_operations()
484
485 print("=" * 60)
486 print("SUMMARY: DOCUMENT STORES")
487 print("=" * 60)
488 print("When to use:")
489 print(" ✓ Hierarchical/nested data")
490 print(" ✓ Schema flexibility needed")
491 print(" ✓ Rapid prototyping")
492 print(" ✓ Read-heavy workloads")
493 print()
494 print("When NOT to use:")
495 print(" ✗ Complex relationships (many-to-many)")
496 print(" ✗ Strong ACID guarantees required")
497 print(" ✗ Complex queries across documents")
498 print(" ✗ Frequent updates to shared data")
499 print("=" * 60)