02_relational_algebra.py

Download
python 450 lines 13.9 KB
  1"""
  2Relational Algebra Operations
  3
  4Demonstrates fundamental relational algebra operations using SQL equivalents:
  5- Selection (ฯƒ): filter rows based on predicate
  6- Projection (ฯ€): select specific columns
  7- Cartesian Product (ร—): combine all rows from two relations
  8- Join (โ‹ˆ): combine relations based on condition
  9- Union (โˆช): combine tuples from two relations
 10- Intersection (โˆฉ): common tuples
 11- Difference (โˆ’): tuples in first but not second
 12- Rename (ฯ): rename relation/attributes
 13- Division (รท): find tuples that match all values in another relation
 14
 15Theory:
 16- Relational algebra is procedural (how to compute)
 17- Relational calculus is declarative (what to compute)
 18- SQL is based on both but closer to relational algebra
 19- Operations are closed: output is always a relation
 20"""
 21
 22import sqlite3
 23from typing import List, Tuple
 24
 25
 26def setup_database() -> sqlite3.Connection:
 27    """Create sample database for relational algebra operations."""
 28    conn = sqlite3.connect(':memory:')
 29    cursor = conn.cursor()
 30
 31    # Students relation
 32    cursor.execute('''
 33        CREATE TABLE Students (
 34            student_id INTEGER PRIMARY KEY,
 35            name TEXT,
 36            age INTEGER,
 37            major TEXT
 38        )
 39    ''')
 40
 41    # Courses relation
 42    cursor.execute('''
 43        CREATE TABLE Courses (
 44            course_id TEXT PRIMARY KEY,
 45            course_name TEXT,
 46            credits INTEGER
 47        )
 48    ''')
 49
 50    # Enrollment relation
 51    cursor.execute('''
 52        CREATE TABLE Enrollment (
 53            student_id INTEGER,
 54            course_id TEXT,
 55            grade TEXT,
 56            PRIMARY KEY (student_id, course_id)
 57        )
 58    ''')
 59
 60    # Insert sample data
 61    students = [
 62        (1, 'Alice', 20, 'CS'),
 63        (2, 'Bob', 21, 'Math'),
 64        (3, 'Charlie', 20, 'CS'),
 65        (4, 'Diana', 22, 'Physics'),
 66        (5, 'Eve', 21, 'Math')
 67    ]
 68    cursor.executemany("INSERT INTO Students VALUES (?, ?, ?, ?)", students)
 69
 70    courses = [
 71        ('CS101', 'Intro to Programming', 3),
 72        ('CS201', 'Data Structures', 4),
 73        ('MATH101', 'Calculus I', 4),
 74        ('PHYS101', 'Physics I', 4)
 75    ]
 76    cursor.executemany("INSERT INTO Courses VALUES (?, ?, ?)", courses)
 77
 78    enrollments = [
 79        (1, 'CS101', 'A'),
 80        (1, 'CS201', 'B'),
 81        (1, 'MATH101', 'A'),
 82        (2, 'MATH101', 'A'),
 83        (2, 'PHYS101', 'B'),
 84        (3, 'CS101', 'A'),
 85        (3, 'CS201', 'A'),
 86        (4, 'PHYS101', 'A'),
 87        (5, 'MATH101', 'B')
 88    ]
 89    cursor.executemany("INSERT INTO Enrollment VALUES (?, ?, ?)", enrollments)
 90
 91    conn.commit()
 92    return conn
 93
 94
 95def selection(conn: sqlite3.Connection):
 96    """ฯƒ (sigma): Select rows matching a predicate."""
 97    print("=" * 60)
 98    print("SELECTION (ฯƒ)")
 99    print("=" * 60)
100    print("Notation: ฯƒ_predicate(Relation)")
101    print()
102
103    cursor = conn.cursor()
104
105    # ฯƒ_age>20(Students)
106    print("1. ฯƒ_age>20(Students) - Students older than 20")
107    print("-" * 60)
108    cursor.execute("SELECT * FROM Students WHERE age > 20")
109    for row in cursor.fetchall():
110        print(f"  {row}")
111
112    # ฯƒ_major='CS'(Students)
113    print("\n2. ฯƒ_major='CS'(Students) - CS majors")
114    print("-" * 60)
115    cursor.execute("SELECT * FROM Students WHERE major = 'CS'")
116    for row in cursor.fetchall():
117        print(f"  {row}")
118
119    # Complex predicate: ฯƒ_age=20 โˆง major='CS'(Students)
120    print("\n3. ฯƒ_age=20 โˆง major='CS'(Students) - 20-year-old CS majors")
121    print("-" * 60)
122    cursor.execute("SELECT * FROM Students WHERE age = 20 AND major = 'CS'")
123    for row in cursor.fetchall():
124        print(f"  {row}")
125
126
127def projection(conn: sqlite3.Connection):
128    """ฯ€ (pi): Select specific columns."""
129    print("\n" + "=" * 60)
130    print("PROJECTION (ฯ€)")
131    print("=" * 60)
132    print("Notation: ฯ€_attributes(Relation)")
133    print()
134
135    cursor = conn.cursor()
136
137    # ฯ€_name,major(Students)
138    print("1. ฯ€_name,major(Students) - Only names and majors")
139    print("-" * 60)
140    cursor.execute("SELECT name, major FROM Students")
141    for row in cursor.fetchall():
142        print(f"  {row}")
143
144    # ฯ€_major(Students) - automatically removes duplicates
145    print("\n2. ฯ€_major(Students) - Distinct majors (duplicates removed)")
146    print("-" * 60)
147    cursor.execute("SELECT DISTINCT major FROM Students")
148    for row in cursor.fetchall():
149        print(f"  {row[0]}")
150
151    # Combined: ฯ€_name(ฯƒ_age>20(Students))
152    print("\n3. ฯ€_name(ฯƒ_age>20(Students)) - Names of students older than 20")
153    print("-" * 60)
154    cursor.execute("SELECT name FROM Students WHERE age > 20")
155    for row in cursor.fetchall():
156        print(f"  {row[0]}")
157
158
159def cartesian_product(conn: sqlite3.Connection):
160    """ร— (times): Cartesian product of two relations."""
161    print("\n" + "=" * 60)
162    print("CARTESIAN PRODUCT (ร—)")
163    print("=" * 60)
164    print("Notation: R ร— S")
165    print()
166
167    cursor = conn.cursor()
168
169    # Create small tables for demonstration
170    cursor.execute("CREATE TEMP TABLE R (A INTEGER, B TEXT)")
171    cursor.execute("CREATE TEMP TABLE S (C INTEGER, D TEXT)")
172    cursor.execute("INSERT INTO R VALUES (1, 'a'), (2, 'b')")
173    cursor.execute("INSERT INTO S VALUES (3, 'x'), (4, 'y')")
174
175    print("Table R:")
176    cursor.execute("SELECT * FROM R")
177    for row in cursor.fetchall():
178        print(f"  {row}")
179
180    print("\nTable S:")
181    cursor.execute("SELECT * FROM S")
182    for row in cursor.fetchall():
183        print(f"  {row}")
184
185    print("\nR ร— S (Cartesian Product):")
186    print("-" * 60)
187    cursor.execute("SELECT * FROM R, S")  # Equivalent to: SELECT * FROM R CROSS JOIN S
188    for row in cursor.fetchall():
189        print(f"  {row}")
190
191    print(f"\n|R| = 2, |S| = 2  โ†’  |R ร— S| = 4")
192
193
194def joins(conn: sqlite3.Connection):
195    """โ‹ˆ (bowtie): Various join operations."""
196    print("\n" + "=" * 60)
197    print("JOIN OPERATIONS (โ‹ˆ)")
198    print("=" * 60)
199    print("Notation: R โ‹ˆ_condition S")
200    print()
201
202    cursor = conn.cursor()
203
204    # Natural join: Students โ‹ˆ Enrollment
205    print("1. Students โ‹ˆ Enrollment (Natural Join on student_id)")
206    print("-" * 60)
207    cursor.execute("""
208        SELECT S.student_id, S.name, E.course_id, E.grade
209        FROM Students S
210        JOIN Enrollment E ON S.student_id = E.student_id
211        ORDER BY S.student_id
212    """)
213    for row in cursor.fetchall():
214        print(f"  Student {row[0]} ({row[1]}): {row[2]} - Grade {row[3]}")
215
216    # Theta join with condition
217    print("\n2. Students โ‹ˆ_ageโ‰ฅ22 Enrollment (Students aged 22+ with their enrollments)")
218    print("-" * 60)
219    cursor.execute("""
220        SELECT S.name, S.age, E.course_id, E.grade
221        FROM Students S
222        JOIN Enrollment E ON S.student_id = E.student_id
223        WHERE S.age >= 22
224    """)
225    for row in cursor.fetchall():
226        print(f"  {row}")
227
228    # Three-way join
229    print("\n3. Students โ‹ˆ Enrollment โ‹ˆ Courses (Full enrollment details)")
230    print("-" * 60)
231    cursor.execute("""
232        SELECT S.name, C.course_name, E.grade
233        FROM Students S
234        JOIN Enrollment E ON S.student_id = E.student_id
235        JOIN Courses C ON E.course_id = C.course_id
236        ORDER BY S.name
237    """)
238    for row in cursor.fetchall():
239        print(f"  {row[0]:8} | {row[1]:25} | {row[2]}")
240
241    # Left outer join (students who haven't enrolled in PHYS101)
242    print("\n4. LEFT OUTER JOIN (All students, with PHYS101 grade if exists)")
243    print("-" * 60)
244    cursor.execute("""
245        SELECT S.name,
246               COALESCE(E.grade, 'Not Enrolled') as phys_grade
247        FROM Students S
248        LEFT JOIN Enrollment E
249            ON S.student_id = E.student_id AND E.course_id = 'PHYS101'
250    """)
251    for row in cursor.fetchall():
252        print(f"  {row[0]:8} | {row[1]}")
253
254
255def set_operations(conn: sqlite3.Connection):
256    """Union, Intersection, Difference."""
257    print("\n" + "=" * 60)
258    print("SET OPERATIONS (โˆช, โˆฉ, โˆ’)")
259    print("=" * 60)
260    print()
261
262    cursor = conn.cursor()
263
264    # Union: Students taking CS101 OR CS201
265    print("1. UNION (โˆช) - Students in CS101 โˆช Students in CS201")
266    print("-" * 60)
267    cursor.execute("""
268        SELECT S.name, 'CS101' as course
269        FROM Students S JOIN Enrollment E ON S.student_id = E.student_id
270        WHERE E.course_id = 'CS101'
271        UNION
272        SELECT S.name, 'CS201'
273        FROM Students S JOIN Enrollment E ON S.student_id = E.student_id
274        WHERE E.course_id = 'CS201'
275        ORDER BY name
276    """)
277    for row in cursor.fetchall():
278        print(f"  {row[0]} ({row[1]})")
279
280    # Intersection: Students taking BOTH CS101 AND MATH101
281    print("\n2. INTERSECTION (โˆฉ) - Students in CS101 โˆฉ Students in MATH101")
282    print("-" * 60)
283    cursor.execute("""
284        SELECT S.name
285        FROM Students S JOIN Enrollment E ON S.student_id = E.student_id
286        WHERE E.course_id = 'CS101'
287        INTERSECT
288        SELECT S.name
289        FROM Students S JOIN Enrollment E ON S.student_id = E.student_id
290        WHERE E.course_id = 'MATH101'
291    """)
292    for row in cursor.fetchall():
293        print(f"  {row[0]}")
294
295    # Difference: CS majors NOT taking CS201
296    print("\n3. DIFFERENCE (โˆ’) - CS majors โˆ’ Students in CS201")
297    print("-" * 60)
298    cursor.execute("""
299        SELECT name FROM Students WHERE major = 'CS'
300        EXCEPT
301        SELECT S.name
302        FROM Students S JOIN Enrollment E ON S.student_id = E.student_id
303        WHERE E.course_id = 'CS201'
304    """)
305    for row in cursor.fetchall():
306        print(f"  {row[0]}")
307
308
309def division_operation(conn: sqlite3.Connection):
310    """รท (division): Find tuples matching all values."""
311    print("\n" + "=" * 60)
312    print("DIVISION (รท)")
313    print("=" * 60)
314    print("Find students who took ALL CS courses")
315    print()
316
317    cursor = conn.cursor()
318
319    # Find all CS courses
320    print("1. All CS courses:")
321    print("-" * 60)
322    cursor.execute("SELECT course_id, course_name FROM Courses WHERE course_id LIKE 'CS%'")
323    cs_courses = cursor.fetchall()
324    for row in cs_courses:
325        print(f"  {row[0]}: {row[1]}")
326
327    # Division: Students รท CS_Courses
328    # "Which students are enrolled in ALL CS courses?"
329    print("\n2. Students enrolled in ALL CS courses:")
330    print("-" * 60)
331    cursor.execute("""
332        SELECT S.student_id, S.name
333        FROM Students S
334        WHERE NOT EXISTS (
335            -- No CS course exists that this student hasn't taken
336            SELECT C.course_id
337            FROM Courses C
338            WHERE C.course_id LIKE 'CS%'
339            AND NOT EXISTS (
340                SELECT 1
341                FROM Enrollment E
342                WHERE E.student_id = S.student_id
343                AND E.course_id = C.course_id
344            )
345        )
346    """)
347    result = cursor.fetchall()
348    if result:
349        for row in result:
350            print(f"  Student {row[0]}: {row[1]}")
351    else:
352        print("  (None - no student has taken all CS courses)")
353
354    # Alternative formulation using COUNT
355    print("\n3. Alternative using COUNT (same result):")
356    print("-" * 60)
357    cursor.execute("""
358        SELECT S.student_id, S.name
359        FROM Students S
360        WHERE (
361            SELECT COUNT(DISTINCT E.course_id)
362            FROM Enrollment E
363            WHERE E.student_id = S.student_id
364            AND E.course_id LIKE 'CS%'
365        ) = (
366            SELECT COUNT(*)
367            FROM Courses
368            WHERE course_id LIKE 'CS%'
369        )
370    """)
371    for row in cursor.fetchall():
372        print(f"  Student {row[0]}: {row[1]}")
373
374
375def rename_operation(conn: sqlite3.Connection):
376    """ฯ (rho): Rename relations and attributes."""
377    print("\n" + "=" * 60)
378    print("RENAME (ฯ)")
379    print("=" * 60)
380    print("Notation: ฯ_new_name(old_name) or ฯ_(A1,A2,...)(Relation)")
381    print()
382
383    cursor = conn.cursor()
384
385    # Self-join requires renaming
386    print("1. Self-join with aliases (find pairs of students with same age)")
387    print("-" * 60)
388    cursor.execute("""
389        SELECT S1.name as student1, S2.name as student2, S1.age
390        FROM Students S1
391        JOIN Students S2 ON S1.age = S2.age AND S1.student_id < S2.student_id
392        ORDER BY S1.age
393    """)
394    for row in cursor.fetchall():
395        print(f"  {row[0]} and {row[1]} are both {row[2]} years old")
396
397    # Rename columns in result
398    print("\n2. Column renaming (attribute renaming)")
399    print("-" * 60)
400    cursor.execute("""
401        SELECT
402            student_id AS id,
403            name AS full_name,
404            age AS years_old,
405            major AS department
406        FROM Students
407        WHERE age = 20
408    """)
409    print("  " + str(tuple([desc[0] for desc in cursor.description])))
410    for row in cursor.fetchall():
411        print(f"  {row}")
412
413
414if __name__ == "__main__":
415    print("""
416โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
417โ•‘              RELATIONAL ALGEBRA OPERATIONS                   โ•‘
418โ•‘  ฯƒ, ฯ€, ร—, โ‹ˆ, โˆช, โˆฉ, โˆ’, ฯ, รท                                  โ•‘
419โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
420""")
421
422    conn = setup_database()
423
424    try:
425        selection(conn)
426        projection(conn)
427        cartesian_product(conn)
428        joins(conn)
429        set_operations(conn)
430        division_operation(conn)
431        rename_operation(conn)
432
433        print("\n" + "=" * 60)
434        print("SUMMARY")
435        print("=" * 60)
436        print("Relational algebra provides:")
437        print("  ฯƒ (selection)   - Filter rows")
438        print("  ฯ€ (projection)  - Select columns")
439        print("  ร— (product)     - Combine all pairs")
440        print("  โ‹ˆ (join)        - Combine on condition")
441        print("  โˆช (union)       - Combine sets")
442        print("  โˆฉ (intersect)   - Common elements")
443        print("  โˆ’ (difference)  - Elements in first but not second")
444        print("  รท (division)    - Universal quantification")
445        print("  ฯ (rename)      - Rename relations/attributes")
446        print("=" * 60)
447
448    finally:
449        conn.close()