08_three_address_code.py

Download
python 499 lines 15.2 KB
  1"""
  208_three_address_code.py - Three-Address Code (TAC) Generation
  3
  4Demonstrates intermediate code generation: converting an AST into
  5Three-Address Code (TAC) and building a Control Flow Graph (CFG).
  6
  7Three-Address Code uses at most three operands per instruction:
  8  t1 = a + b
  9  t2 = t1 * c
 10  if t2 > 0 goto L1
 11  ...
 12
 13TAC instruction types:
 14  ASSIGN    t = a                 (copy)
 15  BINOP     t = a op b            (binary operation)
 16  UNARY     t = op a              (unary operation)
 17  COPY      t = a                 (same as ASSIGN, alias)
 18  LABEL     L:                    (branch target)
 19  JUMP      goto L                (unconditional jump)
 20  CJUMP     if a goto L           (conditional jump)
 21  PARAM     param a               (function call parameter)
 22  CALL      t = call f, n         (function call, n args)
 23  RETURN    return a              (function return)
 24  LOAD      t = a[i]              (array read)
 25  STORE     a[i] = t              (array write)
 26
 27Control Flow Graph:
 28  - Partition TAC into basic blocks (maximal straight-line sequences)
 29  - Add edges: sequential, conditional, jump edges
 30
 31Topics covered:
 32  - Temporary variable generation
 33  - Label generation
 34  - AST to TAC translation
 35  - Short-circuit evaluation for boolean expressions
 36  - CFG construction from TAC
 37  - Basic block identification
 38"""
 39
 40from __future__ import annotations
 41from dataclasses import dataclass, field
 42from typing import Any, Optional
 43
 44
 45# ---------------------------------------------------------------------------
 46# TAC Instructions
 47# ---------------------------------------------------------------------------
 48
 49@dataclass
 50class TACInstr:
 51    """Base class for all TAC instructions."""
 52
 53    def __str__(self) -> str:
 54        return repr(self)
 55
 56
 57@dataclass
 58class Assign(TACInstr):
 59    dest: str
 60    src: Any    # variable name or literal value
 61
 62    def __str__(self):
 63        return f"    {self.dest} = {self.src}"
 64
 65
 66@dataclass
 67class BinOp(TACInstr):
 68    dest: str
 69    left: Any
 70    op: str
 71    right: Any
 72
 73    def __str__(self):
 74        return f"    {self.dest} = {self.left} {self.op} {self.right}"
 75
 76
 77@dataclass
 78class UnaryOp(TACInstr):
 79    dest: str
 80    op: str
 81    src: Any
 82
 83    def __str__(self):
 84        return f"    {self.dest} = {self.op}{self.src}"
 85
 86
 87@dataclass
 88class Label(TACInstr):
 89    name: str
 90
 91    def __str__(self):
 92        return f"{self.name}:"
 93
 94
 95@dataclass
 96class Jump(TACInstr):
 97    target: str
 98
 99    def __str__(self):
100        return f"    goto {self.target}"
101
102
103@dataclass
104class CondJump(TACInstr):
105    condition: Any
106    true_target: str
107    false_target: Optional[str] = None
108
109    def __str__(self):
110        s = f"    if {self.condition} goto {self.true_target}"
111        if self.false_target:
112            s += f" else goto {self.false_target}"
113        return s
114
115
116@dataclass
117class Param(TACInstr):
118    value: Any
119
120    def __str__(self):
121        return f"    param {self.value}"
122
123
124@dataclass
125class Call(TACInstr):
126    dest: Optional[str]
127    func: str
128    num_args: int
129
130    def __str__(self):
131        if self.dest:
132            return f"    {self.dest} = call {self.func}, {self.num_args}"
133        return f"    call {self.func}, {self.num_args}"
134
135
136@dataclass
137class Return(TACInstr):
138    value: Optional[Any] = None
139
140    def __str__(self):
141        if self.value is not None:
142            return f"    return {self.value}"
143        return f"    return"
144
145
146# ---------------------------------------------------------------------------
147# TAC Generator
148# ---------------------------------------------------------------------------
149
150class TACGenerator:
151    """
152    Translates an expression/statement AST into a flat list of TAC instructions.
153    Generates fresh temporary variables (t0, t1, ...) and labels (L0, L1, ...).
154    """
155
156    def __init__(self):
157        self._temp_count = 0
158        self._label_count = 0
159        self.instrs: list[TACInstr] = []
160
161    def new_temp(self) -> str:
162        name = f"t{self._temp_count}"
163        self._temp_count += 1
164        return name
165
166    def new_label(self) -> str:
167        name = f"L{self._label_count}"
168        self._label_count += 1
169        return name
170
171    def emit(self, instr: TACInstr) -> None:
172        self.instrs.append(instr)
173
174    # --- Expression translation ---
175    # Returns the "address" (temp name or literal) holding the result.
176
177    def gen_expr(self, node) -> Any:
178        match node:
179            case {'kind': 'num', 'value': v}:
180                return v
181            case {'kind': 'str', 'value': v}:
182                return repr(v)
183            case {'kind': 'bool', 'value': v}:
184                return 1 if v else 0
185            case {'kind': 'var', 'name': n}:
186                return n
187            case {'kind': 'unary', 'op': op, 'operand': operand}:
188                src = self.gen_expr(operand)
189                t = self.new_temp()
190                self.emit(UnaryOp(t, op, src))
191                return t
192            case {'kind': 'binop', 'op': op, 'left': left, 'right': right}:
193                # Short-circuit for && and ||
194                if op == '&&':
195                    return self.gen_and(left, right)
196                if op == '||':
197                    return self.gen_or(left, right)
198                l = self.gen_expr(left)
199                r = self.gen_expr(right)
200                t = self.new_temp()
201                self.emit(BinOp(t, l, op, r))
202                return t
203            case {'kind': 'call', 'func': f, 'args': args}:
204                for arg in args:
205                    a = self.gen_expr(arg)
206                    self.emit(Param(a))
207                t = self.new_temp()
208                self.emit(Call(t, f, len(args)))
209                return t
210            case {'kind': 'index', 'array': arr, 'index': idx}:
211                a = self.gen_expr(arr)
212                i = self.gen_expr(idx)
213                t = self.new_temp()
214                self.emit(Assign(t, f"{a}[{i}]"))
215                return t
216            case _:
217                raise ValueError(f"Unknown expression node: {node}")
218
219    def gen_and(self, left, right) -> str:
220        """Short-circuit &&: if left is false, skip right."""
221        result = self.new_temp()
222        false_lbl = self.new_label()
223        end_lbl   = self.new_label()
224
225        l = self.gen_expr(left)
226        # if NOT l, jump to false_lbl
227        not_l = self.new_temp()
228        self.emit(UnaryOp(not_l, '!', l))
229        self.emit(CondJump(not_l, false_lbl))
230        # evaluate right
231        r = self.gen_expr(right)
232        self.emit(Assign(result, r))
233        self.emit(Jump(end_lbl))
234        self.emit(Label(false_lbl))
235        self.emit(Assign(result, 0))
236        self.emit(Label(end_lbl))
237        return result
238
239    def gen_or(self, left, right) -> str:
240        """Short-circuit ||: if left is true, skip right."""
241        result = self.new_temp()
242        true_lbl = self.new_label()
243        end_lbl  = self.new_label()
244
245        l = self.gen_expr(left)
246        self.emit(CondJump(l, true_lbl))
247        r = self.gen_expr(right)
248        self.emit(Assign(result, r))
249        self.emit(Jump(end_lbl))
250        self.emit(Label(true_lbl))
251        self.emit(Assign(result, 1))
252        self.emit(Label(end_lbl))
253        return result
254
255    # --- Statement translation ---
256
257    def gen_stmt(self, node) -> None:
258        match node:
259            case {'kind': 'assign', 'target': target, 'value': value}:
260                v = self.gen_expr(value)
261                self.emit(Assign(target, v))
262
263            case {'kind': 'index_assign', 'array': arr, 'index': idx, 'value': value}:
264                i = self.gen_expr(idx)
265                v = self.gen_expr(value)
266                # Represent as: arr[i] = v  (STORE)
267                t = self.new_temp()
268                self.emit(BinOp(t, arr, '[]', i))   # compute address
269                self.emit(Assign(f"{arr}[{i}]", v))
270
271            case {'kind': 'if', 'cond': cond, 'then': then_stmts,
272                  'else': else_stmts}:
273                self.gen_if(cond, then_stmts, else_stmts)
274
275            case {'kind': 'while', 'cond': cond, 'body': body}:
276                self.gen_while(cond, body)
277
278            case {'kind': 'return', 'value': value}:
279                if value is not None:
280                    v = self.gen_expr(value)
281                    self.emit(Return(v))
282                else:
283                    self.emit(Return())
284
285            case {'kind': 'expr_stmt', 'expr': expr}:
286                self.gen_expr(expr)
287
288            case {'kind': 'block', 'stmts': stmts}:
289                for s in stmts:
290                    self.gen_stmt(s)
291
292            case _:
293                raise ValueError(f"Unknown statement node: {node}")
294
295    def gen_if(self, cond, then_stmts, else_stmts) -> None:
296        then_lbl = self.new_label()
297        else_lbl = self.new_label()
298        end_lbl  = self.new_label()
299
300        c = self.gen_expr(cond)
301        self.emit(CondJump(c, then_lbl, else_lbl if else_stmts else end_lbl))
302
303        self.emit(Label(then_lbl))
304        for s in then_stmts:
305            self.gen_stmt(s)
306        self.emit(Jump(end_lbl))
307
308        if else_stmts:
309            self.emit(Label(else_lbl))
310            for s in else_stmts:
311                self.gen_stmt(s)
312            self.emit(Jump(end_lbl))
313
314        self.emit(Label(end_lbl))
315
316    def gen_while(self, cond, body) -> None:
317        test_lbl = self.new_label()
318        body_lbl = self.new_label()
319        end_lbl  = self.new_label()
320
321        self.emit(Label(test_lbl))
322        c = self.gen_expr(cond)
323        self.emit(CondJump(c, body_lbl, end_lbl))
324
325        self.emit(Label(body_lbl))
326        for s in body:
327            self.gen_stmt(s)
328        self.emit(Jump(test_lbl))
329
330        self.emit(Label(end_lbl))
331
332
333# ---------------------------------------------------------------------------
334# Control Flow Graph
335# ---------------------------------------------------------------------------
336
337@dataclass
338class BasicBlock:
339    id: int
340    label: Optional[str]
341    instrs: list[TACInstr] = field(default_factory=list)
342    successors: list[int] = field(default_factory=list)
343
344    def __repr__(self):
345        return f"BB{self.id}({self.label or 'entry' if self.id == 0 else ''})"
346
347
348def build_cfg(instrs: list[TACInstr]) -> list[BasicBlock]:
349    """
350    Partition TAC into basic blocks and build control flow edges.
351
352    A basic block starts at:
353      - The first instruction
354      - Any labeled instruction (branch target)
355      - Any instruction immediately after a jump/conditional jump
356    """
357    if not instrs:
358        return []
359
360    # Find leaders (first instruction of each basic block)
361    leaders: set[int] = {0}
362    for i, instr in enumerate(instrs):
363        if isinstance(instr, (Jump, CondJump, Return)):
364            if i + 1 < len(instrs):
365                leaders.add(i + 1)
366        if isinstance(instr, Label):
367            leaders.add(i)
368
369    sorted_leaders = sorted(leaders)
370
371    # Build blocks
372    blocks: list[BasicBlock] = []
373    label_to_block: dict[str, int] = {}
374
375    for bi, start in enumerate(sorted_leaders):
376        end = sorted_leaders[bi + 1] if bi + 1 < len(sorted_leaders) else len(instrs)
377        block_instrs = instrs[start:end]
378        lbl = block_instrs[0].name if isinstance(block_instrs[0], Label) else None
379        bb = BasicBlock(id=bi, label=lbl, instrs=block_instrs)
380        blocks.append(bb)
381        if lbl:
382            label_to_block[lbl] = bi
383
384    # Build edges
385    for bi, bb in enumerate(blocks):
386        last = bb.instrs[-1] if bb.instrs else None
387        if isinstance(last, Jump):
388            target_bi = label_to_block.get(last.target)
389            if target_bi is not None:
390                bb.successors.append(target_bi)
391        elif isinstance(last, CondJump):
392            t_bi = label_to_block.get(last.true_target)
393            if t_bi is not None:
394                bb.successors.append(t_bi)
395            if last.false_target:
396                f_bi = label_to_block.get(last.false_target)
397                if f_bi is not None:
398                    bb.successors.append(f_bi)
399            # fall-through
400            elif bi + 1 < len(blocks):
401                bb.successors.append(bi + 1)
402        elif not isinstance(last, Return) and bi + 1 < len(blocks):
403            bb.successors.append(bi + 1)
404
405    return blocks
406
407
408def print_cfg(blocks: list[BasicBlock]) -> None:
409    print("\nControl Flow Graph:")
410    for bb in blocks:
411        name = f"BB{bb.id}" + (f" [{bb.label}]" if bb.label else "")
412        succs = [f"BB{s}" for s in bb.successors]
413        print(f"\n  {name}  (successors: {succs or ['none']})")
414        for instr in bb.instrs:
415            print(f"  {instr}")
416
417
418# ---------------------------------------------------------------------------
419# Demo
420# ---------------------------------------------------------------------------
421
422def main():
423    print("=" * 60)
424    print("Three-Address Code Generation Demo")
425    print("=" * 60)
426
427    gen = TACGenerator()
428
429    # Example 1: Arithmetic expression
430    # t = (a + b) * (c - 2)
431    print("\n--- Example 1: (a + b) * (c - 2) ---")
432    expr1 = {'kind': 'binop', 'op': '*',
433              'left':  {'kind': 'binop', 'op': '+',
434                        'left':  {'kind': 'var', 'name': 'a'},
435                        'right': {'kind': 'var', 'name': 'b'}},
436              'right': {'kind': 'binop', 'op': '-',
437                        'left':  {'kind': 'var', 'name': 'c'},
438                        'right': {'kind': 'num', 'value': 2}}}
439    gen.instrs.clear()
440    result = gen.gen_expr(expr1)
441    print(f"Result in: {result}")
442    for i in gen.instrs:
443        print(i)
444
445    # Example 2: If statement with CFG
446    print("\n--- Example 2: if (x > 0) { y = x; } else { y = -x; } ---")
447    gen.instrs.clear()
448    gen._temp_count = gen._label_count = 0
449    if_stmt = {
450        'kind': 'if',
451        'cond': {'kind': 'binop', 'op': '>',
452                 'left': {'kind': 'var', 'name': 'x'},
453                 'right': {'kind': 'num', 'value': 0}},
454        'then': [{'kind': 'assign', 'target': 'y',
455                  'value': {'kind': 'var', 'name': 'x'}}],
456        'else': [{'kind': 'assign', 'target': 'y',
457                  'value': {'kind': 'unary', 'op': '-',
458                            'operand': {'kind': 'var', 'name': 'x'}}}],
459    }
460    gen.gen_stmt(if_stmt)
461    print("TAC:")
462    for i in gen.instrs:
463        print(i)
464    print_cfg(build_cfg(gen.instrs))
465
466    # Example 3: While loop with CFG
467    print("\n--- Example 3: sum = 0; i = 1; while (i <= n) { sum += i; i++; } ---")
468    gen.instrs.clear()
469    gen._temp_count = gen._label_count = 0
470    stmts = [
471        {'kind': 'assign', 'target': 'sum', 'value': {'kind': 'num', 'value': 0}},
472        {'kind': 'assign', 'target': 'i',   'value': {'kind': 'num', 'value': 1}},
473        {'kind': 'while',
474         'cond': {'kind': 'binop', 'op': '<=',
475                  'left': {'kind': 'var', 'name': 'i'},
476                  'right': {'kind': 'var', 'name': 'n'}},
477         'body': [
478             {'kind': 'assign', 'target': 'sum',
479              'value': {'kind': 'binop', 'op': '+',
480                        'left': {'kind': 'var', 'name': 'sum'},
481                        'right': {'kind': 'var', 'name': 'i'}}},
482             {'kind': 'assign', 'target': 'i',
483              'value': {'kind': 'binop', 'op': '+',
484                        'left': {'kind': 'var', 'name': 'i'},
485                        'right': {'kind': 'num', 'value': 1}}},
486         ]},
487        {'kind': 'return', 'value': {'kind': 'var', 'name': 'sum'}},
488    ]
489    for s in stmts:
490        gen.gen_stmt(s)
491    print("TAC:")
492    for i in gen.instrs:
493        print(i)
494    print_cfg(build_cfg(gen.instrs))
495
496
497if __name__ == "__main__":
498    main()