% -*- mode: Noweb; noweb-code-mode: caml-mode -*- % $Id: tree.nw,v 1.9 2004-06-05 15:35:49 govereau Exp $ % --------------------------------------------------------------------------- \section{Intermediate Representation} \label{sec:tree} % --------------------------------------------------------------------------- The intermediate representation used by the Tiger compiler is a variant of the ``Tree Language'' described in \cite{appel}. The abstract syntax for the intermediate representation ``trees'' is show below. Both code labels and temporary variables are represented by symbols. <>= type label = Symbol.symbol and temp = Symbol.symbol @ Statements are represented by the [[stm]] type. Sequences of statements can be created using the [[SEQ]] constructor. In addition, any expression can be used as a statement by way of the [[EXP]] constructor. The [[TRY]] and [[TRYEND]] constructors mark the start and end of try blocks. Both of the try constructors take the label of the continuation for the exception handlers. <>= type stm = SEQ of stm * stm | LABEL of label | CONT of label * label list | JUMP of exp | CJUMP of exp * label * label | MOVE of exp * exp | EXP of exp | TRY of label | TRYEND of label | RET of exp @ The abstract syntax of expressions is represented by the [[exp]] type. Many expressions carry a boolean flag that is true if the expression results in a pointer value. The [[CALL]] constructor also takes an optional string that indicates the calling convention of the function being called, and an optional label indicating a continuation that the function may cut to or unwind to. <>= and exp = BINOP of binop * exp * exp | RELOP of relop * exp * exp | MEM of exp * bool | TEMP of temp * bool | ESEQ of stm * exp | NAME of label | CONST of int | CALL of exp * exp list * string option * label option * bool @ Finally, a handful of binary operators are supported. The binary operators are divided into relational and arithmetic types. <>= and binop = PLUS | MINUS | MUL | DIV and relop = EQ | NE | LT | GT | LE | GE @ % --------------------------------------------------------------------------- \subsection{Interface to the IR} % --------------------------------------------------------------------------- In addition to defining the abstract syntax of the IR, the [[Tree]] module also defines a number of utility functions for manipulating the IR. <>= <> <> @ New code labels and temporary variables can be created using the [[new_label]] and [[new_temp]] functions. The [[new_label]] function generates a new unique code label, and the [[new_temp]] function generates a new temporary variable. <>= val new_label : string -> label val new_temp : unit -> temp @ There are three functions related to binary operators. The [[relop_inverse]] function reverses the meaning of a relational operator. The other two functions return a string representing the [[C--]] operator associated with an given tree language operator. <>= val relop_inverse : relop -> relop val cmm_binop : binop -> string val cmm_relop : relop -> string @ In Tiger, each expression results in a value that is either a machine word, or a pointer to a data structure. The [[is_ptr]] function returns true if a given expression will return a pointer. A related function, [[find_temps]] returns a list of all the temporary variables in a statement. With each temporary in the list is a boolean flag that is true only if the temporary variable holds a pointer. <>= val is_ptr : exp -> bool val find_temps : stm list -> (temp * bool) list @ Finally, for debugging purposes, there are two functions for printing out string representations of expressions and statements. <>= val print_stm : stm -> unit val print_exp : exp -> unit @ % --------------------------------------------------------------------------- \subsection{Implementation of IR Utilities} % --------------------------------------------------------------------------- <>= module S = Symbol <> @ New labels and temporaries are generated using the [[new_symbol]] function from the [[Symbol]] module. <>= let new_label s = S.new_symbol ("L" ^ s) let new_temp () = S.new_symbol "temp" @ The functions for binary operators are simple mappings. <>= let relop_inverse = function EQ -> NE | NE -> EQ | LT -> GE | GT -> LE | LE -> GT | GE -> LT let cmm_binop = function PLUS -> "add" | MINUS -> "sub" | MUL -> "mul" | DIV -> "quot" let cmm_relop = function EQ -> "eq" | NE -> "ne" | LT -> "lt" | GT -> "gt" | LE -> "le" | GE -> "ge" @ The [[is_ptr]] function is implemented by returning the appropriate field from each of the expression types. Expression types that do not have a pointer field are known to {\em not} be pointers. <>= let rec is_ptr = function BINOP _ -> false | RELOP _ -> false | MEM(_,p) -> p | TEMP(_,p) -> p | ESEQ(_,e) -> is_ptr e | NAME _ -> false | CONST _ -> false | CALL(_,_,_,_,p) -> p @ In order to implement the [[find_temps]] function, we build a set of temporary variables and then convert the set into a list. We use the standard library set implementation to build a unique set of temporaries. <>= module TempSet = Set.Make( struct type t = Symbol.symbol * bool let compare = Pervasives.compare end) @ Using the [[TempSet]] module, the implementation of [[find_temps]] is relatively easy. <>= let find_temps stmts = let foldl = List.fold_left in let rec stm set = function SEQ(a,b) -> stm (stm set a) b | LABEL _ -> set | CONT _ -> set | JUMP e -> exp set e | CJUMP(e,_,_) -> exp set e | MOVE(a,b) -> exp (exp set a) b | EXP e -> exp set e | TRY _ -> set | TRYEND _ -> set | RET e -> exp set e and exp set = function BINOP(_,a,b) -> exp (exp set a) b | RELOP(_,a,b) -> exp (exp set a) b | MEM(e,_) -> exp set e | TEMP(t,ptr) -> TempSet.add (t,ptr) set | ESEQ(s,e) -> exp (stm set s) e | NAME _ -> set | CONST _ -> set | CALL(e,el,_,_,_) -> foldl exp set (e :: el) in TempSet.elements (foldl stm TempSet.empty stmts) @ Now, we give the implementation of the two printing functions for statements and expressions. We will define the function for printing statements first. <>= let print_stm = let rec iprintf = function 0 -> Printf.printf | i -> (print_string " "; iprintf (i-1)) in let rec prstm d = function | LABEL l -> iprintf d "LABEL:%s\n " (S.name l) | CONT(l,ls) -> iprintf d "CONT:%s\n " (S.name l) | TRY l -> iprintf d "TRY:%s\n" (S.name l) | TRYEND l -> iprintf d "TRYEND:%s\n" (S.name l) | SEQ(a,b) -> iprintf d "SEQ:\n"; prstm(d+1) a; prstm(d+1) b | MOVE(a,b) -> iprintf d "MOVE:\n"; prexp(d+1) a; prexp(d+1) b | JUMP e -> iprintf d "JUMP:\n"; prexp(d+1) e | EXP e -> iprintf d "EXP:\n"; prexp(d+1) e | RET e -> iprintf d "RET:\n"; prexp(d+1) e | CJUMP(a,t,f) -> iprintf d "CJUMP:\n"; prexp(d+1) a; iprintf (d+1) "true label: %s\n" (S.name t); iprintf (d+1) "false label: %s\n" (S.name f) and prexp d = function BINOP(p,a,b) -> iprintf d "BINOP:%s\n" (cmm_binop p); prexp (d+1) a; prexp (d+1) b | RELOP(p,a,b) -> iprintf d "RELOP:%s\n" (cmm_relop p); prexp (d+1) a; prexp (d+1) b | MEM(e,_) -> iprintf d "MEM:\n"; prexp (d+1) e | TEMP(t,_) -> iprintf d "TEMP %s\n" (S.name t) | ESEQ(s,e) -> iprintf d "ESEQ:\n"; prstm (d+1) s; prexp (d+1) e | NAME lab -> iprintf d "NAME %s\n" (S.name lab) | CONST i -> iprintf d "CONST %d\n" i | CALL(e,el,_,_,_) -> iprintf d "CALL:\n"; prexp (d+1) e; List.iter (prexp (d+2)) el in prstm 0 @ The expression printer just calls the statement printer after converting the expression into a statement. <>= let print_exp e = print_stm (EXP e) @