(* This file defines a very simple interpreter for a little expression language * similar to SML. *) (************************************************************************) (* The first module defines the abstract syntax or AST for the language *) (************************************************************************) structure Ast = struct (* This is a type abbreviation *) type var = string (* we only have one type for this little language *) datatype tipe = IntType (* Define the abstract syntax for a little expression language *) datatype binop = Plus | Minus | Times | Div datatype exp = Int of int | Binop of exp * binop * exp | Var of var | Let of var * exp * exp (* Some example expressions as abstract syntax trees *) val e1 = Let("x",Int 42,Binop(Var "x",Times,Var "x")) val e2 = Binop(e1,Div,Int 3) val e3 = Let("y",e2,Binop(Var "y",Minus,Int 14)) (* Convert the abstract syntax tree to a string *) fun exp2string(e:exp):string = let fun binop2string(b:binop):string = case b of Plus => "+" | Times => "*" | Minus => "-" | Div => "/" in case e of Int(i) => Int.toString i | Binop(e1,b,e2) => "(" ^ (exp2string e1) ^ " " ^ (binop2string b) ^ " " ^ (exp2string e2) ^ ")" | Var(x) => x | Let(x,e1,e2) => "let val " ^ x ^ " = " ^ (exp2string e1) ^ " in " ^ (exp2string e2) ^ " end" end end (* structure Ast *) (************************************************************************) (* An abstract signature for environments *) (************************************************************************) signature ENV = sig type 'a env exception UnboundVariable of Ast.var val empty : unit -> 'a env val lookup : Ast.var -> 'a env -> 'a val extend : Ast.var -> 'a -> 'a env -> 'a env end (*************************************************************************) (* One (inefficient) implementation of environments as association lists *) (*************************************************************************) structure Env :> ENV = struct open Ast type 'a env = (var * 'a) list (* Declares a new exception *) exception UnboundVariable of var (* The empty environment -- an empty association list *) fun empty():'a env = [] (* Lookup variable x in the environment, returning the associated value, * and raising the exception UnboundVariable if the variable is not found. *) fun lookup(x:var) (env:'a env) = case env of [] => raise (UnboundVariable x) | (y,i)::rest => if (x = y) then i else lookup x rest (* Extend env so that it maps x to i *) fun extend (x:var) (i:'a) (env:'a env) = (x,i)::env end (**************************************************************************) (* Evaluate an expression in an environment mapping variables to integers *) (**************************************************************************) structure Eval :> sig val evaluate : Ast.exp -> int end = struct open Ast fun eval (e: exp) (env: int Env.env) : int = let fun binop2fn b = case b of Plus => (op +) (* op is needed for infix functions like + *) | Minus => (op -) | Times => (op * ) (* can't omit the space because of comments! *) | Div => (op div) in case e of Int(i) => i | Binop(e1,b,e2) => (binop2fn b)(eval e1 env, eval e2 env) | Var(x) => Env.lookup x env | Let(x,e1,e2) => let val i = eval e1 env in eval e2 (Env.extend x i env) end end (* Evaluate an expression -- start off with the empty environment *) fun evaluate (e:exp) : int = eval e (Env.empty()) end (* A little error support code *) structure Error = struct exception Error of string fun error s = raise (Error s) end (************************************************************************) (* A simple type-checker for expressions *) (************************************************************************) structure TypeCheck :> sig val typecheck : Ast.exp -> Ast.tipe end = struct open Ast (* Similar structure to the evaluator...the only error we catch is unbound * variables. *) fun tc (e : exp) (env : Ast.tipe Env.env) : tipe = case e of Int _ => IntType | Binop(e1,_,e2) => (case (tc e1 env, tc e2 env) of (IntType,IntType) => IntType) | Var x => (Env.lookup x env handle Env.UnboundVariable x => Error.error("unbound variable "^x)) | Let(x,e1,e2) => let val t = tc e1 env in tc e2 (Env.extend x t env) end fun typecheck (e : exp) : tipe = tc e (Env.empty()) end (************************************************************************) (* lexing -- break input into tokens *) (************************************************************************) signature LEX = sig datatype token = INT of int | VAR of Ast.var | PLUS | TIMES | MINUS | DIV | LET | VAL | IN | END | LPAREN | RPAREN | EQUALS | EOF val token2string : token -> string val lex : char list -> token * (char list) end structure Lex :> LEX = struct datatype token = INT of int | VAR of Ast.var | PLUS | TIMES | MINUS | DIV | LET | VAL | IN | END | LPAREN | RPAREN | EQUALS | EOF fun token2string t = case t of INT i => Int.toString i | VAR x => x | PLUS => "+" | MINUS => "-" | DIV => "/" | TIMES => "*" | LET => "let" | VAL => "val" | IN => "in" | END => "end" | LPAREN => "(" | RPAREN => ")" | EQUALS => "=" | EOF => "" fun lex (cs: char list) : token * (char list) = (case cs of nil => (EOF,nil) | #" " :: rest => lex rest (* skip whitespace *) | #"\n" :: rest => lex rest (* skip whitespace *) | #"(" :: #"*" :: rest => lex_comment(1, rest) (* comment start *) | #"+" :: rest => (PLUS,rest) | #"-" :: rest => (MINUS,rest) | #"*" :: rest => (TIMES,rest) | #"/" :: rest => (DIV,rest) | #"(" :: rest => (LPAREN,rest) | #")" :: rest => (RPAREN,rest) | #"=" :: rest => (EQUALS,rest) | c :: rest => if Char.isAlpha c then (* must be a keyword or variable *) let val (x,rest) = lex_var([c],rest) in if x = "let" then (LET,rest) (* check keywords *) else if x = "val" then (VAL,rest) else if x = "in" then (IN, rest) else if x = "end" then (END, rest) else (VAR x, rest) end else if Char.>=(c,#"0") andalso Char.<=(c,#"9") then (* must be a number *) lex_num ((Char.ord c) - (Char.ord #"0"),rest) else Error.error ("bad character "^(Char.toString c))) (* process comments -- depth records the nesting depth of comments -- we go back * to the lexer only when it drops down to zero *) and lex_comment (depth:int, cs:char list) : token * (char list) = (case cs of #"*" :: #")" :: rest => (* close comment*) let val new_depth = depth - 1 (* see if nesting depth drops to zero *) in if new_depth = 0 then lex rest (* if so, continue with lexer *) else lex_comment (new_depth, rest) (* otherwise, continue with comment *) end | #"(" :: #"*" :: rest => lex_comment (depth + 1, rest) (* increase comment depth *) | c :: rest => lex_comment(depth, rest) (* skip any other characters *) | nil => Error.error "missing comment end") (* oops -- missing end comment *) (* accum represents the value of the number so far *) and lex_num (accum:int, cs:char list) : token * (char list) = (case cs of c :: rest => (* see if c is a digit *) if Char.>=(c,#"0") andalso Char.<=(c,#"9") then (* convert the digit to an integer and fold into accumulator *) lex_num(accum*10 + (Char.ord c) - (Char.ord #"0"), rest) else (INT accum, cs) | [] => (INT accum, cs)) (* accum represents the identifier so far, but as a reversed list of characters *) and lex_var (accum:char list, cs:char list) : string * (char list) = (case cs of c :: rest => (* make sure c is a letter, digit, or underscore -- if so, push it on accum *) if Char.isAlphaNum c orelse c = #"_" then lex_var(c::accum, rest) (* be sure to reverse the list and then collapse it with implode *) else (implode(rev accum), cs) | [] => (implode(rev accum), cs)) end (* Lex *) (************************************************************************) (* parsing -- take in a string, use the lexer to tokenize it, and *) (* build an expression. *) (************************************************************************) structure Parse :> sig val parse : string -> Ast.exp end = struct open Ast open Lex fun token_error (s:string) (t:token) = Error.error("expecting "^s^" but found '"^(token2string t)^"'") (* aexp ::= INT | VAR | '(' exp ')' | 'let' 'val' VAR '=' exp 'in' exp 'end' *) fun parse_aexp (cs : char list) : exp * (char list) = case lex cs of (INT i, cs) => (Int i, cs) | (VAR x, cs) => (Var x, cs) | (LPAREN, cs) => let val (e,cs) = parse_exp cs in case lex cs of (RPAREN, cs) => (e, cs) | (t,_) => token_error "')'" t end | (LET, cs) => (case lex cs of (VAL, cs) => (case lex cs of (VAR x, cs) => (case lex cs of (EQUALS,cs) => (case parse_exp cs of (e1,cs) => (case lex cs of (IN,cs) => (case parse_exp cs of (e2,cs) => (case lex cs of (END,cs) => (Let(x,e1,e2),cs) | (t,_) => token_error "'end'" t)) | (t,_) => token_error "'in'" t)) | (t,_) => token_error "'='" t) | (t,_) => token_error "" t) | (t,_) => token_error "'val'" t) | (t,_) => token_error " or or '(' or 'let'" t (* term ::= aexp | term '*' aexp | term '/' aexp *) and parse_term (cs:char list) : exp * (char list) = let fun loop(e,cs) = case lex cs of (TIMES,cs) => let val (e2,cs) = parse_aexp cs in loop(Binop(e,Times,e2),cs) end | (DIV,cs) => let val (e2,cs) = parse_aexp cs in loop(Binop(e,Times,e2),cs) end | _ => (e,cs) in loop (parse_aexp cs) end (* exp ::= term | exp '+' term | exp '-' term *) and parse_exp (cs:char list) : exp * (char list) = let fun loop(e,cs) = case lex cs of (PLUS,cs) => let val (e2,cs) = parse_term cs in loop(Binop(e,Plus,e2),cs) end | (MINUS,cs) => let val (e2,cs) = parse_term cs in loop(Binop(e,Minus,e2),cs) end | _ => (e,cs) in loop (parse_term cs) end (* prog ::= exp EOF *) fun parse (s:string) : exp = let val (e,cs) = parse_exp (explode s) in case lex cs of (EOF,_) => e | (t,_) => token_error "" t end end (* parse *) (************************************************************************) (* Put it all together *) (************************************************************************) structure MyLanguage : sig val calc : string -> unit end = struct fun calc(x:string) = let val e = Parse.parse x val t = TypeCheck.typecheck e val i = Eval.evaluate e in print ("The result is "^(Int.toString i)^"\n") end handle Error.Error s => (print ("error: "^s^"\n")) end