(* Play with this file!  Define your own recognizers and check
 * them against input strings using the matches function.  For
 * example, write a recognizer S which takes in a string s, and
 * which matches only when the input matches the list of characters
 * in s.  Try to define S in terms of other existing combinators
 * [hint: it's a one-liner].  Define a recognizer for ML-style
 * comments, which nest.  Define a recognizer for IEEE floating-
 * point numbers as in Java.  Define a recognizer for a subset of
 * HTML.  Go nuts!
 *)

(*********************************************************************)
(* This structure defines a set of generic combinators for building  *)
(* recognizers -- routines that pattern match against lists of       *)
(* characters.                                                       *)
(*********************************************************************)
structure Recognizer =
struct

  (* A recognizer is a function that takes a list of characters
   * [c1,c2,c3,...,cn].  If a prefix [c1,c2,...,ck] of the list
   * matches, it returns SOME [ck+1,...,cn], i.e. the unconsumed
   * remainder.  If the beginning of the list does not match, it
   * returns NONE.
   *)
  type recognizer = char list -> (char list) option

  (* To match a recognizer against a string, we explode the string
   * into a list of characters, run the recognizer on the list, and
   * check that we got back SOME [].  If we get back SOME [c1,...,cn]
   * (for n>0), then the recognizer succeeded, but didn't consume all
   * of the input.  If we get NONE, then the recognizer failed.
   * Both of those cases yield false.
   *)
  fun matches (r:recognizer) (s:string) : bool =
    case r (explode s) of
      SOME [] => true
    | SOME _ => false
    | NONE => false

  (* always matches, consuming no input *)
  val always : recognizer = fn cs => SOME cs

  (* never matches *)
  val never : recognizer = fn _ => NONE

  (* matches when the first character satisfies predicate p;
   * consumes exactly that one character.  Fails on empty input. *)
  fun satisfy (p:char->bool) : recognizer =
    fn cs =>
      (case cs of
         c::rest => if p c then SOME rest else NONE
       | [] => NONE)

  (* matches any alphabetic character *)
  val alpha : recognizer = satisfy Char.isAlpha

  (* matches any digit *)
  val digit : recognizer = satisfy Char.isDigit

  (* matches only the character c *)
  fun C (c:char) : recognizer = satisfy (fn c' => c' = c)

  val a : recognizer = C #"a"
  val b : recognizer = C #"b"

  (* matches any single character other than c *)
  fun notC (c:char) : recognizer = satisfy (fn c' => c' <> c)

  val not_a : recognizer = notC #"a"

  (* matches when we are at the end of the list of characters *)
  val eof : recognizer =
    fn cs =>
      (case cs of
         [] => SOME []
       | _::_ => NONE)

  (* matches if either r1 matches or r2 matches.  r1 is tried first
   * and its result is final if it succeeds: there is no backtracking
   * into r2 if something later in a sequence fails. *)
  fun alt (r1:recognizer) (r2:recognizer) : recognizer =
    fn cs =>
      (case r1 cs of
         NONE => r2 cs
       | SOME cs => SOME cs)

  val a_or_b = alt a b
  val alpha_or_digit = alt alpha digit

  (* matches if r1 matches, and r2 matches the remainder *)
  fun seq (r1:recognizer) (r2:recognizer) : recognizer =
    fn cs =>
      (case r1 cs of
         NONE => NONE
       | SOME cs' => r2 cs')

  val a_then_b = seq a b
  val alpha_then_digit_then_b = seq alpha (seq digit b)

  (* turns a curried two-argument function into one taking a pair,
   * so that alt and seq can be handed to foldr *)
  fun uncurry (f : 'a -> 'b -> 'c) : ('a * 'b) -> 'c =
    fn (x,y) => f x y

  (* matches if any recognizer in rs matches, trying them in order;
   * the empty list yields a recognizer that never matches *)
  fun alts (rs : recognizer list) : recognizer =
    foldr (uncurry alt) never rs

  val a_or_b_or_c = alts [a,b,C #"c"]
  val hex_digit = alts (map C (explode "0123456789abcdef"))

  (* matches the recognizers in rs one after another; the empty
   * list yields a recognizer that always matches *)
  fun seqs (rs : recognizer list) : recognizer =
    foldr (uncurry seq) always rs

  val a_then_b_then_c = seqs [a,b,C #"c"]

  (* Kleene's star: matches zero or more occurrences of r, greedily.
   * Because this is recursive, we must be careful: the body is
   * wrapped in fn cs => ... so that the recursive call star r is
   * only evaluated once input is supplied.
   * NOTE: r must consume at least one character whenever it
   * succeeds; otherwise (e.g. star always) this loops forever.
   *)
  fun star (r:recognizer) : recognizer =
    fn cs =>
      (case r cs of
         NONE => always cs
       | SOME cs => star r cs)

  (* matches one or more occurrences of r *)
  fun plus (r:recognizer) : recognizer = seq r (star r)

  (* matches any string built from one or more digits *)
  val number = plus digit

  (* matches any string built from one or more alphabetic characters *)
  val simple_var = plus alpha

  (* a more realistic recognizer for ML identifiers -- they must
   * start with an alphabetic character, underscore, or single-quote,
   * and can then have zero or more alphabetic characters, underscores,
   * single-quotes, or digits.
   *)
  val var =
    let
      val non_dig = alts [alpha,C #"_", C #"'"]
    in
      seq non_dig (star (alt non_dig digit))
    end

  (* C style comments -- they start with slash-star, continue with
   * any text that does not contain star-slash, and end at the first
   * star-slash.
   *
   * Since star is greedy and alt never backtracks, the body must not
   * swallow a star that belongs to the terminator.  A run of stars
   * is therefore body text only when the character after the run is
   * neither a star nor a slash; the terminator is one or more stars
   * followed by a slash.  This accepts comments that end in several
   * stars before the slash, and stops at the first terminator.
   *)
  val c_comment =
    let
      val stars = plus (C #"*")
      val not_star_or_slash =
        satisfy (fn c => c <> #"*" andalso c <> #"/")
      val body_item = alt (notC #"*") (seq stars not_star_or_slash)
    in
      seqs [C #"/", C #"*", star body_item, stars, C #"/"]
    end

  (* white space -- zero or more spaces, newlines, tabs, carriage
   * returns, or comments.
   *)
  val white = star (alts [C #" ", C #"\n", C #"\t", C #"\r", c_comment])
  val w = white

  (* Scheme-style s-expressions -- numbers, variables, or parentheses
   * wrapped around zero or more s-expressions.  Note that we also must
   * account for white space.  The explicit fn cs => ... cs is needed
   * because sexp refers to itself.
   *)
  val rec sexp : recognizer =
    fn cs =>
      seqs [w,
            alts [ number,
                   simple_var,
                   seqs [ C #"(", w, star sexp, w, C #")"]
                 ],
            w] cs

  (* An example of the lack of backtracking: on "abb" the first alt
   * commits to a single a, the second alt commits to a single b, and
   * one b is left over, so the match below evaluates to false even
   * though a followed by bb would cover the whole input. *)
  val hmm = seq (alt a (seq a a)) (alt b (seq b b))
  val _ = matches hmm "abb"
end