From 6a4c1744c4ec5009e6c8eaf7f0e3ebabd3bae931 Mon Sep 17 00:00:00 2001
From: Mylloon
Date: Sun, 11 Dec 2022 03:35:52 +0100
Subject: [PATCH] add string support

---
Review notes. Text between the `---` separator and the first `diff --git`
line is commentary and is not part of the commit.

What the patch does, file by file:
  * ast.ml: adds `Str_t` to `type_t`, a `Str of string` value to `Syntax`
    and `V1`, and a `Data of string` value to `V2`.
  * compiler.ml: `compile_value` turns `Data l` into `La (V0, Lbl l)`.
  * errors.ml: declares `SyntaxError of string`; `string_of_type_t` prints
    `Str_t` as "str".
  * lexer.mll: re-indents the `token` and `comment` rules, adds the "str"
    type keyword, and adds `read_string`, entered from `token` on a double
    quote. It accumulates characters in a `Buffer` until the closing quote
    and returns `Lstr`; end of input inside a literal raises `SyntaxError`.
  * main.ml: reports `SyntaxError` through `err` at
    `Lexing.lexeme_start_p buf`.
  * parser.mly: declares `Lstr`, appends it to the last `%left` line and
    adds an `expr` alternative that builds `Val { value = Str (s) ; ... }`.
  * semantics.ml: `analyze_value` maps `Syntax.Str s` to `Str s, Str_t`.
  * simplifier.ml: `ccs_value` replaces each `V1.Str s` by `V2.Data lbl`,
    where `lbl` is "str" followed by a counter incremented once per
    literal, and returns the association `lbl, Mips.Asciiz s`.
  * tests: 20_str uses a terminated literal, 21_bad-str an unterminated one.

NOTE(review): in `read_string` the class `[^ '"' '\\']+` also matches
newline characters and the rule never calls `Lexing.new_line`, unlike
`token` and `comment`. Line/column information is therefore not advanced
while inside a literal. The "line 3 col 28" expected by 21_bad-str looks
like a consequence of this (a column past the end of line 3) -- confirm
that this is the intended message.

NOTE(review): the `_ as c` case of `read_string` can only be reached by a
backslash (every other character is taken by the two cases above it), so
any backslash inside a literal raises `LexerError`; escape sequences are
not supported by this patch.

NOTE(review): in main.ml, `Printf.sprintf "%s" s` is equivalent to `s`.

NOTE(review): data labels are named "str1", "str2", ... and `ident` in
lexer.mll accepts such names. If identifiers are emitted as assembly
labels elsewhere (not visible in this patch) the two could clash -- verify.

NOTE(review): `%token Lstr` (like the neighbouring `%token` lines) is shown
without a payload type although the new `expr` alternative binds
`s = Lstr` and passes it to `Str`. Presumably a type annotation was lost
when this patch was extracted -- confirm against the repository.

NOTE(review): leading whitespace and blank context lines in the hunks
below were re-derived from the hunk line counts. Verify them against the
tree before applying, or apply with `git apply --ignore-whitespace`.

 ast.ml                |  4 +++
 compiler.ml           |  1 +
 errors.ml             |  2 ++
 lexer.mll             | 62 +++++++++++++++++++++++++------------------
 main.ml               |  1 +
 parser.mly            |  8 +++++-
 semantics.ml          |  1 +
 simplifier.ml         |  5 ++++
 tests/20_str.test     |  3 +++
 tests/21_bad-str.test |  4 +++
 10 files changed, 64 insertions(+), 27 deletions(-)
 create mode 100644 tests/20_str.test
 create mode 100644 tests/21_bad-str.test

diff --git a/ast.ml b/ast.ml
index 36bc23b..39e56df 100644
--- a/ast.ml
+++ b/ast.ml
@@ -3,6 +3,7 @@ type type_t =
   | Void_t
   | Int_t
   | Bool_t
+  | Str_t
   | Func_t of type_t * type_t list
 
 module Syntax = struct
@@ -12,6 +13,7 @@ module Syntax = struct
     | Void
     | Int of int
     | Bool of bool
+    | Str of string
 
   type expr =
     | Val of
@@ -79,6 +81,7 @@ module V1 = struct
     | Void
     | Int of int
     | Bool of bool
+    | Str of string
 end
 
 module V2 = struct
@@ -86,6 +89,7 @@ module V2 = struct
     | Void
     | Int of int
     | Bool of bool
+    | Data of string
 end
 
 module IR (P : Parameters) = struct
diff --git a/compiler.ml b/compiler.ml
index 078396a..08f5e1c 100644
--- a/compiler.ml
+++ b/compiler.ml
@@ -15,6 +15,7 @@ let compile_value = function
   | Void -> [ Li (V0, 0) ]
   | Int n -> [ Li (V0, n) ]
   | Bool b -> [ Li (V0, if b then 1 else 0) ]
+  | Data l -> [ La (V0, Lbl l) ]
 ;;
 
 let rec compile_expr env = function
diff --git a/errors.ml b/errors.ml
index ecfd9f1..3d0d63a 100644
--- a/errors.ml
+++ b/errors.ml
@@ -3,6 +3,7 @@ open Lexing
 
 exception LexerError of char
 exception SemanticsError of string * Lexing.position
+exception SyntaxError of string
 
 let err msg pos =
   Printf.eprintf
@@ -18,6 +19,7 @@ let rec string_of_type_t = function
   | Void_t -> "void"
   | Int_t -> "int"
   | Bool_t -> "bool"
+  | Str_t -> "str"
   | Func_t (r, a) ->
     (if List.length a > 1 then "(" else "")
     ^ String.concat ", " (List.map string_of_type_t a)
diff --git a/lexer.mll b/lexer.mll
index 49e8f78..bcf7eb4 100644
--- a/lexer.mll
+++ b/lexer.mll
@@ -10,31 +10,41 @@ let bool = "true" | "false"
 let ident = alpha (alpha | num | '-' | '_')*
 
 rule token = parse
-| eof { Lend }
-| [ ' ' '\t' ] { token lexbuf }
-| '\n' { Lexing.new_line lexbuf; token lexbuf }
-| num+ as n { Lint (int_of_string n) }
-| bool as b { Lbool (bool_of_string b) }
-| "return" { Lreturn }
-| "int" { Ltype (Int_t) }
-| "bool" { Ltype (Bool_t) }
-| "void" { Ltype (Void_t) }
-| '{' { Lbracedeb }
-| '}' { Lbracefin }
-| '(' { Lpardeb }
-| ')' { Lparfin }
-| ',' { Lcomma }
-| '=' { Lassign }
-| ';' { Lsc }
-| '+' { Ladd }
-| '-' { Lsub }
-| '*' { Lmul }
-| '/' { Ldiv }
-| ident as i { Lvar i }
-| '#' { comment lexbuf }
-| _ as c { raise (LexerError c) }
+  | eof { Lend }
+  | [ ' ' '\t' ] { token lexbuf }
+  | '\n' { Lexing.new_line lexbuf; token lexbuf }
+  | num+ as n { Lint (int_of_string n) }
+  | bool as b { Lbool (bool_of_string b) }
+  | "return" { Lreturn }
+  | "int" { Ltype (Int_t) }
+  | "bool" { Ltype (Bool_t) }
+  | "void" { Ltype (Void_t) }
+  | "str" { Ltype (Str_t) }
+  | '{' { Lbracedeb }
+  | '}' { Lbracefin }
+  | '(' { Lpardeb }
+  | ')' { Lparfin }
+  | ',' { Lcomma }
+  | '=' { Lassign }
+  | ';' { Lsc }
+  | '+' { Ladd }
+  | '-' { Lsub }
+  | '*' { Lmul }
+  | '/' { Ldiv }
+  | '"' { read_string (Buffer.create 16) lexbuf }
+  | ident as i { Lvar i }
+  | '#' { comment lexbuf }
+  | _ as c { raise (LexerError c) }
 
 and comment = parse
-| eof { Lend }
-| '\n' { Lexing.new_line lexbuf; token lexbuf }
-| _ { comment lexbuf }
+  | eof { Lend }
+  | '\n' { Lexing.new_line lexbuf; token lexbuf }
+  | _ { comment lexbuf }
+
+and read_string buffer = parse
+  | '"' { Lstr (Buffer.contents buffer) }
+  | [^ '"' '\\']+ { Buffer.add_string buffer (Lexing.lexeme lexbuf)
+                  ; read_string buffer lexbuf
+                  }
+  | _ as c { raise (LexerError c) }
+  | eof { raise (SyntaxError "String is not terminated") }
diff --git a/main.ml b/main.ml
index 40ff967..a6ade27 100644
--- a/main.ml
+++ b/main.ml
@@ -18,6 +18,7 @@ let () =
   with
   | LexerError c ->
     err (Printf.sprintf "Unrecognized char \"%c\"" c) (Lexing.lexeme_start_p buf)
+  | SyntaxError s -> err (Printf.sprintf "%s" s) (Lexing.lexeme_start_p buf)
   | Parser.Error -> err "Syntax error" (Lexing.lexeme_start_p buf)
   | SemanticsError (msg, pos) -> err msg pos
 ;;
diff --git a/parser.mly b/parser.mly
index af83bcc..8e09441 100644
--- a/parser.mly
+++ b/parser.mly
@@ -5,6 +5,7 @@
 
 %token Lint
 %token Lbool
+%token Lstr
 %token Ltype
 %token Lvar
 %token Lend Lassign Lsc Lreturn
@@ -15,7 +16,7 @@
 
 %left Ladd Lsub Lmul Ldiv
 %left Lbracedeb Lparfin Lbracefin Lreturn
-%left Ltype Lbool Lint Lvar
+%left Ltype Lbool Lint Lvar Lstr
 
 %start prog
 
@@ -117,6 +118,11 @@ expr:
       Val { value = Bool (b) ; pos = $startpos(b) }
     }
 
+  /* string */
+  | s = Lstr {
+      Val { value = Str (s) ; pos = $startpos(s) }
+    }
+
   /* Variable */
   | v = Lvar {
       Var { name = v ; pos = $startpos(v) }
diff --git a/semantics.ml b/semantics.ml
index 49555e1..cef7b7b 100644
--- a/semantics.ml
+++ b/semantics.ml
@@ -8,6 +8,7 @@ let analyze_value = function
   | Syntax.Void -> Void, Void_t
   | Syntax.Int n -> Int n, Int_t
   | Syntax.Bool b -> Bool b, Bool_t
+  | Syntax.Str s -> Str s, Str_t
 ;;
 
 let rec analyze_expr env ua t = function
diff --git a/simplifier.ml b/simplifier.ml
index 94de7f5..2ca9282 100644
--- a/simplifier.ml
+++ b/simplifier.ml
@@ -1,10 +1,15 @@
 open Ast
 
 let collect_constant_strings code =
+  let counter = ref 0 in
   let ccs_value = function
     | V1.Void -> V2.Void, []
     | V1.Bool b -> V2.Bool b, []
     | V1.Int n -> V2.Int n, []
+    | V1.Str s ->
+      incr counter;
+      let lbl = "str" ^ string_of_int !counter in
+      V2.Data lbl, [ lbl, Mips.Asciiz s ]
   in
   let rec ccs_expr = function
     | IR1.Val v ->
diff --git a/tests/20_str.test b/tests/20_str.test
new file mode 100644
index 0000000..902516d
--- /dev/null
+++ b/tests/20_str.test
@@ -0,0 +1,3 @@
+void main () {
+    str a = "bonjour!";
+}
diff --git a/tests/21_bad-str.test b/tests/21_bad-str.test
new file mode 100644
index 0000000..d16785f
--- /dev/null
+++ b/tests/21_bad-str.test
@@ -0,0 +1,4 @@
+void main () {
+    # Error on line 3 col 28: String is not terminated.
+    str a = "au revoir !;
+}