diff --git a/flap/src/hopix/hopixLexer.mll b/flap/src/hopix/hopixLexer.mll index fd72715..2311a96 100644 --- a/flap/src/hopix/hopixLexer.mll +++ b/flap/src/hopix/hopixLexer.mll @@ -8,8 +8,15 @@ Lexing.new_line lexbuf; f lexbuf - let error lexbuf = - error "during lexing" (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p) + let error lexbuf c = + let msg = + "during lexing" + ^ + match c with + | Some c -> Printf.sprintf " at '%c'" c + | None -> "" + in + error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p) } let newline = ('\010' | '\013' | "\013\010") @@ -33,80 +40,105 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']* let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* let int = '-'? (digit+ | hexa+ | bina+ | octa+) let char = '\'' atom '\'' -let string = '\"' ((atom | '\'' | "\\\"")) '\"' + +let letter = (digit | ['A'-'Z'] | ['a'-'z']) (* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"?" *) rule token = parse (** Layout *) - | newline { next_line_and token lexbuf } - | blank+ { token lexbuf } - | eof { EOF } - | "{*" { commentary lexbuf } - | "##" { commentary_line lexbuf } + | newline { next_line_and token lexbuf } + | blank+ { token lexbuf } + | eof { EOF } + | "{*" { commentary lexbuf } + | "##" { commentary_line lexbuf } (** Keywords *) - | "let" { LET } - | "type" { TYPE } - | "extern" { EXTERN } - | "fun" { FUN } - | "match" { MATCH } - | "if" { IF } - | "then" { THEN } - | "else" { ELSE } - | "ref" { REF } - | "while" { WHILE } - | "do" { DO } - | "until" { UNTIL } - | "from" { FROM } - | "to" { TO } - | "and" { AND } - | "for" { FOR } - (* Fini ? *) + | "let" { LET } + | "type" { TYPE } + | "extern" { EXTERN } + | "fun" { FUN } + | "match" { MATCH } + | "if" { IF } + | "then" { THEN } + | "else" { ELSE } + | "ref" { REF } + | "while" { WHILE } + | "do" { DO } + | "until" { UNTIL } + | "from" { FROM } + | "to" { TO } + | "and" { AND_KW } + | "for" { FOR } (** Binar operation : pas sûr pour celui là *) - (* | binop as b { BINOP b } *) - - (** Operators *) - (* | '=' { EQUAL } *) + (* | binop as b { BINOP b } *) (** Ponctuation *) - | '(' { LPAREN } - | ')' { RPAREN } - | '[' { LBRACK } - | ']' { RBRACK } - | '{' { LBRACE } - | '}' { RBRACE } - | '_' { WILDCARD } - | ':' { COLON } - | "->" { ARROW } - | '<' { INFERIOR } - | '>' { SUPERIOR } - | '|' { PIPE } - | '&' { AND } + | '=' { EQUAL } + | '(' { LPAREN } + | ')' { RPAREN } + | '[' { LBRACK } + | ']' { RBRACK } + | '{' { LBRACE } + | '}' { RBRACE } + | '_' { WILDCARD } + | ':' { COLON } + | "->" { ARROW } + | '<' { INFERIOR } + | '>' { SUPERIOR } + | '|' { PIPE } + | '&' { AND } + | '*' { STAR } + | ',' { COMMA } + + (** Strings *) + | '"' { read_string (Buffer.create 16) lexbuf } (** Values *) - | int as i { INT (Mint.of_string i) } - | ident as s { ID s } - | type_variable as s { TID s } - | constr_id as s { CID s } + | int as i { INT (Mint.of_string i) } + | ident as s { ID s } + | type_variable as s { TID s } + | constr_id as s { CID s } + + (** Characters *) + (* On en manque surement plein ici *) + | "'" (letter as c) "'" { CHAR c } (** Lexing error *) - | _ { error lexbuf "unexpected character." } + | _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." } (* TODO: Gérer les imbrications de commentaires *) and commentary = parse - | "*}" { token lexbuf } - | newline { next_line_and commentary lexbuf } + | "*}" { token lexbuf } + | newline { next_line_and commentary lexbuf } - (** Error *) - | eof { error lexbuf "unclosed commentary." } + (** Error *) + | eof { error lexbuf None "unclosed commentary." } (** Commentary content *) - | _ { commentary lexbuf } + | _ { commentary lexbuf } and commentary_line = parse - | newline { next_line_and token lexbuf } - | eof { EOF } - | _ { commentary_line lexbuf } + | newline { next_line_and token lexbuf } + | eof { EOF } + | _ { commentary_line lexbuf } + +and read_string buffer = parse + (** End of string *) + | '"' { STRING (Buffer.contents buffer) } + + (** Escape *) + | "\\n" { Buffer.add_char buffer '\n'; read_string buffer lexbuf } + | "\\b" { Buffer.add_char buffer '\b'; read_string buffer lexbuf } + | "\\r" { Buffer.add_char buffer '\r'; read_string buffer lexbuf } + | "\\'" { Buffer.add_char buffer '\''; read_string buffer lexbuf } + | "\\\"" { Buffer.add_char buffer '"'; read_string buffer lexbuf } + | "\\\\" { Buffer.add_char buffer '\\'; read_string buffer lexbuf } + + (** Error *) + | eof { error lexbuf None "Unterminated string." } + + (** String content *) + | _ as c { Buffer.add_char buffer c; read_string buffer lexbuf } diff --git a/flap/src/hopix/hopixParser.mly b/flap/src/hopix/hopixParser.mly index 989f54a..d2051e7 100644 --- a/flap/src/hopix/hopixParser.mly +++ b/flap/src/hopix/hopixParser.mly @@ -7,7 +7,7 @@ %token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN %token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR -%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE +%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW %token INT %token ID TID CID STRING @@ -26,6 +26,10 @@ program: | definition=located(definition)* EOF { definition } +/* Attrapes les erreurs de syntaxe */ +| e=located(error) { + Error.error "parsing" (Position.position e) "Syntax error." +} definition: @@ -60,7 +64,7 @@ label_with_type: vdefinition: /* Valeur simple */ -| LET i=located(identifier) COLON ts=option(located(type_scheme)) EQUAL e=located(expression) { +| LET i=located(identifier) ts=option(vdef_type_scheme) EQUAL e=located(expression) { SimpleValue(i, ts, e) } /* Fonction(s) @@ -71,6 +75,11 @@ vdefinition: RecFunctions(fl) } +vdef_type_scheme: +| COLON ts=located(type_scheme) { + ts + } + fundef: | COLON t=option(located(type_scheme)) i=located(identifier) p=located(pattern) EQUAL e=located(expression) { @@ -86,18 +95,27 @@ fundef: * peut être qu'en utilisant des option, on pourrait diminuer le nombre de répétition. * TODO : y'a environ 50 warnings ici, surtout au niveau du POr et PAnd */ pattern: +/* Parenthésage */ +| LPAREN p=pattern RPAREN { + p + } +/* Motif universel liant */ | i=located(identifier) { PVariable i } +/* Motif universel non liant */ | WILDCARD { PWildcard } +/* Annotation de type */ | p=located(pattern) COLON ty=located(ty) { PTypeAnnotation(p,ty) } +/* Entier / Caractère / String */ | l=located(literal) { PLiteral l } +/* Valeurs étiquetées */ | const=located(constructor) { PTaggedValue(const, None, []) } @@ -110,6 +128,7 @@ pattern: | const=located(constructor) INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR LPAREN liste_pattern=separated_nonempty_list(COMMA, located(pattern)) RPAREN { PTaggedValue(const, liste_ty, liste_pattern) } +/* Enregistrement */ /* à refaire */ | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE { PRecord(l, None) @@ -120,14 +139,21 @@ pattern: | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR { PRecord(l, liste_ty) } - +/* Disjonction */ | p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) { POr(p1 :: p_list) } +/* Conjonction */ | p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) { PAnd(p1 :: p_list) } +pattern_list: +/* N-uplets */ +| LPAREN p=separated_nonempty_list(COMMA, pattern) RPAREN { + p + } + /********************************* DATA TYPE **********************************/ /* Pour résoudre un conflit, on a du split ty en 2 règles