Add a parsing error message and basic string and char support; fix broken tokens
This commit is contained in:
parent
4ffd33ccc8
commit
094f5f784a
2 changed files with 116 additions and 58 deletions
|
@ -8,8 +8,15 @@
|
||||||
Lexing.new_line lexbuf;
|
Lexing.new_line lexbuf;
|
||||||
f lexbuf
|
f lexbuf
|
||||||
|
|
||||||
let error lexbuf =
|
let error lexbuf c =
|
||||||
error "during lexing" (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
|
let msg =
|
||||||
|
"during lexing"
|
||||||
|
^
|
||||||
|
match c with
|
||||||
|
| Some c -> Printf.sprintf " at '%c'" c
|
||||||
|
| None -> ""
|
||||||
|
in
|
||||||
|
error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
|
||||||
}
|
}
|
||||||
|
|
||||||
let newline = ('\010' | '\013' | "\013\010")
|
let newline = ('\010' | '\013' | "\013\010")
|
||||||
|
@ -33,7 +40,8 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
||||||
let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
||||||
let int = '-'? (digit+ | hexa+ | bina+ | octa+)
|
let int = '-'? (digit+ | hexa+ | bina+ | octa+)
|
||||||
let char = '\'' atom '\''
|
let char = '\'' atom '\''
|
||||||
let string = '\"' ((atom | '\'' | "\\\"")) '\"'
|
|
||||||
|
let letter = (digit | ['A'-'Z'] | ['a'-'z'])
|
||||||
|
|
||||||
(* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *)
|
(* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *)
|
||||||
|
|
||||||
|
@ -61,17 +69,14 @@ rule token = parse
|
||||||
| "until" { UNTIL }
|
| "until" { UNTIL }
|
||||||
| "from" { FROM }
|
| "from" { FROM }
|
||||||
| "to" { TO }
|
| "to" { TO }
|
||||||
| "and" { AND }
|
| "and" { AND_KW }
|
||||||
| "for" { FOR }
|
| "for" { FOR }
|
||||||
(* Fini ? *)
|
|
||||||
|
|
||||||
(** Binar operation : pas sûr pour celui là *)
|
(** Binar operation : pas sûr pour celui là *)
|
||||||
(* | binop as b { BINOP b } *)
|
(* | binop as b { BINOP b } *)
|
||||||
|
|
||||||
(** Operators *)
|
|
||||||
(* | '=' { EQUAL } *)
|
|
||||||
|
|
||||||
(** Ponctuation *)
|
(** Ponctuation *)
|
||||||
|
| '=' { EQUAL }
|
||||||
| '(' { LPAREN }
|
| '(' { LPAREN }
|
||||||
| ')' { RPAREN }
|
| ')' { RPAREN }
|
||||||
| '[' { LBRACK }
|
| '[' { LBRACK }
|
||||||
|
@ -85,6 +90,11 @@ rule token = parse
|
||||||
| '>' { SUPERIOR }
|
| '>' { SUPERIOR }
|
||||||
| '|' { PIPE }
|
| '|' { PIPE }
|
||||||
| '&' { AND }
|
| '&' { AND }
|
||||||
|
| '*' { STAR }
|
||||||
|
| ',' { COMMA }
|
||||||
|
|
||||||
|
(** Strings *)
|
||||||
|
| '"' { read_string (Buffer.create 16) lexbuf }
|
||||||
|
|
||||||
(** Values *)
|
(** Values *)
|
||||||
| int as i { INT (Mint.of_string i) }
|
| int as i { INT (Mint.of_string i) }
|
||||||
|
@ -92,8 +102,12 @@ rule token = parse
|
||||||
| type_variable as s { TID s }
|
| type_variable as s { TID s }
|
||||||
| constr_id as s { CID s }
|
| constr_id as s { CID s }
|
||||||
|
|
||||||
|
(** Characters *)
|
||||||
|
(* On en manque surement plein ici *)
|
||||||
|
| "'" (letter as c) "'" { CHAR c }
|
||||||
|
|
||||||
(** Lexing error *)
|
(** Lexing error *)
|
||||||
| _ { error lexbuf "unexpected character." }
|
| _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }
|
||||||
|
|
||||||
(* TODO: Gérer les imbrications de commentaires *)
|
(* TODO: Gérer les imbrications de commentaires *)
|
||||||
and commentary = parse
|
and commentary = parse
|
||||||
|
@ -101,7 +115,7 @@ and commentary = parse
|
||||||
| newline { next_line_and commentary lexbuf }
|
| newline { next_line_and commentary lexbuf }
|
||||||
|
|
||||||
(** Error *)
|
(** Error *)
|
||||||
| eof { error lexbuf "unclosed commentary." }
|
| eof { error lexbuf None "unclosed commentary." }
|
||||||
|
|
||||||
(** Commentary content *)
|
(** Commentary content *)
|
||||||
| _ { commentary lexbuf }
|
| _ { commentary lexbuf }
|
||||||
|
@ -110,3 +124,21 @@ and commentary_line = parse
|
||||||
| newline { next_line_and token lexbuf }
|
| newline { next_line_and token lexbuf }
|
||||||
| eof { EOF }
|
| eof { EOF }
|
||||||
| _ { commentary_line lexbuf }
|
| _ { commentary_line lexbuf }
|
||||||
|
|
||||||
|
and read_string buffer = parse
|
||||||
|
(** End of string *)
|
||||||
|
| '"' { STRING (Buffer.contents buffer) }
|
||||||
|
|
||||||
|
(** Escape *)
|
||||||
|
| "\\n" { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
|
||||||
|
| "\\b" { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
|
||||||
|
| "\\r" { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
|
||||||
|
| "\\'" { Buffer.add_char buffer '\''; read_string buffer lexbuf }
|
||||||
|
| "\\\"" { Buffer.add_char buffer '"'; read_string buffer lexbuf }
|
||||||
|
| "\\\\" { Buffer.add_char buffer '\\'; read_string buffer lexbuf }
|
||||||
|
|
||||||
|
(** Error *)
|
||||||
|
| eof { error lexbuf None "Unterminated string." }
|
||||||
|
|
||||||
|
(** String content *)
|
||||||
|
| _ as c { Buffer.add_char buffer c; read_string buffer lexbuf }
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
%token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN
|
%token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN
|
||||||
%token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR
|
%token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR
|
||||||
%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE
|
%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW
|
||||||
|
|
||||||
%token<Mint.t> INT
|
%token<Mint.t> INT
|
||||||
%token<string> ID TID CID STRING
|
%token<string> ID TID CID STRING
|
||||||
|
@ -26,6 +26,10 @@ program:
|
||||||
| definition=located(definition)* EOF {
|
| definition=located(definition)* EOF {
|
||||||
definition
|
definition
|
||||||
}
|
}
|
||||||
|
/* Attrape les erreurs de syntaxe */
|
||||||
|
| e=located(error) {
|
||||||
|
Error.error "parsing" (Position.position e) "Syntax error."
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
definition:
|
definition:
|
||||||
|
@ -60,7 +64,7 @@ label_with_type:
|
||||||
|
|
||||||
vdefinition:
|
vdefinition:
|
||||||
/* Valeur simple */
|
/* Valeur simple */
|
||||||
| LET i=located(identifier) COLON ts=option(located(type_scheme)) EQUAL e=located(expression) {
|
| LET i=located(identifier) ts=option(vdef_type_scheme) EQUAL e=located(expression) {
|
||||||
SimpleValue(i, ts, e)
|
SimpleValue(i, ts, e)
|
||||||
}
|
}
|
||||||
/* Fonction(s)
|
/* Fonction(s)
|
||||||
|
@ -71,6 +75,11 @@ vdefinition:
|
||||||
RecFunctions(fl)
|
RecFunctions(fl)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vdef_type_scheme:
|
||||||
|
| COLON ts=located(type_scheme) {
|
||||||
|
ts
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
fundef:
|
fundef:
|
||||||
| COLON t=option(located(type_scheme)) i=located(identifier) p=located(pattern) EQUAL e=located(expression) {
|
| COLON t=option(located(type_scheme)) i=located(identifier) p=located(pattern) EQUAL e=located(expression) {
|
||||||
|
@ -86,18 +95,27 @@ fundef:
|
||||||
* peut être qu'en utilisant des option, on pourrait diminuer le nombre de répétition.
|
* peut être qu'en utilisant des option, on pourrait diminuer le nombre de répétition.
|
||||||
* TODO : y'a environ 50 warnings ici, surtout au niveau du POr et PAnd */
|
* TODO : y'a environ 50 warnings ici, surtout au niveau du POr et PAnd */
|
||||||
pattern:
|
pattern:
|
||||||
|
/* Parenthésage */
|
||||||
|
| LPAREN p=pattern RPAREN {
|
||||||
|
p
|
||||||
|
}
|
||||||
|
/* Motif universel liant */
|
||||||
| i=located(identifier) {
|
| i=located(identifier) {
|
||||||
PVariable i
|
PVariable i
|
||||||
}
|
}
|
||||||
|
/* Motif universel non liant */
|
||||||
| WILDCARD {
|
| WILDCARD {
|
||||||
PWildcard
|
PWildcard
|
||||||
}
|
}
|
||||||
|
/* Annotation de type */
|
||||||
| p=located(pattern) COLON ty=located(ty) {
|
| p=located(pattern) COLON ty=located(ty) {
|
||||||
PTypeAnnotation(p,ty)
|
PTypeAnnotation(p,ty)
|
||||||
}
|
}
|
||||||
|
/* Entier / Caractère / String */
|
||||||
| l=located(literal) {
|
| l=located(literal) {
|
||||||
PLiteral l
|
PLiteral l
|
||||||
}
|
}
|
||||||
|
/* Valeurs étiquetées */
|
||||||
| const=located(constructor) {
|
| const=located(constructor) {
|
||||||
PTaggedValue(const, None, [])
|
PTaggedValue(const, None, [])
|
||||||
}
|
}
|
||||||
|
@ -110,6 +128,7 @@ pattern:
|
||||||
| const=located(constructor) INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR LPAREN liste_pattern=separated_nonempty_list(COMMA, located(pattern)) RPAREN {
|
| const=located(constructor) INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR LPAREN liste_pattern=separated_nonempty_list(COMMA, located(pattern)) RPAREN {
|
||||||
PTaggedValue(const, liste_ty, liste_pattern)
|
PTaggedValue(const, liste_ty, liste_pattern)
|
||||||
}
|
}
|
||||||
|
/* Enregistrement */
|
||||||
/* à refaire */
|
/* à refaire */
|
||||||
| LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE {
|
| LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE {
|
||||||
PRecord(l, None)
|
PRecord(l, None)
|
||||||
|
@ -120,14 +139,21 @@ pattern:
|
||||||
| LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR {
|
| LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR {
|
||||||
PRecord(l, liste_ty)
|
PRecord(l, liste_ty)
|
||||||
}
|
}
|
||||||
|
/* Disjonction */
|
||||||
| p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) {
|
| p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) {
|
||||||
POr(p1 :: p_list)
|
POr(p1 :: p_list)
|
||||||
}
|
}
|
||||||
|
/* Conjonction */
|
||||||
| p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) {
|
| p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) {
|
||||||
PAnd(p1 :: p_list)
|
PAnd(p1 :: p_list)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pattern_list:
|
||||||
|
/* N-uplets */
|
||||||
|
| LPAREN p=separated_nonempty_list(COMMA, pattern) RPAREN {
|
||||||
|
p
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/********************************* DATA TYPE **********************************/
|
/********************************* DATA TYPE **********************************/
|
||||||
/* Pour résoudre un conflit, on a du split ty en 2 règles
|
/* Pour résoudre un conflit, on a du split ty en 2 règles
|
||||||
|
|
Reference in a new issue