Add error message for parsing, basic string and char support, also fix broken tokens

This commit is contained in:
Mylloon 2023-10-18 00:02:59 +02:00
parent 4ffd33ccc8
commit 094f5f784a
Signed by: Anri
GPG key ID: A82D63DFF8D1317F
2 changed files with 116 additions and 58 deletions

View file

@ -8,8 +8,15 @@
Lexing.new_line lexbuf; Lexing.new_line lexbuf;
f lexbuf f lexbuf
let error lexbuf = let error lexbuf c =
error "during lexing" (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p) let msg =
"during lexing"
^
match c with
| Some c -> Printf.sprintf " at '%c'" c
| None -> ""
in
error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
} }
let newline = ('\010' | '\013' | "\013\010") let newline = ('\010' | '\013' | "\013\010")
@ -33,80 +40,105 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
let int = '-'? (digit+ | hexa+ | bina+ | octa+) let int = '-'? (digit+ | hexa+ | bina+ | octa+)
let char = '\'' atom '\'' let char = '\'' atom '\''
(* Regular expression for a string literal: an opening double quote, exactly
   one of [atom], a single quote, or an escaped double quote, then a closing
   double quote.
   NOTE(review): there is no repetition operator, so this only matches
   strings with a single element -- confirm whether a star was intended. *)
let string = '\"' ((atom | '\'' | "\\\"")) '\"'
(* Either [digit] or an ASCII letter, upper or lower case. *)
let letter = (digit | ['A'-'Z'] | ['a'-'z'])
(* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *) (* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *)
rule token = parse rule token = parse
(** Layout *) (** Layout *)
| newline { next_line_and token lexbuf } | newline { next_line_and token lexbuf }
| blank+ { token lexbuf } | blank+ { token lexbuf }
| eof { EOF } | eof { EOF }
| "{*" { commentary lexbuf } | "{*" { commentary lexbuf }
| "##" { commentary_line lexbuf } | "##" { commentary_line lexbuf }
(** Keywords *) (** Keywords *)
| "let" { LET } | "let" { LET }
| "type" { TYPE } | "type" { TYPE }
| "extern" { EXTERN } | "extern" { EXTERN }
| "fun" { FUN } | "fun" { FUN }
| "match" { MATCH } | "match" { MATCH }
| "if" { IF } | "if" { IF }
| "then" { THEN } | "then" { THEN }
| "else" { ELSE } | "else" { ELSE }
| "ref" { REF } | "ref" { REF }
| "while" { WHILE } | "while" { WHILE }
| "do" { DO } | "do" { DO }
| "until" { UNTIL } | "until" { UNTIL }
| "from" { FROM } | "from" { FROM }
| "to" { TO } | "to" { TO }
| "and" { AND } | "and" { AND_KW }
| "for" { FOR } | "for" { FOR }
(* Fini ? *)
(** Binar operation : pas sûr pour celui là *) (** Binar operation : pas sûr pour celui là *)
(* | binop as b { BINOP b } *) (* | binop as b { BINOP b } *)
(** Operators *)
(* | '=' { EQUAL } *)
(** Ponctuation *) (** Ponctuation *)
| '(' { LPAREN } | '=' { EQUAL }
| ')' { RPAREN } | '(' { LPAREN }
| '[' { LBRACK } | ')' { RPAREN }
| ']' { RBRACK } | '[' { LBRACK }
| '{' { LBRACE } | ']' { RBRACK }
| '}' { RBRACE } | '{' { LBRACE }
| '_' { WILDCARD } | '}' { RBRACE }
| ':' { COLON } | '_' { WILDCARD }
| "->" { ARROW } | ':' { COLON }
| '<' { INFERIOR } | "->" { ARROW }
| '>' { SUPERIOR } | '<' { INFERIOR }
| '|' { PIPE } | '>' { SUPERIOR }
| '&' { AND } | '|' { PIPE }
| '&' { AND }
| '*' { STAR }
| ',' { COMMA }
(** Strings *)
| '"' { read_string (Buffer.create 16) lexbuf }
(** Values *) (** Values *)
| int as i { INT (Mint.of_string i) } | int as i { INT (Mint.of_string i) }
| ident as s { ID s } | ident as s { ID s }
| type_variable as s { TID s } | type_variable as s { TID s }
| constr_id as s { CID s } | constr_id as s { CID s }
(** Characters *)
(* On en manque surement plein ici *)
| "'" (letter as c) "'" { CHAR c }
(** Lexing error *) (** Lexing error *)
| _ { error lexbuf "unexpected character." } | _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }
(* TODO: Gérer les imbrications de commentaires *) (* TODO: Gérer les imbrications de commentaires *)
and commentary = parse and commentary = parse
| "*}" { token lexbuf } | "*}" { token lexbuf }
| newline { next_line_and commentary lexbuf } | newline { next_line_and commentary lexbuf }
(** Error *) (** Error *)
| eof { error lexbuf "unclosed commentary." } | eof { error lexbuf None "unclosed commentary." }
(** Commentary content *) (** Commentary content *)
| _ { commentary lexbuf } | _ { commentary lexbuf }
and commentary_line = parse and commentary_line = parse
| newline { next_line_and token lexbuf } | newline { next_line_and token lexbuf }
| eof { EOF } | eof { EOF }
| _ { commentary_line lexbuf } | _ { commentary_line lexbuf }
(* [read_string buffer] lexes the body of a string literal. The [token] rule
   calls it right after consuming the opening double quote, with a fresh
   buffer. Decoded characters are accumulated in [buffer]; the closing double
   quote produces [STRING] carrying the buffer contents. Reaching the end of
   input first is reported through [error]. *)
and read_string buffer = parse
  (** End of string *)
  | '"'     { STRING (Buffer.contents buffer) }
  (** Escape sequences *)
  | "\\n"   { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
  | "\\t"   { Buffer.add_char buffer '\t'; read_string buffer lexbuf }
  | "\\b"   { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
  | "\\r"   { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
  | "\\'"   { Buffer.add_char buffer '\''; read_string buffer lexbuf }
  | "\\\""  { Buffer.add_char buffer '"'; read_string buffer lexbuf }
  | "\\\\"  { Buffer.add_char buffer '\\'; read_string buffer lexbuf }
  (** Raw line break inside the literal: the text is kept as written, and the
      line counter is advanced so that later positions stay accurate. This
      case must stay above the catch-all, which would otherwise win on a
      one-character line break. *)
  | newline { Lexing.new_line lexbuf;
              Buffer.add_string buffer (Lexing.lexeme lexbuf);
              read_string buffer lexbuf }
  (** Error *)
  | eof     { error lexbuf None "Unterminated string." }
  (** String content *)
  | _ as c  { Buffer.add_char buffer c; read_string buffer lexbuf }

View file

@ -7,7 +7,7 @@
%token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN %token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN
%token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR %token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR
%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE %token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW
%token<Mint.t> INT %token<Mint.t> INT
%token<string> ID TID CID STRING %token<string> ID TID CID STRING
@ -26,6 +26,10 @@ program:
| definition=located(definition)* EOF { | definition=located(definition)* EOF {
definition definition
} }
/* Attrape les erreurs de syntaxe */
| e=located(error) {
Error.error "parsing" (Position.position e) "Syntax error."
}
definition: definition:
@ -60,7 +64,7 @@ label_with_type:
vdefinition: vdefinition:
/* Valeur simple */ /* Valeur simple */
| LET i=located(identifier) COLON ts=option(located(type_scheme)) EQUAL e=located(expression) { | LET i=located(identifier) ts=option(vdef_type_scheme) EQUAL e=located(expression) {
SimpleValue(i, ts, e) SimpleValue(i, ts, e)
} }
/* Fonction(s) /* Fonction(s)
@ -71,6 +75,11 @@ vdefinition:
RecFunctions(fl) RecFunctions(fl)
} }
/* Type annotation of a value definition: a colon followed by a located type
   scheme. The semantic value is the located type scheme alone. */
vdef_type_scheme:
| scheme=preceded(COLON, located(type_scheme)) { scheme }
fundef: fundef:
| COLON t=option(located(type_scheme)) i=located(identifier) p=located(pattern) EQUAL e=located(expression) { | COLON t=option(located(type_scheme)) i=located(identifier) p=located(pattern) EQUAL e=located(expression) {
@ -86,18 +95,27 @@ fundef:
* peut être qu'en utilisant des option, on pourrait diminuer le nombre de répétition. * peut être qu'en utilisant des option, on pourrait diminuer le nombre de répétition.
* TODO : y'a environ 50 warnings ici, surtout au niveau du POr et PAnd */ * TODO : y'a environ 50 warnings ici, surtout au niveau du POr et PAnd */
pattern: pattern:
/* Parenthésage */
| LPAREN p=pattern RPAREN {
p
}
/* Motif universel liant */
| i=located(identifier) { | i=located(identifier) {
PVariable i PVariable i
} }
/* Motif universel non liant */
| WILDCARD { | WILDCARD {
PWildcard PWildcard
} }
/* Annotation de type */
| p=located(pattern) COLON ty=located(ty) { | p=located(pattern) COLON ty=located(ty) {
PTypeAnnotation(p,ty) PTypeAnnotation(p,ty)
} }
/* Entier / Caractère / String */
| l=located(literal) { | l=located(literal) {
PLiteral l PLiteral l
} }
/* Valeurs étiquetées */
| const=located(constructor) { | const=located(constructor) {
PTaggedValue(const, None, []) PTaggedValue(const, None, [])
} }
@ -110,6 +128,7 @@ pattern:
| const=located(constructor) INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR LPAREN liste_pattern=separated_nonempty_list(COMMA, located(pattern)) RPAREN { | const=located(constructor) INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR LPAREN liste_pattern=separated_nonempty_list(COMMA, located(pattern)) RPAREN {
PTaggedValue(const, liste_ty, liste_pattern) PTaggedValue(const, liste_ty, liste_pattern)
} }
/* Enregistrement */
/* à refaire */ /* à refaire */
| LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE { | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE {
PRecord(l, None) PRecord(l, None)
@ -120,14 +139,21 @@ pattern:
| LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR { | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR {
PRecord(l, liste_ty) PRecord(l, liste_ty)
} }
/* Disjonction */
| p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) { | p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) {
POr(p1 :: p_list) POr(p1 :: p_list)
} }
/* Conjonction */
| p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) { | p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) {
PAnd(p1 :: p_list) PAnd(p1 :: p_list)
} }
/* Tuples: a non-empty, comma-separated list of patterns enclosed in
   parentheses. The semantic value is the list of patterns. */
pattern_list:
| elems=delimited(LPAREN, separated_nonempty_list(COMMA, pattern), RPAREN) {
    elems
  }
/********************************* DATA TYPE **********************************/ /********************************* DATA TYPE **********************************/
/* Pour résoudre un conflit, on a du split ty en 2 règles /* Pour résoudre un conflit, on a du split ty en 2 règles