début expression parser et string lexer

This commit is contained in:
Nicolas PENELOUX 2023-10-18 17:28:31 +02:00
parent 2e44ba6dcd
commit d1cdc273bd
2 changed files with 90 additions and 7 deletions

View file

@ -27,21 +27,30 @@ let hexa = "0x" ['0'-'9' 'a'-'f' 'A'-'F']
let bina = "0b" ['0'-'1'] let bina = "0b" ['0'-'1']
let octa = "0o" ['0'-'7'] let octa = "0o" ['0'-'7']
let printable = [' ' '\t' '\n' '\r' (* 33-126 *)] (* pas sûr *) let int = '-'? (digit+ | hexa+ | bina+ | octa+)
(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con, (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
* il faudra le faire à l'analyseur syntaxique. On fait donc un "ident" pour * il faudra le faire à l'analyseur syntaxique. On fait donc un "ident" pour
* identificateur *) * identificateur *)
(* identificateur *)
let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
let atom = '"'
let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']* let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
(* Integer literal: an optional minus sign followed by decimal digits, or by a
   0x / 0b / 0o prefix and one or more digits of that base.
   [hexa], [bina] and [octa] each already match the prefix plus ONE digit, so
   repeating them with [+] would repeat the prefix as well (accepting
   "0x10x2" and rejecting "0x1F"); only the trailing digits are repeated. *)
let int = '-'? (digit+ | hexa ['0'-'9' 'a'-'f' 'A'-'F']* | bina ['0'-'1']* | octa ['0'-'7']*)
(* Character and string literals. *)
(* Character code written as a backslash followed by three decimal digits,
   from \000 to \255.
   NOTE(review): the previous definition was the character class
   ['\000'-'\255'], i.e. any single raw byte, which made every other
   alternative of [atom] redundant and accepted the literal '''. The escape
   form is assumed by analogy with [ascii_hex] below; confirm against the
   language specification. *)
let ascii_c = '\\' (['0'-'1'] ['0'-'9'] ['0'-'9'] | '2' ['0'-'4'] ['0'-'9'] | "25" ['0'-'5'])
let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
(* Character code written as \0x followed by exactly two hexadecimal digits. *)
let ascii_hex = "\\0x" hex_dig hex_dig
(* Printable ASCII characters (codes 32 to 126) except the single quote
   (code 39), which delimits character literals. Code 127 (DEL) is a control
   character and is therefore excluded. *)
let printable = ['\032'-'\038' '\040'-'\126']
(* One character of a literal: a numeric escape, a printable character, or
   one of the escapes \\ \' \n \t \b \r. *)
let atom = ascii_c | ascii_hex | printable | "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
let char = '\'' atom '\'' let char = '\'' atom '\''
let letter = (digit | ['A'-'Z'] | ['a'-'z']) let letter = (digit | ['A'-'Z'] | ['a'-'z'])
(* pas sûr pour str *)
(* let str = '\"' (([atom] | ['\''] | ["\\\""]) # '"')* '\"' *)
(* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *) (* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *)
@ -92,6 +101,7 @@ rule token = parse
| '&' { AND } | '&' { AND }
| '*' { STAR } | '*' { STAR }
| ',' { COMMA } | ',' { COMMA }
| '.' { DOT }
(** Strings *) (** Strings *)
| '"' { read_string (Buffer.create 16) lexbuf } | '"' { read_string (Buffer.create 16) lexbuf }

View file

@ -7,7 +7,7 @@
%token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN %token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN
%token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR %token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR
%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW %token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW DOT
%token<Mint.t> INT %token<Mint.t> INT
%token<string> ID TID CID STRING %token<string> ID TID CID STRING
@ -140,14 +140,16 @@ pattern:
PRecord(l, liste_ty) PRecord(l, liste_ty)
} }
/* Disjonction */ /* Disjonction */
/*
| p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) { | p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) {
POr(p1 :: p_list) POr(p1 :: p_list)
} }
/* Conjonction */ /* Conjonction */
/*
| p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) { | p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) {
PAnd(p1 :: p_list) PAnd(p1 :: p_list)
} }
*/
pattern_list: pattern_list:
/* N-uplets */ /* N-uplets */
| LPAREN p=separated_nonempty_list(COMMA, pattern) RPAREN { | LPAREN p=separated_nonempty_list(COMMA, pattern) RPAREN {
@ -208,10 +210,81 @@ type_scheme:
/********************************* EXPRESSION *********************************/ /********************************* EXPRESSION *********************************/
/* De manière générale, il faudrait au mieux revoir le code, pour le factoriser et le rendre plus propre */
/* (il est même possible que ce soit obligatoire pour éviter d'éventuels conflits) */
/* Exemple : Tagged et Record, trop de cas différents alors qu'on pourrait les traiter en 2 règles au plus, voire 1 */
expression: expression:
/* Simple litteral */
| l=located(literal) { | l=located(literal) {
Literal l Literal l
} }
/* Variable: `x`, or `x<ty_1, ..., ty_n>` where the type list may be empty. */
| i = located(identifier) {
    Variable (i, None)
  }
/* option(...) already accepts the empty list `x< >` and yields None, so a
   separate `INFERIOR SUPERIOR` alternative would only duplicate this one
   (same semantic value) and introduce a conflict on SUPERIOR. */
| i = located(identifier)
  INFERIOR t_list = option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR {
    Variable (i, t_list)
  }
/* Tagged value. */
/* K */
| const = located(constructor) {
    Tagged (const, None, [])
  }
/* K <ty_1, ..., ty_m>   (the type list may be empty: `K < >` yields None,
   so no dedicated `INFERIOR SUPERIOR` alternative is needed; keeping one
   would only add a conflict on SUPERIOR) */
| const = located(constructor)
  INFERIOR t_list = option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR {
    Tagged (const, t_list, [])
  }
/* K <ty_1, ..., ty_m> (e_1, ..., e_n)   (the type list may be empty) */
| const = located(constructor)
  INFERIOR t_list = option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR
  LPAREN e_list = separated_nonempty_list(COMMA, located(expression)) RPAREN {
    Tagged (const, t_list, e_list)
  }
/* NOTE(review): `K (e_1, ..., e_n)` without a type-argument list is not
   accepted by these alternatives; confirm against the language
   specification whether it should be. */
/* Tuples: the empty tuple `()` and tuples of at least two components.
   A single parenthesised expression is deliberately not matched here. */
| LPAREN RPAREN {
    Tuple []
  }
| LPAREN first = located(expression) COMMA
  rest = separated_nonempty_list(COMMA, located(expression)) RPAREN {
    Tuple (first :: rest)
  }
/* Record: `{ l_1 = e_1, ..., l_n = e_n }`, optionally followed by a
   type-argument list `<ty_1, ..., ty_m>`. */
| LBRACE
  l = separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression)))
  RBRACE {
    Record (l, None)
  }
/* option(...) already accepts the empty list `< >` and yields None, so a
   separate `INFERIOR SUPERIOR` alternative would only duplicate this one
   and introduce a conflict on SUPERIOR. */
| LBRACE
  l = separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression)))
  RBRACE
  INFERIOR t_list = option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR {
    Record (l, t_list)
  }
/* Field access. */
/* e.l */
| e = located(expression) DOT l = located(label) {
    Field (e, l, None)
  }
/* e.l <ty_1, ..., ty_n>   (the type list may be empty: `e.l < >` yields
   None, so no dedicated `INFERIOR SUPERIOR` alternative is needed; keeping
   one would only add a conflict on SUPERIOR) */
| e = located(expression) DOT l = located(label)
  INFERIOR t_list = option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR {
    Field (e, l, t_list)
  }
/******************************** BASIC TYPES *********************************/ /******************************** BASIC TYPES *********************************/