From d1cdc273bd55730d85f95fceb35437fe39a65bab Mon Sep 17 00:00:00 2001 From: Nicolas PENELOUX Date: Wed, 18 Oct 2023 17:28:31 +0200 Subject: [PATCH] =?UTF-8?q?d=C3=A9but=20expression=20parser=20et=20string?= =?UTF-8?q?=20lexer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flap/src/hopix/hopixLexer.mll | 20 ++++++--- flap/src/hopix/hopixParser.mly | 77 +++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 7 deletions(-) diff --git a/flap/src/hopix/hopixLexer.mll b/flap/src/hopix/hopixLexer.mll index 2311a96..8a044c8 100644 --- a/flap/src/hopix/hopixLexer.mll +++ b/flap/src/hopix/hopixLexer.mll @@ -27,21 +27,30 @@ let hexa = "0x" ['0'-'9' 'a'-'f' 'A'-'F'] let bina = "0b" ['0'-'1'] let octa = "0o" ['0'-'7'] -let printable = [' ' '\t' '\n' '\r' (* 33-126 *)] (* pas sûr *) +let int = '-'? (digit+ | hexa+ | bina+ | octa+) + (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con, * il faudra le faire à l'analyseur syntaxique. On fait donc un "ident" pour * identificateur *) + +(* identificateur *) let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* - -let atom = '"' - let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']* let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* -let int = '-'? (digit+ | hexa+ | bina+ | octa+) + +(* littéraux caractères et chaînes de caractères *) +let ascii_c = ['\000'-'\255'] +let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F'] +let ascii_hex = "\\0x" hex_dig hex_dig +let printable = ['\032'-'\038' '\040'-'\127'] + +let atom = ascii_c | ascii_hex | printable | "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r" let char = '\'' atom '\'' let letter = (digit | ['A'-'Z'] | ['a'-'z']) +(* pas sûr pour str *) +(* let str = '\"' (([atom] | ['\''] | ["\\\""]) # '"')* '\"' *) (* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"?" *) @@ -92,6 +101,7 @@ rule token = parse | '&' { AND } | '*' { STAR } | ',' { COMMA } + | '.' { DOT } (** Strings *) | '"' { read_string (Buffer.create 16) lexbuf } diff --git a/flap/src/hopix/hopixParser.mly b/flap/src/hopix/hopixParser.mly index 1dc3bb4..b0e70e4 100644 --- a/flap/src/hopix/hopixParser.mly +++ b/flap/src/hopix/hopixParser.mly @@ -7,7 +7,7 @@ %token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN %token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR -%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW +%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW DOT %token INT %token ID TID CID STRING @@ -140,14 +140,16 @@ pattern: PRecord(l, liste_ty) } /* Disjonction */ +/* | p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) { POr(p1 :: p_list) } /* Conjonction */ +/* | p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) { PAnd(p1 :: p_list) } - +*/ pattern_list: /* N-uplets */ | LPAREN p=separated_nonempty_list(COMMA, pattern) RPAREN { @@ -208,11 +210,82 @@ type_scheme: /********************************* EXPRESSION *********************************/ + +/* De manière générale, il faudrait au mieux revoir le code, pour le factoriser et le rendre plus propre */ +/* (il y a même moyen que ça le soit obligatoire pour pas avoir des conflits éventuel) */ +/* Exemple : TAgged et Record, trop de cas différent alors qu'on pourrait en faire en 2 fois au moins voir 1 */ expression: +/* Simple litteral */ | l=located(literal) { Literal l } + /* Variable */ + | i = located(identifier){ + Variable(i,None) + } + | i = located(identifier) INFERIOR SUPERIOR { + Variable(i,None) + } + | i = located(identifier) INFERIOR t_list = option(separated_nonempty_list(COMMA,located(ty))) SUPERIOR{ + Variable(i,t_list) + } + /* Tagged Value*/ + /* K */ + | const = located(constructor){ + Tagged(const,None,[]) + } + /* K < > */ + | const = located(constructor) INFERIOR SUPERIOR{ + Tagged(const,None,[]) + } + /* K < > (e1, ..., en) */ + | const = located(constructor) INFERIOR SUPERIOR LPAREN e_list=separated_nonempty_list(COMMA,located(expression)) RPAREN { + Tagged(const,None,e_list) + } + /* K */ + | const = located(constructor) INFERIOR t_list = option(separated_nonempty_list(COMMA,located(ty))) SUPERIOR { + Tagged(const,t_list,[]) + } + /* K (e1,...,en) */ + | const = located(constructor) INFERIOR t_list = option(separated_nonempty_list(COMMA,located(ty))) SUPERIOR LPAREN e_list=separated_nonempty_list(COMMA,located(expression)) RPAREN{ + Tagged(const,t_list,e_list) + } + + /* Tuple n = 0 and n > 1 */ + | LPAREN RPAREN { + Tuple([]) + } + | LPAREN e=located(expression) COMMA e_list = separated_nonempty_list(COMMA,located(expression)) RPAREN { + Tuple(e::e_list) + } + + /* Record */ + | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression))) RBRACE { + Record(l, None) + } + | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression))) RBRACE INFERIOR SUPERIOR { + Record(l, None) + } + | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression))) RBRACE INFERIOR t_list = option(separated_nonempty_list(COMMA,located(ty))) SUPERIOR{ + Record(l, t_list) + } + + /* Field */ + + /* e.l */ + | e=located(expression) DOT l = located(label){ + Field(e,l,None) + } + /* e.l < > */ + | e=located(expression) DOT l = located(label) INFERIOR SUPERIOR { + Field(e,l,None) + } + /* e.l */ + | e = located(expression) DOT l=located(label) INFERIOR t_list = option(separated_nonempty_list(COMMA,located(ty))) SUPERIOR { + Field(e,l,t_list) + } + /******************************** BASIC TYPES *********************************/ type_variable: