début expression parser et string lexer

2023-10-18 17:28:31 +02:00 · 2023-10-18 17:28:31 +02:00 · d1cdc273bd
commit d1cdc273bd
parent 2e44ba6dcd
2 changed files with 90 additions and 7 deletions
--- a/flap/src/hopix/hopixLexer.mll
+++ b/flap/src/hopix/hopixLexer.mll
@ -27,21 +27,30 @@ let hexa = "0x" ['0'-'9' 'a'-'f' 'A'-'F']
 let bina = "0b" ['0'-'1']
 let octa = "0o" ['0'-'7']

-let printable = [' ' '\t' '\n' '\r' (* 33-126 *)] (* pas sûr *)
+(* integer literal: optional minus, then decimal digits, or one base prefix followed by one or more digits of that base *)
+let int = '-'? (digit+ | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ | "0b" ['0'-'1']+ | "0o" ['0'-'7']+)

 (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
 * il faudra le faire à l'analyseur syntaxique. On fait donc un "ident" pour
 * identificateur *)
+
+(* identificateur *)
 let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
-
-let atom = '"'
-
 let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
 let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
-let int = '-'? (digit+ | hexa+ | bina+ | octa+)
+
+(* character and string literals *)
+let ascii_c = '\\' (['0'-'1'] ['0'-'9'] ['0'-'9'] | '2' ['0'-'4'] ['0'-'9'] | "25" ['0'-'5']) (* decimal escape: backslash then 000 to 255 *)
+let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
+let ascii_hex = "\\0x" hex_dig hex_dig
+let printable = ['\032'-'\038' '\040'-'\127'] (* skips code 039, the single quote; NOTE(review): code 127 (DEL) is included, confirm *)
+
+let atom = ascii_c | ascii_hex | printable | "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
 let char = '\'' atom '\''

 let letter = (digit | ['A'-'Z'] | ['a'-'z'])
+(* pas sûr pour str *)
+(* let str = '\"' (([atom] | ['\''] | ["\\\""]) # '"')* '\"' *)

 (* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *)

@ -92,6 +101,7 @@ rule token = parse
  | '&'                   { AND      }
  | '*'                   { STAR     }
  | ','                   { COMMA    }
+  | '.'                   { DOT      }

  (** Strings *)
  | '"'                   { read_string (Buffer.create 16) lexbuf }
--- a/flap/src/hopix/hopixParser.mly
+++ b/flap/src/hopix/hopixParser.mly
@ -7,7 +7,7 @@

 %token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN
 %token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR
-%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW
+%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW DOT 

 %token<Mint.t> INT
 %token<string> ID TID CID STRING
@ -140,14 +140,16 @@ pattern:
    PRecord(l, liste_ty)
  }
 /* Disjonction */
+/*
 | p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) {
    POr(p1 :: p_list)
  }
 /* Conjonction */
+/*
 | p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) {
    PAnd(p1 :: p_list)
  }
-
+*/
 pattern_list:
 /* N-uplets */
 | LPAREN p=separated_nonempty_list(COMMA, pattern) RPAREN {
@ -208,11 +210,82 @@ type_scheme:


 /********************************* EXPRESSION *********************************/
+
+/* TODO: refactor and tidy this rule (it may even be required to avoid conflicts): */
+/* e.g. Tagged and Record spell out many cases that could be merged into 1 or 2. */
+/* "< >" has its own productions, so the type-argument lists below are non-empty (Some). */
 expression:
+/* Simple literal */
 | l=located(literal) {
    Literal l
  }
+  /* Variable */
+  | i = located(identifier){
+    Variable(i,None)
+  }
+  | i = located(identifier) INFERIOR SUPERIOR {
+    Variable(i,None)
+  }
+  | i = located(identifier) INFERIOR t_list = separated_nonempty_list(COMMA,located(ty)) SUPERIOR{
+    Variable(i,Some t_list)
+  }

+  /* Tagged value. NOTE(review): K (e1, ..., en) without "<...>" has no production here; confirm. */
+  /* K */
+  | const = located(constructor){
+    Tagged(const,None,[])
+  }
+  /* K < > */
+  | const = located(constructor) INFERIOR SUPERIOR{
+    Tagged(const,None,[])
+  }
+  /* K < > (e1, ..., en) */
+  | const = located(constructor) INFERIOR SUPERIOR LPAREN e_list=separated_nonempty_list(COMMA,located(expression)) RPAREN {
+    Tagged(const,None,e_list)
+  }
+  /* K <ty_1, ..., ty_m> with m >= 1 */
+  | const = located(constructor) INFERIOR t_list = separated_nonempty_list(COMMA,located(ty)) SUPERIOR {
+    Tagged(const,Some t_list,[])
+  }
+  /* K <ty_1, ..., ty_m> (e1, ..., en) with m >= 1 */
+  | const = located(constructor) INFERIOR t_list = separated_nonempty_list(COMMA,located(ty)) SUPERIOR LPAREN e_list=separated_nonempty_list(COMMA,located(expression)) RPAREN{
+    Tagged(const,Some t_list,e_list)
+  }
+
+  /* Tuple: n = 0 and n > 1 */
+  | LPAREN RPAREN {
+    Tuple([])
+  }
+  | LPAREN e=located(expression) COMMA e_list = separated_nonempty_list(COMMA,located(expression)) RPAREN {
+    Tuple(e::e_list)
+  }
+
+  /* Record: bare, followed by "< >", or followed by a non-empty type-argument list */
+  | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression))) RBRACE {
+    Record(l, None)
+  }
+  | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression))) RBRACE INFERIOR SUPERIOR {
+    Record(l, None)
+  }
+  | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression))) RBRACE INFERIOR t_list = separated_nonempty_list(COMMA,located(ty)) SUPERIOR{
+    Record(l, Some t_list)
+  }
+
+  /* Field access */
+
+  /* e.l */
+  | e=located(expression) DOT l = located(label){
+    Field(e,l,None)
+  }
+  /* e.l < > */
+  | e=located(expression) DOT l = located(label) INFERIOR SUPERIOR {
+    Field(e,l,None)
+  }
+  /* e.l <ty_1, ..., ty_n> with n >= 1 */
+  | e = located(expression) DOT l=located(label) INFERIOR t_list = separated_nonempty_list(COMMA,located(ty)) SUPERIOR {
+    Field(e,l,Some t_list)
+  }
+ 

 /******************************** BASIC TYPES *********************************/
 type_variable: