From d1cdc273bd55730d85f95fceb35437fe39a65bab Mon Sep 17 00:00:00 2001
From: Nicolas PENELOUX <nicolas.peneloux@free.fr>
Date: Wed, 18 Oct 2023 17:28:31 +0200
Subject: [PATCH] =?UTF-8?q?d=C3=A9but=20expression=20parser=20et=20string?=
 =?UTF-8?q?=20lexer?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flap/src/hopix/hopixLexer.mll  | 20 ++++++---
 flap/src/hopix/hopixParser.mly | 77 +++++++++++++++++++++++++++++++++-
 2 files changed, 90 insertions(+), 7 deletions(-)
diff --git a/flap/src/hopix/hopixLexer.mll b/flap/src/hopix/hopixLexer.mll
index 2311a96..8a044c8 100644
--- a/flap/src/hopix/hopixLexer.mll
+++ b/flap/src/hopix/hopixLexer.mll
@@ -27,21 +27,30 @@ let hexa = "0x" ['0'-'9' 'a'-'f' 'A'-'F']
 let bina = "0b" ['0'-'1']
 let octa = "0o" ['0'-'7']
 
-let printable = [' ' '\t' '\n' '\r' (* 33-126 *)] (* pas sûr *)
+let int = '-'? (digit+ | hexa ['0'-'9' 'a'-'f' 'A'-'F']* | bina ['0'-'1']* | octa ['0'-'7']*) (* optional minus sign, then decimal digits or one base prefix followed by one or more digits of that base *)
+
 
 (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
  * il faudra le faire à l'analyseur syntaxique. On fait donc un "ident" pour
  * identificateur *)
+
+(* identifier *)
 let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
-
-let atom = '"'
-
 let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
 let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
-let int = '-'? (digit+ | hexa+ | bina+ | octa+)
+
+(* Character and string literals: an atom is either an escape sequence or a single plain printable character. *)
+let ascii_c = '\\' (['0'-'1'] ['0'-'9'] ['0'-'9'] | '2' ['0'-'4'] ['0'-'9'] | "25" ['0'-'5']) (* decimal escape, backslash followed by 000 .. 255 *)
+let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
+let ascii_hex = "\\0x" hex_dig hex_dig (* hexadecimal escape, backslash 0x followed by two hex digits *)
+let printable = ['\032'-'\038' '\040'-'\126'] (* ASCII 32 .. 126 without the single quote (code 39); DEL (127) is excluded *)
+
+let atom = ascii_c | ascii_hex | printable | "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
 let char = '\'' atom '\''
 
 let letter = (digit | ['A'-'Z'] | ['a'-'z'])
+(* NOTE: not sure about this definition of str *)
+(* let str = '\"' (([atom] | ['\''] | ["\\\""]) # '"')* '\"' *)
 
 (* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *)
 
@@ -92,6 +101,7 @@ rule token = parse
   | '&'                   { AND      }
   | '*'                   { STAR     }
   | ','                   { COMMA    }
+  | '.'                   { DOT      }
 
   (** Strings *)
   | '"'                   { read_string (Buffer.create 16) lexbuf }
diff --git a/flap/src/hopix/hopixParser.mly b/flap/src/hopix/hopixParser.mly
index 1dc3bb4..b0e70e4 100644
--- a/flap/src/hopix/hopixParser.mly
+++ b/flap/src/hopix/hopixParser.mly
@@ -7,7 +7,7 @@
 
 %token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN
 %token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR
-%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW
+%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW DOT
 
 %token<Mint.t> INT
 %token<string> ID TID CID STRING
@@ -140,14 +140,16 @@ pattern:
     PRecord(l, liste_ty)
   }
 /* Disjonction */
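+/* NOTE(review): disjunction rule disabled. No closing marker follows before the Conjonction comment line, whose end marker therefore seems to terminate this comment; confirm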
 | p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) {
     POr(p1 :: p_list)
   }
 /* Conjonction */
+/*
 | p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) {
     PAnd(p1 :: p_list)
   }
-
+*/
 pattern_list:
 /* N-uplets */
 | LPAREN p=separated_nonempty_list(COMMA, pattern) RPAREN {
@@ -208,11 +210,82 @@ type_scheme:
 
 
 /********************************* EXPRESSION *********************************/
+
+/* Expressions. Every form that accepts an explicit type instantiation goes through */
+/* the expression_type_arguments helper defined right after this rule, so the `< >` */
+/* and `<ty_1, ..., ty_m>` spellings are described once and cannot overlap. */
 expression:
+/* Simple literal */
 | l=located(literal) {
     Literal l
   }
+
+  /* Variable: x, x < > or x <ty_1, ..., ty_m> */
+  | i=located(identifier) tys=expression_type_arguments {
+    Variable(i, tys)
+  }
+
+  /* Tagged value: K [type arguments] [(e_1, ..., e_n)] */
+  /* The two suffixes are optional and independent, so K, K <...>, K (...) */
+  /* and K <...> (...) are all accepted. loption yields [] when the */
+  /* parenthesised argument list is absent. */
+  | const=located(constructor)
+    tys=expression_type_arguments
+    e_list=loption(delimited(LPAREN, separated_nonempty_list(COMMA, located(expression)), RPAREN)) {
+    Tagged(const, tys, e_list)
+  }
+
+  /* Tuple: () for n = 0 and (e_1, ..., e_n) for n > 1. */
+  /* A lone parenthesised expression (n = 1) is not produced by these rules. */
+  | LPAREN RPAREN {
+    Tuple([])
+  }
+  | LPAREN
+    e=located(expression)
+    COMMA
+    e_list=separated_nonempty_list(COMMA, located(expression))
+    RPAREN {
+    Tuple(e :: e_list)
+  }
+
+  /* Record: { l_1 = e_1, ..., l_n = e_n } [type arguments] */
+  /* The field list must contain at least one binding. */
+  | LBRACE
+    l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(expression)))
+    RBRACE
+    tys=expression_type_arguments {
+    Record(l, tys)
+  }
+
+  /* Field access: e.l [type arguments] */
+  /* The rule is left-recursive, so e.l1.l2 groups as (e.l1).l2. */
+  | e=located(expression)
+    DOT
+    l=located(label)
+    tys=expression_type_arguments {
+    Field(e, l, tys)
+  }
+
+/* Optional explicit type instantiation that may follow an expression head. */
+/* Three spellings are recognised: */
+/*   (nothing)            yields None */
+/*   < >                  yields None */
+/*   <ty_1, ..., ty_m>    yields Some [ty_1; ...; ty_m] */
+/* The empty-bracket spelling has its own production and the list is */
+/* mandatory in the last one, so `< >` can be derived in exactly one way. */
+/* NOTE(review): `< >` yields None here; confirm whether the AST expects */
+/* Some [] for an explicit but empty instantiation. */
+expression_type_arguments:
+| /* no type arguments */ {
+    None
+  }
+| INFERIOR SUPERIOR {
+    None
+  }
+| INFERIOR tys=separated_nonempty_list(COMMA, located(ty)) SUPERIOR {
+    Some tys
+  }
+ 
 
 /******************************** BASIC TYPES *********************************/
 type_variable: