Add error message for parsing, basic string and char support, also fix broken tokens

2023-10-18 00:02:59 +02:00 · 2023-10-18 00:02:59 +02:00 · 094f5f784a
commit 094f5f784a
parent 4ffd33ccc8
2 changed files with 116 additions and 58 deletions
--- a/flap/src/hopix/hopixLexer.mll
+++ b/flap/src/hopix/hopixLexer.mll
@ -8,8 +8,15 @@
    Lexing.new_line lexbuf;
    f lexbuf

-  let error lexbuf =
-    error "during lexing" (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
+  let error lexbuf c = (* Report a lexing error; [c] is the optional offending character. *)
+  let msg = (* Context string handed to the outer [error]. *)
+    "during lexing"
+    ^
+    match c with
+    | Some c -> Printf.sprintf " at '%c'" c (* name the character when one is supplied *)
+    | None -> "" (* otherwise keep the bare context *)
+  in (* Not [let rec]: the [error] below is the one bound before this definition. Call sites apply the result to one more message string. *)
+    error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
 }

 let newline = ('\010' | '\013' | "\013\010")
@ -33,80 +40,105 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
 let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
 let int = '-'? (digit+ | hexa+ | bina+ | octa+)
 let char = '\'' atom '\''
-let string = '\"' ((atom | '\'' | "\\\"")) '\"'
+
+let letter = (digit | ['A'-'Z'] | ['a'-'z']) (* Matches [digit] (defined outside this hunk) or an upper/lowercase ASCII letter, so despite its name it is not letters only; used by the character-literal rule of [token]. *)

 (* let binop = '+' | '-' | '*' | '/' | "&&" | "||"| "=?"| "<=?" |">=?" |"<?" |">?" *)


 rule token = parse
  (** Layout *)
-  | newline            { next_line_and token lexbuf      }
-  | blank+             { token lexbuf                    }
-  | eof                { EOF                             }
-  | "{*"               { commentary lexbuf               }
-  | "##"               { commentary_line lexbuf          }
+  | newline               { next_line_and token lexbuf }
+  | blank+                { token lexbuf               }
+  | eof                   { EOF                        }
+  | "{*"                  { commentary lexbuf          }
+  | "##"                  { commentary_line lexbuf     }

  (** Keywords *)
-  | "let"              { LET    }
-  | "type"             { TYPE   }
-  | "extern"           { EXTERN }
-  | "fun"              { FUN    }
-  | "match"            { MATCH  }
-  | "if"               { IF     }
-  | "then"             { THEN   }
-  | "else"             { ELSE   }
-  | "ref"              { REF    }
-  | "while"            { WHILE  }
-  | "do"               { DO     }
-  | "until"            { UNTIL  }
-  | "from"             { FROM   }
-  | "to"               { TO     }
-  | "and"              { AND    }
-  | "for"              { FOR    }
-  (* Fini ? *)
+  | "let"                 { LET    }
+  | "type"                { TYPE   }
+  | "extern"              { EXTERN }
+  | "fun"                 { FUN    }
+  | "match"               { MATCH  }
+  | "if"                  { IF     }
+  | "then"                { THEN   }
+  | "else"                { ELSE   }
+  | "ref"                 { REF    }
+  | "while"               { WHILE  }
+  | "do"                  { DO     }
+  | "until"               { UNTIL  }
+  | "from"                { FROM   }
+  | "to"                  { TO     }
+  | "and"                 { AND_KW } (* the keyword gets its own token; AND is produced by the ampersand rule below *)
+  | "for"                 { FOR    }

  (** Binar operation : pas sûr pour celui là *)
-  (* | binop as b         { BINOP b } *)
-
-  (** Operators *)
-  (* | '='                { EQUAL } *)
+  (* | binop as b            { BINOP b } *)

  (** Ponctuation *)
-  | '('                { LPAREN   }
-  | ')'                { RPAREN   }
-  | '['                { LBRACK   }
-  | ']'                { RBRACK   }
-  | '{'                { LBRACE   }
-  | '}'                { RBRACE   }
-  | '_'                { WILDCARD }
-  | ':'                { COLON    }
-  | "->"               { ARROW    }
-  | '<'                { INFERIOR }
-  | '>'                { SUPERIOR }
-  | '|'                { PIPE     }
-  | '&'                { AND      }
+  | '='                   { EQUAL    }
+  | '('                   { LPAREN   }
+  | ')'                   { RPAREN   }
+  | '['                   { LBRACK   }
+  | ']'                   { RBRACK   }
+  | '{'                   { LBRACE   }
+  | '}'                   { RBRACE   }
+  | '_'                   { WILDCARD }
+  | ':'                   { COLON    }
+  | "->"                  { ARROW    }
+  | '<'                   { INFERIOR }
+  | '>'                   { SUPERIOR }
+  | '|'                   { PIPE     }
+  | '&'                   { AND      }
+  | '*'                   { STAR     }
+  | ','                   { COMMA    }
+
+  (** Strings: the opening quote hands over to [read_string] with a fresh buffer (initial size 16) *)
+  | '"'                   { read_string (Buffer.create 16) lexbuf }

  (** Values *)
-  | int as i           { INT (Mint.of_string i) }
-  | ident as s         { ID s                   }
-  | type_variable as s { TID s                  }
-  | constr_id as s     { CID s                  }
+  | int as i              { INT (Mint.of_string i) }
+  | ident as s            { ID s                   }
+  | type_variable as s    { TID s                  }
+  | constr_id as s        { CID s                  }
+
+  (** Characters *)
+  (* We are probably missing many cases here: only a single [letter] between quotes is accepted, with no escape sequences *)
+  | "'" (letter as c) "'" { CHAR c }

  (** Lexing error *)
-  | _                  { error lexbuf "unexpected character." }
+  | _  as _c              { error lexbuf None (* (Some _c) *) "unexpected character." }

 (* TODO: Gérer les imbrications de commentaires *)
 and commentary = parse
-  | "*}"               { token lexbuf                         }
-  | newline            { next_line_and commentary lexbuf      }
+  | "*}"    { token lexbuf                             } (* closing delimiter: resume normal lexing *)
+  | newline { next_line_and commentary lexbuf          } (* record the new line, then keep skipping *)

-  (** Error  *)
-  | eof                { error lexbuf "unclosed commentary."  }
+  (** Error: end of input reached before the closing delimiter *)
+  | eof     { error lexbuf None "unclosed commentary." }

  (** Commentary content *)
-  | _                  { commentary lexbuf                    }
+  | _       { commentary lexbuf                        } (* any other character is skipped; an inner opening delimiter is not tracked, so block comments do not nest *)

 and commentary_line = parse
-  | newline            { next_line_and token lexbuf }
-  | eof                { EOF                        }
-  | _                  { commentary_line lexbuf     }
+  | newline { next_line_and token lexbuf } (* the line comment ends here: record the new line and resume [token] *)
+  | eof     { EOF                        } (* a line comment may be the last thing in the input *)
+  | _       { commentary_line lexbuf     } (* skip every other character up to the end of the line *)
+
+and read_string buffer = parse
+  (** End of string: emit the characters accumulated in [buffer]. NOTE(review): this rule is entered again for each piece of the string, so the start position seen for the STRING token is presumably that of the closing quote rather than the opening one; check the located positions. *)
+  | '"'    { STRING (Buffer.contents buffer)                        }
+
+  (** Escape sequences, each decoded to the character it denotes. NOTE(review): there is no rule for backslash-t (tab) nor for numeric escapes; such input falls through to the catch-all rule and is copied verbatim, so confirm this against the language specification. *)
+  | "\\n"  { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
+  | "\\b"  { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
+  | "\\r"  { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
+  | "\\'"  { Buffer.add_char buffer '\''; read_string buffer lexbuf }
+  | "\\\"" { Buffer.add_char buffer '"'; read_string buffer lexbuf  }
+  | "\\\\" { Buffer.add_char buffer '\\'; read_string buffer lexbuf }
+
+  (** Error: the input ended before the closing quote *)
+  | eof    { error lexbuf None "Unterminated string."               }
+
+  (** String content. NOTE(review): a newline is accepted here like any other character, without the Lexing.new_line bookkeeping that the other rules perform through next_line_and; confirm whether multi-line strings are allowed. *)
+  | _ as c { Buffer.add_char buffer c; read_string buffer lexbuf    }
--- a/flap/src/hopix/hopixParser.mly
+++ b/flap/src/hopix/hopixParser.mly
@ -7,7 +7,7 @@

 %token EOF LET TYPE WILDCARD STAR ARROW COLON EXTERN FUN COMMA AND EQUAL LPAREN
 %token RPAREN LBRACK RBRACK LBRACE RBRACE INFERIOR SUPERIOR BINOP DO ELSE FOR
-%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE
+%token FROM IF MATCH PIPE REF THEN TO UNTIL WHILE AND_KW

 %token<Mint.t> INT
 %token<string> ID TID CID STRING
@ -26,6 +26,10 @@ program:
 | definition=located(definition)* EOF {
    definition
  }
+/* Catch syntax errors */
+| e=located(error) {
+  Error.error "parsing" (Position.position e) "Syntax error."
+}


 definition:
@ -60,7 +64,7 @@ label_with_type:

 vdefinition:
 /* Valeur simple */
-| LET i=located(identifier) COLON ts=option(located(type_scheme)) EQUAL e=located(expression) {
+| LET i=located(identifier) ts=option(vdef_type_scheme) EQUAL e=located(expression) {
    SimpleValue(i, ts, e)
  }
 /* Fonction(s)
@ -71,6 +75,11 @@ vdefinition:
      RecFunctions(fl)
  }

+vdef_type_scheme: /* A colon followed by a located type scheme; vdefinition wraps this in option(...) so that the colon and the type are omitted together. */
+| COLON ts=located(type_scheme) {
+    ts
+  }
+

 fundef:
 | COLON t=option(located(type_scheme)) i=located(identifier) p=located(pattern) EQUAL e=located(expression) {
@ -86,18 +95,27 @@ fundef:
 * peut être qu'en utilisant des option, on pourrait diminuer le nombre de répétition.
 * TODO : y'a environ 50 warnings ici, surtout au niveau du POr et PAnd */
 pattern:
+/* Parenthésage */
+| LPAREN p=pattern RPAREN {
+    p
+  }
+/* Motif universel liant */
 | i=located(identifier) {
    PVariable i
  }
+/* Motif universel non liant */
 | WILDCARD {
    PWildcard
  }
+/* Annotation de type */
 | p=located(pattern) COLON ty=located(ty) {
    PTypeAnnotation(p,ty)
  }
+/* Entier / Caractère / String */
 | l=located(literal) {
    PLiteral l
  }
+/* Valeurs étiquetées */
 | const=located(constructor) {
    PTaggedValue(const, None, [])
  }
@ -110,6 +128,7 @@ pattern:
 | const=located(constructor) INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR LPAREN liste_pattern=separated_nonempty_list(COMMA, located(pattern)) RPAREN {
    PTaggedValue(const, liste_ty, liste_pattern)
  }
+/* Enregistrement */
 /* à refaire */
 | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE {
    PRecord(l, None)
@ -120,14 +139,21 @@ pattern:
 | LBRACE l=separated_nonempty_list(COMMA, separated_pair(located(label), EQUAL, located(pattern))) RBRACE INFERIOR liste_ty=option(separated_nonempty_list(COMMA, located(ty))) SUPERIOR {
    PRecord(l, liste_ty)
  }
-
+/* Disjonction */
 | p1=located(pattern) PIPE p_list=separated_nonempty_list(PIPE, located(pattern)) {
    POr(p1 :: p_list)
  }
+/* Conjonction */
 | p1=located(pattern) AND p_list=separated_nonempty_list(AND, located(pattern)) {
    PAnd(p1 :: p_list)
  }

+pattern_list:
+/* Tuples: a parenthesised, comma-separated, non-empty list of patterns, returned as a list. NOTE(review): unlike the other pattern productions the elements are not wrapped in located(...), and no production in the visible hunks references pattern_list; confirm it is wired into the grammar. */
+| LPAREN p=separated_nonempty_list(COMMA, pattern) RPAREN {
+    p
+  }
+

 /********************************* DATA TYPE **********************************/
 /* Pour résoudre un conflit, on a du split ty en 2 règles