From 5a1dbcf6de34cf24e919c41bc8ce3a535f772f39 Mon Sep 17 00:00:00 2001 From: Mylloon Date: Sat, 21 Oct 2023 15:33:19 +0200 Subject: [PATCH] refactor --- flap/src/hopix/hopixLexer.mll | 84 ++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/flap/src/hopix/hopixLexer.mll b/flap/src/hopix/hopixLexer.mll index 67c82d3..b9fe033 100644 --- a/flap/src/hopix/hopixLexer.mll +++ b/flap/src/hopix/hopixLexer.mll @@ -13,7 +13,7 @@ "during lexing" ^ match c with - | Some c -> Printf.sprintf " at '%c'" c + | Some c -> Printf.sprintf " at `%c`" c | None -> "" in error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p) @@ -22,37 +22,36 @@ let newline = ('\010' | '\013' | "\013\010") let blank = [' ' '\009' '\012'] +let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F'] + let digit = ['0'-'9'] -let hexa = "0x" ['0'-'9' 'a'-'f' 'A'-'F'] +let hexa = "0x" hex_dig let bina = "0b" ['0'-'1'] let octa = "0o" ['0'-'7'] -let int = '-'? (digit+ | hexa+ | bina+ | octa+) - -(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con, - * il faudra le faire à l'analyseur syntaxique. On fait donc un "ident" pour - * identificateur *) - -(* identificateur *) -let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* -let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']* -let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* - -(* littéraux caractères et chaînes de caractères *) -let ascii_c = ['\000'-'\255'] -let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F'] +(* Définition d'un atom + * aka un string qui représente un char, par exemple "\065" = 'A' *) +let ascii_table = ['\000'-'\255'] let ascii_hex = "\\0x" hex_dig hex_dig let printable = ['\032'-'\038' '\040'-'\127'] +let escapes = "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r" +let atom = ascii_table | ascii_hex | printable | escapes -let atom = ascii_c | ascii_hex | printable | "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r" -let char = '\'' atom '\'' +(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con, + * il faudra le faire à l'analyseur syntaxique. + * On va donc faire un 'ident' pour "identificateur" *) +(* Identificateurs var_id label_id type_con *) +let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']* +(* Identificateur de constructeurs de données *) +let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']* +(* Identificateur de variables de type *) +let type_variable = '`' ident +(* Littéraux entiers *) +let int = '-'? (digit+ | hexa+ | bina+ | octa+) +(* Littéraux caractères *) let letter = (digit | ['A'-'Z'] | ['a'-'z']) -(* pas sûr pour str *) -let str = '\"' (atom | '\'' | "\\\"")* '\"' - - rule token = parse (** Layout *) @@ -80,18 +79,6 @@ rule token = parse | "and" { AND_KW } | "for" { FOR } - (** Opérateurs binaires *) - | "+" { PLUS } - | "-" { MINUS } - | "/" { SLASH } - | "&&" { D_AND } - | "||" { D_OR } - | "=?" { EQUAL_OP } - | "<=?" { INF_EQUAL_OP } - | ">=?" { SUP_EQUAL_OP } - | "?" { SUP_OP } - (** Ponctuation *) | '=' { EQUAL } @@ -116,18 +103,35 @@ rule token = parse | ":=" { ASSIGN } | '!' { EXCLA } - (** Strings *) - | '"' { read_string (Buffer.create 16) lexbuf } + (* Opérateurs binaires *) + | "+" { PLUS } + | "-" { MINUS } + | "/" { SLASH } + | "&&" { D_AND } + | "||" { D_OR } + | "=?" { EQUAL_OP } + | "<=?" { INF_EQUAL_OP } + | ">=?" { SUP_EQUAL_OP } + | "?" { SUP_OP } - (** Values *) - | int as i { INT (Mint.of_string i) } + (** Identificateurs *) | ident as s { ID s } | type_variable as s { TID s } | constr_id as s { CID s } - (** Characters *) - (* On en manque surement plein ici *) + (* Integers *) + | int as i { INT (Mint.of_string i) } + + (* Strings *) + | '"' { read_string (Buffer.create 16) lexbuf } + + (* Characters *) | "'" (letter as c) "'" { CHAR c } + (* | "'" (atom as c) "'" { (* On retire le \ du début + * TODO: fix *) + let code = int_of_string (String.sub c 1 ((String.length c) - 2)) + in CHAR (Char.chr (code)) } *) (** Lexing error *) | _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }