This commit is contained in:
Mylloon 2023-10-21 15:33:19 +02:00
parent 9f6ec012a5
commit 5a1dbcf6de
Signed by: Anri
GPG key ID: A82D63DFF8D1317F

View file

@ -13,7 +13,7 @@
"during lexing"
^
match c with
| Some c -> Printf.sprintf " at '%c'" c
| Some c -> Printf.sprintf " at `%c`" c
| None -> ""
in
error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
@ -22,37 +22,36 @@
(* Named regular expressions used by the lexer rules.
 * NOTE(review): several names below (hex_dig, hexa, ident, constr_id,
 * type_variable, atom, int) are bound more than once in this listing;
 * confirm which binding is the intended one. *)
(* Line terminator: LF, CR, or CR followed by LF. *)
let newline = ('\010' | '\013' | "\013\010")
(* Horizontal whitespace: space, tab ('\009') and form feed ('\012'). *)
let blank = [' ' '\009' '\012']
(* A single hexadecimal digit. *)
let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
(* A single decimal digit. *)
let digit = ['0'-'9']
(* The 0x prefix followed by exactly one hexadecimal digit.
 * NOTE(review): int uses hexa+, which repeats the whole prefixed group
 * (e.g. 0x10xA) rather than the digits after a single prefix, so 0x1A is
 * not matched as one literal. The same holds for bina+ and octa+. Confirm
 * whether the digit class should be the repeated part instead. *)
let hexa = "0x" ['0'-'9' 'a'-'f' 'A'-'F']
let hexa = "0x" hex_dig
(* The 0b prefix followed by exactly one binary digit. *)
let bina = "0b" ['0'-'1']
(* The 0o prefix followed by exactly one octal digit. *)
let octa = "0o" ['0'-'7']
(* Integer literal: optional minus sign, then one or more repetitions of a
 * decimal digit, or of a hexa, bina or octa group. *)
let int = '-'? (digit+ | hexa+ | bina+ | octa+)
(* The lexer cannot tell var_id, label_id and type_con apart;
 * that has to be done by the parser. Hence a single "ident" class for
 * identifiers. *)
(* identifier *)
(* A lowercase letter followed by letters, digits or underscores. *)
let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
(* An uppercase letter followed by letters, digits or underscores. *)
let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
(* A backquote followed by a lowercase-initial identifier. *)
let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
(* character and string literals *)
(* Any single byte. *)
let ascii_c = ['\000'-'\255']
let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
(* Definition of an atom,
 * i.e. a string that stands for one char, for example "\065" = 'A' *)
(* Any single byte. *)
let ascii_table = ['\000'-'\255']
(* A backslash, then 0x, then exactly two hexadecimal digits. *)
let ascii_hex = "\\0x" hex_dig hex_dig
(* Bytes '\032' to '\127' except '\039' (the single quote). *)
let printable = ['\032'-'\038' '\040'-'\127']
(* Two-character escapes: backslash followed by one of: backslash, single
 * quote, n, t, b, r. *)
let escapes = "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
(* NOTE(review): ascii_table (and ascii_c) already match any single byte,
 * including the single quote and the backslash, so printable is redundant
 * here and only the multi-character alternatives add anything. Confirm this
 * is intended. *)
let atom = ascii_table | ascii_hex | printable | escapes
let atom = ascii_c | ascii_hex | printable | "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
(* One atom enclosed in single quotes. *)
let char = '\'' atom '\''
(* The lexer cannot tell var_id, label_id and type_con apart;
 * that has to be done by the parser.
 * Hence a single 'ident' class, short for "identifier". *)
(* Identifiers: var_id, label_id, type_con *)
let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
(* Data constructor identifiers *)
let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
(* Type variable identifiers: a backquote followed by an ident *)
let type_variable = '`' ident
(* Integer literals *)
let int = '-'? (digit+ | hexa+ | bina+ | octa+)
(* Character literals *)
(* A decimal digit or an ASCII letter. *)
let letter = (digit | ['A'-'Z'] | ['a'-'z'])
(* not sure about str *)
(* A double quote, then any number of atoms, single quotes or
 * backslash-escaped double quotes, then a closing double quote.
 * NOTE(review): atom matches any byte, so it also matches a bare double
 * quote; confirm the intended termination behaviour. *)
let str = '\"' (atom | '\'' | "\\\"")* '\"'
rule token = parse
(** Layout *)
@ -80,18 +79,6 @@ rule token = parse
| "and" { AND_KW }
| "for" { FOR }
(** Opérateurs binaires *)
| "+" { PLUS }
| "-" { MINUS }
| "/" { SLASH }
| "&&" { D_AND }
| "||" { D_OR }
| "=?" { EQUAL_OP }
| "<=?" { INF_EQUAL_OP }
| ">=?" { SUP_EQUAL_OP }
| "<?" { INF_OP }
| ">?" { SUP_OP }
(** Ponctuation *)
| '=' { EQUAL }
@ -116,18 +103,35 @@ rule token = parse
| ":=" { ASSIGN }
| '!' { EXCLA }
(** Strings *)
| '"' { read_string (Buffer.create 16) lexbuf }
(* Opérateurs binaires *)
| "+" { PLUS }
| "-" { MINUS }
| "/" { SLASH }
| "&&" { D_AND }
| "||" { D_OR }
| "=?" { EQUAL_OP }
| "<=?" { INF_EQUAL_OP }
| ">=?" { SUP_EQUAL_OP }
| "<?" { INF_OP }
| ">?" { SUP_OP }
(** Values *)
| int as i { INT (Mint.of_string i) }
(** Identificateurs *)
| ident as s { ID s }
| type_variable as s { TID s }
| constr_id as s { CID s }
(** Characters *)
(* On en manque sûrement plein ici *)
(* Integers *)
| int as i { INT (Mint.of_string i) }
(* Strings *)
| '"' { read_string (Buffer.create 16) lexbuf }
(* Characters *)
| "'" (letter as c) "'" { CHAR c }
(* | "'" (atom as c) "'" { (* On retire le \ du début
* TODO: fix *)
let code = int_of_string (String.sub c 1 ((String.length c) - 2))
in CHAR (Char.chr (code)) } *)
(** Lexing error *)
| _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }