refactor
This commit is contained in:
parent
9f6ec012a5
commit
5a1dbcf6de
1 changed files with 44 additions and 40 deletions
|
@ -13,7 +13,7 @@
|
||||||
"during lexing"
|
"during lexing"
|
||||||
^
|
^
|
||||||
match c with
|
match c with
|
||||||
| Some c -> Printf.sprintf " at '%c'" c
|
| Some c -> Printf.sprintf " at `%c`" c
|
||||||
| None -> ""
|
| None -> ""
|
||||||
in
|
in
|
||||||
error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
|
error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
|
||||||
|
@ -22,37 +22,36 @@
|
||||||
let newline = ('\010' | '\013' | "\013\010")
|
let newline = ('\010' | '\013' | "\013\010")
|
||||||
let blank = [' ' '\009' '\012']
|
let blank = [' ' '\009' '\012']
|
||||||
|
|
||||||
|
let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
|
||||||
|
|
||||||
let digit = ['0'-'9']
|
let digit = ['0'-'9']
|
||||||
let hexa = "0x" ['0'-'9' 'a'-'f' 'A'-'F']
|
let hexa = "0x" hex_dig
|
||||||
let bina = "0b" ['0'-'1']
|
let bina = "0b" ['0'-'1']
|
||||||
let octa = "0o" ['0'-'7']
|
let octa = "0o" ['0'-'7']
|
||||||
|
|
||||||
let int = '-'? (digit+ | hexa+ | bina+ | octa+)
|
|
||||||
|
|
||||||
|
(* Définition d'un atom
|
||||||
(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
|
* aka un string qui représente un char, par exemple "\065" = 'A' *)
|
||||||
* il faudra le faire à l'analyseur syntaxique. On fait donc un "ident" pour
|
let ascii_table = ['\000'-'\255']
|
||||||
* identificateur *)
|
|
||||||
|
|
||||||
(* identificateur *)
|
|
||||||
let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
|
||||||
let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
|
||||||
let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
|
||||||
|
|
||||||
(* littéraux caractères et chaînes de caractères *)
|
|
||||||
let ascii_c = ['\000'-'\255']
|
|
||||||
let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
|
|
||||||
let ascii_hex = "\\0x" hex_dig hex_dig
|
let ascii_hex = "\\0x" hex_dig hex_dig
|
||||||
let printable = ['\032'-'\038' '\040'-'\127']
|
let printable = ['\032'-'\038' '\040'-'\127']
|
||||||
|
let escapes = "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
|
||||||
|
let atom = ascii_table | ascii_hex | printable | escapes
|
||||||
|
|
||||||
let atom = ascii_c | ascii_hex | printable | "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
|
(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
|
||||||
let char = '\'' atom '\''
|
* il faudra le faire à l'analyseur syntaxique.
|
||||||
|
* On va donc faire un 'ident' pour "identificateur" *)
|
||||||
|
|
||||||
|
(* Identificateurs var_id label_id type_con *)
|
||||||
|
let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
||||||
|
(* Identificateur de constructeurs de données *)
|
||||||
|
let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
||||||
|
(* Identificateur de variables de type *)
|
||||||
|
let type_variable = '`' ident
|
||||||
|
(* Littéraux entiers *)
|
||||||
|
let int = '-'? (digit+ | hexa+ | bina+ | octa+)
|
||||||
|
(* Littéraux caractères *)
|
||||||
let letter = (digit | ['A'-'Z'] | ['a'-'z'])
|
let letter = (digit | ['A'-'Z'] | ['a'-'z'])
|
||||||
(* pas sûr pour str *)
|
|
||||||
let str = '\"' (atom | '\'' | "\\\"")* '\"'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
rule token = parse
|
rule token = parse
|
||||||
(** Layout *)
|
(** Layout *)
|
||||||
|
@ -80,18 +79,6 @@ rule token = parse
|
||||||
| "and" { AND_KW }
|
| "and" { AND_KW }
|
||||||
| "for" { FOR }
|
| "for" { FOR }
|
||||||
|
|
||||||
(** Opérateurs binaires *)
|
|
||||||
| "+" { PLUS }
|
|
||||||
| "-" { MINUS }
|
|
||||||
| "/" { SLASH }
|
|
||||||
| "&&" { D_AND }
|
|
||||||
| "||" { D_OR }
|
|
||||||
| "=?" { EQUAL_OP }
|
|
||||||
| "<=?" { INF_EQUAL_OP }
|
|
||||||
| ">=?" { SUP_EQUAL_OP }
|
|
||||||
| "<?" { INF_OP }
|
|
||||||
| ">?" { SUP_OP }
|
|
||||||
|
|
||||||
|
|
||||||
(** Ponctuation *)
|
(** Ponctuation *)
|
||||||
| '=' { EQUAL }
|
| '=' { EQUAL }
|
||||||
|
@ -116,18 +103,35 @@ rule token = parse
|
||||||
| ":=" { ASSIGN }
|
| ":=" { ASSIGN }
|
||||||
| '!' { EXCLA }
|
| '!' { EXCLA }
|
||||||
|
|
||||||
(** Strings *)
|
(* Opérateurs binaires *)
|
||||||
| '"' { read_string (Buffer.create 16) lexbuf }
|
| "+" { PLUS }
|
||||||
|
| "-" { MINUS }
|
||||||
|
| "/" { SLASH }
|
||||||
|
| "&&" { D_AND }
|
||||||
|
| "||" { D_OR }
|
||||||
|
| "=?" { EQUAL_OP }
|
||||||
|
| "<=?" { INF_EQUAL_OP }
|
||||||
|
| ">=?" { SUP_EQUAL_OP }
|
||||||
|
| "<?" { INF_OP }
|
||||||
|
| ">?" { SUP_OP }
|
||||||
|
|
||||||
(** Values *)
|
(** Identificateurs *)
|
||||||
| int as i { INT (Mint.of_string i) }
|
|
||||||
| ident as s { ID s }
|
| ident as s { ID s }
|
||||||
| type_variable as s { TID s }
|
| type_variable as s { TID s }
|
||||||
| constr_id as s { CID s }
|
| constr_id as s { CID s }
|
||||||
|
|
||||||
(** Characters *)
|
(* Integers *)
|
||||||
(* On en manque surement plein ici *)
|
| int as i { INT (Mint.of_string i) }
|
||||||
|
|
||||||
|
(* Strings *)
|
||||||
|
| '"' { read_string (Buffer.create 16) lexbuf }
|
||||||
|
|
||||||
|
(* Characters *)
|
||||||
| "'" (letter as c) "'" { CHAR c }
|
| "'" (letter as c) "'" { CHAR c }
|
||||||
|
(* | "'" (atom as c) "'" { (* On retire le \ du début
|
||||||
|
* TODO: fix *)
|
||||||
|
let code = int_of_string (String.sub c 1 ((String.length c) - 2))
|
||||||
|
in CHAR (Char.chr (code)) } *)
|
||||||
|
|
||||||
(** Lexing error *)
|
(** Lexing error *)
|
||||||
| _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }
|
| _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }
|
||||||
|
|
Reference in a new issue