This commit is contained in:
Mylloon 2023-10-24 21:44:29 +02:00
parent d9fd965b3b
commit 05c2d703bf
Signed by: Anri
GPG key ID: A82D63DFF8D1317F

View file

@ -7,36 +7,43 @@
(* Records a line break on [lexbuf] with [Lexing.new_line], then continues
 * by applying the continuation [f] to the same [lexbuf] and returning its
 * result. *)
let next_line_and f lexbuf =
  let () = Lexing.new_line lexbuf in
  f lexbuf
;;
(* Reports a lexing error for the lexeme currently held by [lexbuf].
 *
 * [c] is an optional offending character; when present it is appended to
 * the message between backquotes. The message and the span going from
 * [lexbuf.lex_start_p] to [lexbuf.lex_curr_p] (joined with [lex_join]) are
 * handed to the [error] function that is in scope before this definition.
 *
 * NOTE(review): call sites in this file apply this function to a third,
 * message argument (for instance [error lexbuf None err_msg]), but only two
 * parameters are bound here, so that message is never part of the report.
 * Presumably the outer [error] never returns - confirm, and consider
 * threading the message through. *)
let error lexbuf c =
  let suffix =
    match c with
    | Some c -> Printf.sprintf " at `%c`" c
    | None -> ""
  in
  error ("during lexing" ^ suffix) (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
;;
(* Generic message handed to [error] by the catch-all clause of the token
 * rule and by [recup_char] when a character cannot be converted. *)
let err_msg = "unexpected character."
(* Converts the source text of a character into the character it denotes.
 * Handles the named escapes such as "\n", the numeric escapes such as
 * "\000" (a backslash followed by anything [int_of_string] accepts, which
 * covers the 0x hexadecimal form), and plain one-character strings, which
 * are returned unchanged.
 * Returns [None] when [data] does not denote a valid character. *)
let recup_char data =
  match data with
  | "\\n" -> Some '\n'
  | "\\b" -> Some '\b'
  | "\\r" -> Some '\r'
  | "\\t" -> Some '\t'
  | "\\'" -> Some '\''
  | "\\\"" -> Some '"'
  | "\\\\" -> Some '\\'
  (* An ordinary character stands for itself. *)
  | _ when String.length data = 1 -> Some data.[0]
  | _ ->
    (* Drop the leading backslash and read what is left as a character code.
     * [String.sub] and [Char.chr] raise [Invalid_argument] (empty input,
     * code outside 0-255) and [int_of_string] raises [Failure]. *)
    (match Char.chr (int_of_string (String.sub data 1 (String.length data - 1))) with
     | c -> Some c
     | exception (Failure _ | Invalid_argument _) -> None)
(* Converts the source text of a character into the character it denotes.
 * Handles the named escapes such as "\n", the numeric escapes such as
 * "\000" (a backslash followed by anything [int_of_string] accepts), and
 * plain one-character strings, which are returned unchanged.
 * When [data] denotes no valid character, the failure is reported through
 * [error] with [lexbuf] and [err_msg]. *)
let recup_char data lexbuf =
  match data with
  | "\\n" -> '\n'
  | "\\b" -> '\b'
  | "\\r" -> '\r'
  | "\\t" -> '\t'
  | "\\'" -> '\''
  (* Escaped double quote: it is one of the alternatives of [str_char]. *)
  | "\\\"" -> '"'
  | "\\\\" -> '\\'
  (* An ordinary character stands for itself. *)
  | _ when String.length data = 1 -> data.[0]
  | _ ->
    (* Drop the leading backslash and read what is left as a character code.
     * [String.sub] and [Char.chr] raise [Invalid_argument] (empty input,
     * code outside 0-255) and [int_of_string] raises [Failure]. *)
    (match Char.chr (int_of_string (String.sub data 1 (String.length data - 1))) with
     | c -> c
     | exception (Failure _ | Invalid_argument _) -> error lexbuf None err_msg)
;;
}
@ -54,11 +61,20 @@ let octa = "0o" ['0'-'7']+
(* Definition of an atom,
* i.e. a string that represents a char, for example "\065" = 'A' *)
(* NOTE(review): ascii_table, escapes and atom are each defined twice in this
 * span; confirm which definition is meant to be kept. *)
(* A backslash followed by three decimal digits, the first one in 0-2. *)
let ascii_table = "\\" ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: this overflows past 255, up to 299 :( *)
let ascii_table = '\\' ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: this overflows past 255, up to 299 :( *)
(* A backslash, then 0x, then two hex_dig (hex_dig is defined outside this
 * span). *)
let ascii_hex = "\\0x" hex_dig hex_dig
(* Character codes 32 to 127, except 39 (the single quote). *)
let printable = ['\032'-'\038' '\040'-'\127']
(* Named escapes: this first definition includes the escaped double quote. *)
let escapes = "\\n" | "\\b" | "\\r" | "\\t" | "\\'" | "\\\"" | "\\\\"
let atom = ascii_table | ascii_hex | printable | escapes
(* Named escapes: this second definition has no escaped double quote. *)
let escapes = "\\n"
| "\\b"
| "\\r"
| "\\t"
| "\\'"
| "\\\\"
(* Same alternatives as the first atom, plus a bare double quote. *)
let atom = ascii_table
| ascii_hex
| printable
| escapes
| '"'
(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
* il faudra le faire à l'analyseur syntaxique.
@ -71,18 +87,26 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
(* Type variable identifier: a backquote followed by ident *)
let type_variable = '`' ident
(* Integer literals: an optional minus sign followed by digits, or one of
 * hexa, bina, octa *)
let int ='-'? digit+
| hexa
| bina
| octa
(* Character literals: a digit or an ASCII letter.
 * NOTE(review): letter and char have the same right-hand side; confirm which
 * name is meant to be kept. *)
let letter = (digit | ['A'-'Z'] | ['a'-'z'])
let char = (digit | ['A'-'Z'] | ['a'-'z'])
(* tmp *)
(* When the ASCII code is too large: a backslash, a digit in 3-9, a digit,
 * then one or more digits.
* TODO: does not trigger for, e.g., 270 :( *)
let ascii_trop_grand = '\\' ['3'-'9']['0'-'9'](['0'-'9'])+
(* Characters of a string: the atom alternatives, plus the single quote and
 * the escaped double quote *)
let str_char = ascii_table
| ascii_hex
| printable
| escapes
| '\''
| "\\\""
rule token = parse
(** Layout *)
| newline { next_line_and token lexbuf }
@ -157,16 +181,13 @@ rule token = parse
| '"' { read_string (Buffer.create 16) lexbuf }
(* Characters *)
| "'" (letter as c) "'" { CHAR c }
| "'" (atom as a) "'" { match recup_char a with
| Some c -> CHAR c
| None -> error lexbuf None "" }
| "'" (char as c) "'" { CHAR c }
| "'" (atom as a) "'" { CHAR (recup_char a lexbuf) }
(** Lexing error *)
(* erreur qui advient pour le test 22-char-literal,
* le code renvoie bizarrement que "Error (during lexing)" *)
| "'" ascii_trop_grand "'" { error lexbuf None "" }
| _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }
(** Lexing errors *)
(* Erreur qui advient quand un code ASCII est trop grand *)
| "'" ascii_trop_grand "'" { error lexbuf None "" }
| _ as _c { error lexbuf None (* (Some _c) *) err_msg }
(* TODO: Gérer les imbrications de commentaires *)
and commentary = parse
@ -186,19 +207,16 @@ and commentary_line = parse
(* Lexer entry for the body of a string literal. The token rule calls it with
 * a fresh buffer once the opening double quote has been consumed. Every
 * clause except the closing quote and eof appends one character to [buffer]
 * and continues; the closing quote yields a STRING token holding the
 * contents of [buffer]. *)
and read_string buffer = parse
  (** End of string *)
  | '"'           { STRING (Buffer.contents buffer) }
  (** Escapes *)
  | "\\n"         { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
  | "\\b"         { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
  | "\\r"         { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
  (* A tab escape must append a tab, not a carriage return. *)
  | "\\t"         { Buffer.add_char buffer '\t'; read_string buffer lexbuf }
  | "\\'"         { Buffer.add_char buffer '\''; read_string buffer lexbuf }
  | "\\\""        { Buffer.add_char buffer '"'; read_string buffer lexbuf }
  | "\\\\"        { Buffer.add_char buffer '\\'; read_string buffer lexbuf }
  (** String characters *)
  (* Numeric escapes and ordinary characters are converted by recup_char,
   * which reports an error itself when the conversion fails. *)
  | str_char as s { let c = recup_char s lexbuf
                    in Buffer.add_char buffer c
                     ; read_string buffer lexbuf }
  (** Error *)
  | eof           { error lexbuf None "Unterminated string." }
  (** String content *)
  (* Any other character is kept as is. *)
  | _ as c        { Buffer.add_char buffer c; read_string buffer lexbuf }