This commit is contained in:
Mylloon 2023-10-24 21:44:29 +02:00
parent d9fd965b3b
commit 05c2d703bf
Signed by: Anri
GPG key ID: A82D63DFF8D1317F

View file

@ -7,36 +7,43 @@
(* Record a line break in [lexbuf] (via [Lexing.new_line]), then resume
 * lexing by calling [f] on it. Returns whatever [f] returns. *)
let next_line_and f lexbuf =
  Lexing.new_line lexbuf;
  f lexbuf
;;
(* Report a lexing error located on the token currently held by [lexbuf].
 *
 * [c] is an optional offending character; when present it is appended to
 * the "during lexing" prefix. The report itself is delegated to the [error]
 * function that was in scope before this definition (this binding shadows
 * it): it receives the prefix and the span going from [lex_start_p] to
 * [lex_curr_p], joined with [lex_join].
 *
 * Callers in this file apply the result to one more string argument (the
 * message), so the shadowed [error] is presumably expecting it. *)
let error lexbuf c =
  let suffix =
    match c with
    | Some c -> Printf.sprintf " at `%c`" c
    | None -> ""
  in
  error ("during lexing" ^ suffix) (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
;;
let err_msg = "unexpected character."
(* Convert the source text of one character, as matched by the lexer, into
 * the character it denotes:
 * - a one-character string stands for itself;
 * - the two-character backslash escapes (n, b, r, t, single quote, double
 *   quote, backslash) give the corresponding control or quoted character;
 * - anything else is read as a backslash followed by an integer literal
 *   giving the character code, e.g. "\065".
 *
 * [lexbuf] is only used to locate the error. When the text after the
 * backslash is not an integer, or is not a valid character code, the
 * lexing error [err_msg] is reported through [error]. *)
let recup_char data lexbuf =
  let length = String.length data in
  (* Structural [=] is the right equality on ints; [==] is physical. *)
  if length = 1
  then data.[0]
  else (
    match data with
    | "\\n" -> '\n'
    | "\\b" -> '\b'
    | "\\r" -> '\r'
    | "\\t" -> '\t'
    | "\\'" -> '\''
    | "\\\"" -> '"'
    | "\\\\" -> '\\'
    | _ ->
      (* Drop the leading backslash and decode the rest as a number.
       * [String.sub] and [Char.chr] raise [Invalid_argument],
       * [int_of_string] raises [Failure]: only those are turned into a
       * lexing error, anything else is left to propagate. *)
      (match Char.chr (int_of_string (String.sub data 1 (length - 1))) with
       | c -> c
       | exception (Failure _ | Invalid_argument _) ->
         error lexbuf None err_msg))
;;
} }
@ -54,11 +61,20 @@ let octa = "0o" ['0'-'7']+
(* Definition of an atom,
 * i.e. the source text that stands for one character, e.g. "\065" = 'A'. *)

(* Decimal escape: a backslash followed by three digits.
 * TODO: the first digit ranges over 0-2, so codes 256 to 299 are accepted
 * by this regexp even though they are not valid character codes. *)
let ascii_table = '\\' ['0'-'2'] ['0'-'9'] ['0'-'9']

(* Hexadecimal escape: backslash, "0x", then two [hex_dig]. *)
let ascii_hex = "\\0x" hex_dig hex_dig

(* Character codes 32 to 38 and 40 to 127: code 39, the single quote,
 * is left out. *)
let printable = ['\032'-'\038' '\040'-'\127']

(* Two-character backslash escapes. *)
let escapes = "\\n" | "\\b" | "\\r" | "\\t" | "\\'" | "\\\\"

(* Everything accepted between the quotes of a character literal. *)
let atom = ascii_table | ascii_hex | printable | escapes | '"'
(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con, (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
* il faudra le faire à l'analyseur syntaxique. * il faudra le faire à l'analyseur syntaxique.
@ -71,18 +87,26 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
(* Type variable identifiers: a backquote followed by [ident]. *)
let type_variable = '`' ident

(* Integer literals: an optionally negated run of [digit], or one of the
 * [hexa], [bina] and [octa] forms. *)
let int = '-'? digit+ | hexa | bina | octa
(* Character literals: one [digit] or one ASCII letter. *)
let char = digit | ['A'-'Z'] | ['a'-'z']

(* Decimal escape whose code is too large: a backslash, a first digit in
 * 3-9, then at least two more digits.
 * TODO: does not fire for e.g. 270, since the first digit must be 3-9. *)
let ascii_trop_grand = '\\' ['3'-'9'] ['0'-'9'] ['0'-'9']+
(* Characters of a string literal: the decimal and hexadecimal escapes,
 * the printable range, the backslash escapes, a bare single quote and an
 * escaped double quote. *)
let str_char = ascii_table | ascii_hex | printable | escapes | '\'' | "\\\""
rule token = parse rule token = parse
(** Layout *) (** Layout *)
| newline { next_line_and token lexbuf } | newline { next_line_and token lexbuf }
@ -157,16 +181,13 @@ rule token = parse
| '"' { read_string (Buffer.create 16) lexbuf } | '"' { read_string (Buffer.create 16) lexbuf }
(* Characters *) (* Characters *)
| "'" (letter as c) "'" { CHAR c } | "'" (char as c) "'" { CHAR c }
| "'" (atom as a) "'" { match recup_char a with | "'" (atom as a) "'" { CHAR (recup_char a lexbuf) }
| Some c -> CHAR c
| None -> error lexbuf None "" }
(** Lexing error *) (** Lexing errors *)
(* erreur qui advient pour le test 22-char-literal, (* Erreur qui advient quand un code ASCII est trop grand *)
* le code renvoie bizarrement que "Error (during lexing)" *) | "'" ascii_trop_grand "'" { error lexbuf None "" }
| "'" ascii_trop_grand "'" { error lexbuf None "" } | _ as _c { error lexbuf None (* (Some _c) *) err_msg }
| _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }
(* TODO: Gérer les imbrications de commentaires *) (* TODO: Gérer les imbrications de commentaires *)
and commentary = parse and commentary = parse
@ -186,19 +207,16 @@ and commentary_line = parse
(* Lex the body of a string literal; the [token] rule enters here right
 * after consuming the opening double quote. Decoded characters are
 * accumulated in [buffer], and the closing double quote produces a
 * [STRING] token holding the buffer contents.
 *
 * NOTE(review): there is no catch-all rule here, so input that matches
 * none of the cases below (presumably anything outside [str_char], such
 * as a newline) is left to the default failure of the generated lexer.
 * Confirm this is intended. *)
and read_string buffer = parse
  (** End of string *)
  | '"' { STRING (Buffer.contents buffer) }

  (** Escape *)
  | "\\\"" { Buffer.add_char buffer '\"'
           ; read_string buffer lexbuf }

  (** String characters *)
  | str_char as s { let c = recup_char s lexbuf
                    in Buffer.add_char buffer c
                    ; read_string buffer lexbuf }

  (** Error *)
  | eof { error lexbuf None "Unterminated string." }