strings!
This commit is contained in:
parent
d9fd965b3b
commit
05c2d703bf
1 changed files with 70 additions and 52 deletions
|
@ -7,36 +7,43 @@
|
|||
(** [next_line_and f lexbuf] records a line break in the position tracking of
    [lexbuf], then applies [f] to [lexbuf] and returns the result of [f]. *)
let next_line_and f lexbuf =
  let () = Lexing.new_line lexbuf in
  f lexbuf
;;
|
||||
|
||||
(** [error lexbuf c] forwards a lexing failure to the previously defined
    [error], located at the span going from the start position to the current
    position of [lexbuf].

    The first argument given to that earlier [error] is ["during lexing"],
    followed by the offending character between backquotes when [c] is
    [Some _]. Call sites in this file pass one more argument (a message), so
    the value returned here is presumably still waiting for it -- confirm
    against the definition of the shadowed [error]. *)
let error lexbuf c =
  let suffix =
    match c with
    | Some c -> Printf.sprintf " at `%c`" c
    | None -> ""
  in
  let msg = "during lexing" ^ suffix in
  (* This binding is not recursive: [error] below is the earlier definition. *)
  error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
;;
|
||||
|
||||
(* Message handed to [error] when the lexer meets input it cannot handle. *)
let err_msg = "unexpected character."
|
||||
|
||||
(** [recup_char data] converts the source text of a character atom into the
    character it denotes.

    - A text of length one denotes that very character.
    - The named escapes (backslash followed by n, b, r, t, a single quote, a
      double quote or a backslash) denote the matching control or punctuation
      character.
    - Otherwise the leading character is dropped and the rest is read with
      [int_of_string], which covers three-digit decimal codes such as 065 and
      hexadecimal codes such as 0x41.

    @return [None] when the text is not a number or the code has no
    corresponding [char]. *)
let recup_char data =
  match data with
  | "\\n" -> Some '\n'
  | "\\b" -> Some '\b'
  | "\\r" -> Some '\r'
  | "\\t" -> Some '\t'
  | "\\'" -> Some '\''
  | "\\\"" -> Some '"'
  | "\\\\" -> Some '\\'
  | _ when String.length data = 1 -> Some data.[0]
  | _ ->
    (* Only the failures of the three calls below mean "not a character":
       [String.sub] and [Char.chr] raise [Invalid_argument], [int_of_string]
       raises [Failure]. Anything else is left to propagate. *)
    (match
       Char.chr (int_of_string (String.sub data 1 (String.length data - 1)))
     with
     | c -> Some c
     | exception (Failure _ | Invalid_argument _) -> None)
;;
|
||||
(** [recup_char data lexbuf] converts the source text of one character into
    the character it denotes.

    - A text of length one is returned as is (ordinary characters).
    - The named escapes (backslash followed by n, b, r, t, a single quote or a
      backslash) give the matching character. The escaped double quote is not
      decoded here.
    - Otherwise the leading character is dropped and the rest is read with
      [int_of_string], which covers three-digit decimal codes such as 065 and
      hexadecimal codes such as 0x41.

    When the text is not a number, or the code has no corresponding [char],
    the result is that of [error lexbuf None err_msg]. *)
let recup_char data lexbuf =
  let length = String.length data in
  if length = 1
  then data.[0]
  else (
    match data with
    | "\\n" -> '\n'
    | "\\b" -> '\b'
    | "\\r" -> '\r'
    | "\\t" -> '\t'
    | "\\'" -> '\''
    | "\\\\" -> '\\'
    | _ ->
      (* Only the failures of the three calls below are turned into a lexing
         error: [String.sub] and [Char.chr] raise [Invalid_argument],
         [int_of_string] raises [Failure]. *)
      (match Char.chr (int_of_string (String.sub data 1 (length - 1))) with
       | c -> c
       | exception (Failure _ | Invalid_argument _) ->
         error lexbuf None err_msg))
;;
|
||||
}
|
||||
|
||||
|
@ -54,11 +61,20 @@ let octa = "0o" ['0'-'7']+
|
|||
|
||||
(* Atoms: the textual forms that stand for a single character,
 * for instance "\065" stands for 'A'. *)

(* Decimal escape: a backslash followed by exactly three digits.
 * TODO: the first digit is only limited to 0..2, so the codes 256 to 299 are
 * still accepted by this expression. *)
let ascii_table = '\\' ['0'-'2'] ['0'-'9'] ['0'-'9']
|
||||
(* Hexadecimal escape: a backslash, the marker 0x, then two [hex_dig]. *)
let ascii_hex = '\\' "0x" hex_dig hex_dig

(* Character codes 32 through 127, leaving out code 39 (the single quote). *)
let printable = ['\032'-'\127'] # ['\'']
|
||||
(* Named escapes: a backslash followed by n, b, r, t, a single quote or a
 * backslash. The escaped double quote is not part of this set. *)
let escapes = "\\n" | "\\b" | "\\r" | "\\t" | "\\'" | "\\\\"

(* Every textual form accepted for one character atom. *)
let atom = ascii_table | ascii_hex | printable | escapes | '"'
|
||||
|
||||
(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
|
||||
* il faudra le faire à l'analyseur syntaxique.
|
||||
|
@ -71,18 +87,26 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
|||
(* Type variable identifiers: a backquote followed by [ident]. *)
let type_variable = '`' ident

(* Integer literals: an optional minus sign and one or more [digit],
 * or one of the prefixed forms [hexa], [bina], [octa]. *)
let int = ('-'? digit+) | hexa | bina | octa

(* Character literals: a [digit] or an unaccented upper or lower case letter.
 * Both names denote the same expression. *)
let letter = digit | ['A'-'Z' 'a'-'z']
let char = digit | ['A'-'Z' 'a'-'z']
|
||||
|
||||
(* Decimal escape whose code is too large: a backslash, a first digit in
 * 3..9, then at least two more digits.
 * TODO: codes starting with 2, for instance 270, are not matched here. *)
let ascii_trop_grand = '\\' ['3'-'9'] ['0'-'9'] ['0'-'9']+

(* Every textual form accepted for one character inside a string literal. *)
let str_char = ascii_table | ascii_hex | printable | escapes | '\'' | "\\\""
|
||||
|
||||
rule token = parse
|
||||
(** Layout *)
|
||||
| newline { next_line_and token lexbuf }
|
||||
|
@ -157,16 +181,13 @@ rule token = parse
|
|||
| '"' { read_string (Buffer.create 16) lexbuf }
|
||||
|
||||
(* Characters *)
|
||||
| "'" (letter as c) "'" { CHAR c }
|
||||
| "'" (atom as a) "'" { match recup_char a with
|
||||
| Some c -> CHAR c
|
||||
| None -> error lexbuf None "" }
|
||||
| "'" (char as c) "'" { CHAR c }
|
||||
| "'" (atom as a) "'" { CHAR (recup_char a lexbuf) }
|
||||
|
||||
(** Lexing error *)
|
||||
(* erreur qui advient pour le test 22-char-literal,
|
||||
* le code renvoie bizarrement que "Error (during lexing)" *)
|
||||
| "'" ascii_trop_grand "'" { error lexbuf None "" }
|
||||
| _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }
|
||||
(** Lexing errors *)
|
||||
(* Erreur qui advient quand un code ASCII est trop grand *)
|
||||
| "'" ascii_trop_grand "'" { error lexbuf None "" }
|
||||
| _ as _c { error lexbuf None (* (Some _c) *) err_msg }
|
||||
|
||||
(* TODO: Gérer les imbrications de commentaires *)
|
||||
and commentary = parse
|
||||
|
@ -186,19 +207,16 @@ and commentary_line = parse
|
|||
|
||||
(* [read_string buffer] lexes the inside of a string literal, appending each
 * decoded character to [buffer], and produces a STRING token holding the
 * contents of [buffer] once the closing double quote is reached.
 * Reaching the end of input first is reported through [error]. *)
and read_string buffer = parse
  (** End of string *)
  | '"'     { STRING (Buffer.contents buffer) }

  (** Named escapes *)
  | "\\n"   { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
  | "\\b"   { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
  | "\\r"   { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
  (* A tabulation escape yields a tabulation, not a carriage return. *)
  | "\\t"   { Buffer.add_char buffer '\t'; read_string buffer lexbuf }
  | "\\'"   { Buffer.add_char buffer '\''; read_string buffer lexbuf }
  | "\\\""  { Buffer.add_char buffer '"'; read_string buffer lexbuf }
  | "\\\\"  { Buffer.add_char buffer '\\'; read_string buffer lexbuf }

  (** String characters: whatever [str_char] accepts beyond the clauses
      above, decoded by [recup_char] *)
  | str_char as s { let c = recup_char s lexbuf
                    in Buffer.add_char buffer c
                     ; read_string buffer lexbuf }

  (** Error *)
  | eof     { error lexbuf None "Unterminated string." }

  (** String content: any remaining character is copied as is *)
  | _ as c  { Buffer.add_char buffer c; read_string buffer lexbuf }
|
||||
|
|
Reference in a new issue