strings!
This commit is contained in:
parent
d9fd965b3b
commit
05c2d703bf
1 changed files with 70 additions and 52 deletions
|
@ -7,36 +7,43 @@
|
||||||
let next_line_and f lexbuf =
|
let next_line_and f lexbuf =
|
||||||
Lexing.new_line lexbuf;
|
Lexing.new_line lexbuf;
|
||||||
f lexbuf
|
f lexbuf
|
||||||
|
;;
|
||||||
|
|
||||||
let error lexbuf c =
|
let error lexbuf c =
|
||||||
let msg =
|
let msg =
|
||||||
"during lexing"
|
"during lexing"
|
||||||
^
|
^
|
||||||
match c with
|
match c with
|
||||||
| Some c -> Printf.sprintf " at `%c`" c
|
| Some c -> Printf.sprintf " at `%c`" c
|
||||||
| None -> ""
|
| None -> ""
|
||||||
in
|
in
|
||||||
error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
|
error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
|
||||||
|
;;
|
||||||
|
|
||||||
|
let err_msg = "unexpected character."
|
||||||
|
|
||||||
(* Fonction qui convertit une chaîne de caractères ASCII en vrai caractère.
|
(* Fonction qui convertit une chaîne de caractères ASCII en vrai caractère.
|
||||||
* Notamment les escapes : "\n" ou "\000" *)
|
* Notamment les escapes : "\n" ou "\000"
|
||||||
let recup_char data =
|
* En plus de préserver les caractères "normaux" *)
|
||||||
match data with
|
let recup_char data lexbuf =
|
||||||
| "\\n" -> Some '\n'
|
let length = String.length data in
|
||||||
| "\\b" -> Some '\b'
|
if length == 1
|
||||||
| "\\r" -> Some '\r'
|
then String.get data 0
|
||||||
| "\\t" -> Some '\t'
|
else (
|
||||||
| "\\'" -> Some '\''
|
match data with
|
||||||
| "\\\"" -> Some '"'
|
| "\\n" -> '\n'
|
||||||
| "\\\\" -> Some '\\'
|
| "\\b" -> '\b'
|
||||||
| _ ->
|
| "\\r" -> '\r'
|
||||||
(try
|
| "\\t" -> '\t'
|
||||||
let caractere = String.sub data 1 (String.length data - 1) in
|
| "\\\'" -> '\''
|
||||||
let ascii_code = int_of_string caractere in
|
| "\\\\" -> '\\'
|
||||||
Some (Char.chr ascii_code)
|
| _ ->
|
||||||
with
|
(try
|
||||||
| _ -> None)
|
let caractere = String.sub data 1 (length - 1) in
|
||||||
|
let ascii_code = int_of_string caractere in
|
||||||
|
Char.chr ascii_code
|
||||||
|
with
|
||||||
|
| _ -> error lexbuf None err_msg))
|
||||||
;;
|
;;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,11 +61,20 @@ let octa = "0o" ['0'-'7']+
|
||||||
|
|
||||||
(* Définition d'un atom
|
(* Définition d'un atom
|
||||||
* aka un string qui représente un char, par exemple "\065" = 'A' *)
|
* aka un string qui représente un char, par exemple "\065" = 'A' *)
|
||||||
let ascii_table = "\\" ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *)
|
let ascii_table = '\\' ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *)
|
||||||
let ascii_hex = "\\0x" hex_dig hex_dig
|
let ascii_hex = "\\0x" hex_dig hex_dig
|
||||||
let printable = ['\032'-'\038' '\040'-'\127']
|
let printable = ['\032'-'\038' '\040'-'\127']
|
||||||
let escapes = "\\n" | "\\b" | "\\r" | "\\t" | "\\'" | "\\\"" | "\\\\"
|
let escapes = "\\n"
|
||||||
let atom = ascii_table | ascii_hex | printable | escapes
|
| "\\b"
|
||||||
|
| "\\r"
|
||||||
|
| "\\t"
|
||||||
|
| "\\'"
|
||||||
|
| "\\\\"
|
||||||
|
let atom = ascii_table
|
||||||
|
| ascii_hex
|
||||||
|
| printable
|
||||||
|
| escapes
|
||||||
|
| '"'
|
||||||
|
|
||||||
(* On ne peut pas différencier au niveau du lexer var_id, label_id et type_con,
|
(* On ne peut pas différencier au niveau du lexer var_id, label_id et type_con,
|
||||||
* il faudra le faire dans l'analyseur syntaxique.
|
* il faudra le faire dans l'analyseur syntaxique.
|
||||||
|
@ -71,18 +87,26 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
|
||||||
(* Identificateur de variables de type *)
|
(* Identificateur de variables de type *)
|
||||||
let type_variable = '`' ident
|
let type_variable = '`' ident
|
||||||
(* Littéraux entiers *)
|
(* Littéraux entiers *)
|
||||||
|
|
||||||
let int ='-'? digit+
|
let int ='-'? digit+
|
||||||
| hexa
|
| hexa
|
||||||
| bina
|
| bina
|
||||||
| octa
|
| octa
|
||||||
|
|
||||||
(* Littéraux caractères *)
|
(* Littéraux caractères *)
|
||||||
let letter = (digit | ['A'-'Z'] | ['a'-'z'])
|
let char = (digit | ['A'-'Z'] | ['a'-'z'])
|
||||||
|
|
||||||
(* tmp *)
|
(* Quand le code ascii est trop grand
|
||||||
|
* TODO: Ne se déclenche pas pour, par exemple, 270 :( *)
|
||||||
let ascii_trop_grand = '\\' ['3'-'9']['0'-'9'](['0'-'9'])+
|
let ascii_trop_grand = '\\' ['3'-'9']['0'-'9'](['0'-'9'])+
|
||||||
|
|
||||||
|
(* Caractères d'un string *)
|
||||||
|
let str_char = ascii_table
|
||||||
|
| ascii_hex
|
||||||
|
| printable
|
||||||
|
| escapes
|
||||||
|
| '\''
|
||||||
|
| "\\\""
|
||||||
|
|
||||||
rule token = parse
|
rule token = parse
|
||||||
(** Layout *)
|
(** Layout *)
|
||||||
| newline { next_line_and token lexbuf }
|
| newline { next_line_and token lexbuf }
|
||||||
|
@ -157,16 +181,13 @@ rule token = parse
|
||||||
| '"' { read_string (Buffer.create 16) lexbuf }
|
| '"' { read_string (Buffer.create 16) lexbuf }
|
||||||
|
|
||||||
(* Characters *)
|
(* Characters *)
|
||||||
| "'" (letter as c) "'" { CHAR c }
|
| "'" (char as c) "'" { CHAR c }
|
||||||
| "'" (atom as a) "'" { match recup_char a with
|
| "'" (atom as a) "'" { CHAR (recup_char a lexbuf) }
|
||||||
| Some c -> CHAR c
|
|
||||||
| None -> error lexbuf None "" }
|
|
||||||
|
|
||||||
(** Lexing error *)
|
(** Lexing errors *)
|
||||||
(* erreur qui advient pour le test 22-char-literal,
|
(* Erreur qui advient quand un code ASCII est trop grand *)
|
||||||
* le code ne renvoie bizarrement que "Error (during lexing)" *)
|
| "'" ascii_trop_grand "'" { error lexbuf None "" }
|
||||||
| "'" ascii_trop_grand "'" { error lexbuf None "" }
|
| _ as _c { error lexbuf None (* (Some _c) *) err_msg }
|
||||||
| _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." }
|
|
||||||
|
|
||||||
(* TODO: Gérer les imbrications de commentaires *)
|
(* TODO: Gérer les imbrications de commentaires *)
|
||||||
and commentary = parse
|
and commentary = parse
|
||||||
|
@ -186,19 +207,16 @@ and commentary_line = parse
|
||||||
|
|
||||||
and read_string buffer = parse
|
and read_string buffer = parse
|
||||||
(** End of string *)
|
(** End of string *)
|
||||||
| '"' { STRING (Buffer.contents buffer) }
|
| '"' { STRING (Buffer.contents buffer) }
|
||||||
|
|
||||||
(** Escape *)
|
(** Escape *)
|
||||||
| "\\n" { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
|
| "\\\"" { Buffer.add_char buffer '\"'
|
||||||
| "\\b" { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
|
; read_string buffer lexbuf }
|
||||||
| "\\r" { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
|
|
||||||
| "\\t" { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
|
(** String characters *)
|
||||||
| "\\'" { Buffer.add_char buffer '\''; read_string buffer lexbuf }
|
| str_char as s { let c = recup_char s lexbuf
|
||||||
| "\\\"" { Buffer.add_char buffer '"'; read_string buffer lexbuf }
|
in Buffer.add_char buffer c
|
||||||
| "\\\\" { Buffer.add_char buffer '\\'; read_string buffer lexbuf }
|
; read_string buffer lexbuf }
|
||||||
|
|
||||||
(** Error *)
|
(** Error *)
|
||||||
| eof { error lexbuf None "Unterminated string." }
|
| eof { error lexbuf None "Unterminated string." }
|
||||||
|
|
||||||
(** String content *)
|
|
||||||
| _ as c { Buffer.add_char buffer c; read_string buffer lexbuf }
|
|
||||||
|
|
Reference in a new issue