From 05c2d703bfcca81299cdb4f9906c6c66fd1b7b34 Mon Sep 17 00:00:00 2001 From: Mylloon Date: Tue, 24 Oct 2023 21:44:29 +0200 Subject: [PATCH] strings! --- flap/src/hopix/hopixLexer.mll | 122 +++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 52 deletions(-) diff --git a/flap/src/hopix/hopixLexer.mll b/flap/src/hopix/hopixLexer.mll index 1b86638..0b9c1d1 100644 --- a/flap/src/hopix/hopixLexer.mll +++ b/flap/src/hopix/hopixLexer.mll @@ -7,36 +7,43 @@ let next_line_and f lexbuf = Lexing.new_line lexbuf; f lexbuf + ;; let error lexbuf c = - let msg = - "during lexing" - ^ - match c with - | Some c -> Printf.sprintf " at `%c`" c - | None -> "" - in + let msg = + "during lexing" + ^ + match c with + | Some c -> Printf.sprintf " at `%c`" c + | None -> "" + in error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p) + ;; + let err_msg = "unexpected character." (* Fonction qui convertie une chaîne de caractère ascii en vrai caractère. - * Notamment les escapes : "\n" ou "\000" *) - let recup_char data = - match data with - | "\\n" -> Some '\n' - | "\\b" -> Some '\b' - | "\\r" -> Some '\r' - | "\\t" -> Some '\t' - | "\\'" -> Some '\'' - | "\\\"" -> Some '"' - | "\\\\" -> Some '\\' - | _ -> - (try - let caractere = String.sub data 1 (String.length data - 1) in - let ascii_code = int_of_string caractere in - Some (Char.chr ascii_code) - with - | _ -> None) + * Notamment les escapes : "\n" ou "\000" + * En plus de préserver les caractères "normaux" *) + let recup_char data lexbuf = + let length = String.length data in + if length == 1 + then String.get data 0 + else ( + match data with + | "\\n" -> '\n' + | "\\b" -> '\b' + | "\\r" -> '\r' + | "\\t" -> '\t' + | "\\\'" -> '\'' + | "\\\\" -> '\\' + | _ -> + (try + let caractere = String.sub data 1 (length - 1) in + let ascii_code = int_of_string caractere in + Char.chr ascii_code + with + | _ -> error lexbuf None err_msg)) ;; } @@ -54,11 +61,20 @@ let octa = "0o" ['0'-'7']+ (* Définition d'un atom * aka un string qui représente un char, par exemple "\065" = 'A' *) -let ascii_table = "\\" ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *) +let ascii_table = '\\' ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *) let ascii_hex = "\\0x" hex_dig hex_dig let printable = ['\032'-'\038' '\040'-'\127'] -let escapes = "\\n" | "\\b" | "\\r" | "\\t" | "\\'" | "\\\"" | "\\\\" -let atom = ascii_table | ascii_hex | printable | escapes +let escapes = "\\n" + | "\\b" + | "\\r" + | "\\t" + | "\\'" + | "\\\\" +let atom = ascii_table + | ascii_hex + | printable + | escapes + | '"' (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con, * il faudra le faire à l'analyseur syntaxique. @@ -71,18 +87,26 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']* (* Identificateur de variables de type *) let type_variable = '`' ident (* Littéraux entiers *) - let int ='-'? digit+ | hexa | bina | octa (* Littéraux caractères *) -let letter = (digit | ['A'-'Z'] | ['a'-'z']) +let char = (digit | ['A'-'Z'] | ['a'-'z']) -(* tmp *) +(* Quand le code ascii est trop grand + * TODO: Ne se déclenche pas pour, par exemple, 270 :( *) let ascii_trop_grand = '\\' ['3'-'9']['0'-'9'](['0'-'9'])+ +(* Caractères d'un string *) +let str_char = ascii_table + | ascii_hex + | printable + | escapes + | '\'' + | "\\\"" + rule token = parse (** Layout *) | newline { next_line_and token lexbuf } @@ -157,16 +181,13 @@ rule token = parse | '"' { read_string (Buffer.create 16) lexbuf } (* Characters *) - | "'" (letter as c) "'" { CHAR c } - | "'" (atom as a) "'" { match recup_char a with - | Some c -> CHAR c - | None -> error lexbuf None "" } + | "'" (char as c) "'" { CHAR c } + | "'" (atom as a) "'" { CHAR (recup_char a lexbuf) } - (** Lexing error *) - (* erreur qui advient pour le test 22-char-literal, - * le code renvoie bizarrement que "Error (during lexing)" *) - | "'" ascii_trop_grand "'" { error lexbuf None "" } - | _ as _c { error lexbuf None (* (Some _c) *) "unexpected character." } + (** Lexing errors *) + (* Erreur qui advient quand un code ASCII est trop grand *) + | "'" ascii_trop_grand "'" { error lexbuf None "" } + | _ as _c { error lexbuf None (* (Some _c) *) err_msg } (* TODO: Gérer les imbrications de commentaires *) and commentary = parse @@ -186,19 +207,16 @@ and commentary_line = parse and read_string buffer = parse (** End of string *) - | '"' { STRING (Buffer.contents buffer) } + | '"' { STRING (Buffer.contents buffer) } - (** Escape *) - | "\\n" { Buffer.add_char buffer '\n'; read_string buffer lexbuf } - | "\\b" { Buffer.add_char buffer '\b'; read_string buffer lexbuf } - | "\\r" { Buffer.add_char buffer '\r'; read_string buffer lexbuf } - | "\\t" { Buffer.add_char buffer '\r'; read_string buffer lexbuf } - | "\\'" { Buffer.add_char buffer '\''; read_string buffer lexbuf } - | "\\\"" { Buffer.add_char buffer '"'; read_string buffer lexbuf } - | "\\\\" { Buffer.add_char buffer '\\'; read_string buffer lexbuf } + (** Escape *) + | "\\\"" { Buffer.add_char buffer '\"' + ; read_string buffer lexbuf } + + (** String characters *) + | str_char as s { let c = recup_char s lexbuf + in Buffer.add_char buffer c + ; read_string buffer lexbuf } (** Error *) - | eof { error lexbuf None "Unterminated string." } - - (** String content *) - | _ as c { Buffer.add_char buffer c; read_string buffer lexbuf } + | eof { error lexbuf None "Unterminated string." }