strings!

2023-10-24 21:44:29 +02:00 · 2023-10-24 21:44:29 +02:00 · 05c2d703bf
commit 05c2d703bf
parent d9fd965b3b
1 changed files with 70 additions and 52 deletions
--- a/flap/src/hopix/hopixLexer.mll
+++ b/flap/src/hopix/hopixLexer.mll
@ -7,36 +7,43 @@
  (* [next_line_and f lexbuf] records a line break in [lexbuf]'s position
     tracking, then hands [lexbuf] to [f] and returns whatever [f] returns. *)
  let next_line_and f lexbuf =
    let () = Lexing.new_line lexbuf in
    f lexbuf
  ;;
  let error lexbuf c =
-  let msg =
+    let msg =
-    "during lexing"
+      "during lexing"
-    ^
+      ^
-    match c with
+      match c with
-    | Some c -> Printf.sprintf " at `%c`" c
+      | Some c -> Printf.sprintf " at `%c`" c
-    | None -> ""
+      | None -> ""
-  in
+    in
    error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
  ;;
  (* Message reported for unrecognised input: used by the catch-all rule of
     [token] and by the fallback case of [recup_char]. *)
  let err_msg = "unexpected character."
  (* Fonction qui convertit une chaîne de caractères ASCII en vrai caractère.
-   * Notamment les escapes : "\n" ou "\000" *)
+   * Notamment les escapes : "\n" ou "\000"
-  let recup_char data =
+   * En plus de préserver les caractères "normaux" *)
-    match data with
+  let recup_char data lexbuf =
-    | "\\n" -> Some '\n'
+    let length = String.length data in
-    | "\\b" -> Some '\b'
+    if length == 1
-    | "\\r" -> Some '\r'
+    then String.get data 0
-    | "\\t" -> Some '\t'
+    else (
-    | "\\'" -> Some '\''
+      match data with
-    | "\\\"" -> Some '"'
+      | "\\n" -> '\n'
-    | "\\\\" -> Some '\\'
+      | "\\b" -> '\b'
-    | _ ->
+      | "\\r" -> '\r'
-      (try
+      | "\\t" -> '\t'
-         let caractere = String.sub data 1 (String.length data - 1) in
+      | "\\\'" -> '\''
-         let ascii_code = int_of_string caractere in
+      | "\\\\" -> '\\'
-         Some (Char.chr ascii_code)
+      | _ ->
-       with
+        (try
-       | _ -> None)
+           let caractere = String.sub data 1 (length - 1) in
           let ascii_code = int_of_string caractere in
           Char.chr ascii_code
         with
         | _ -> error lexbuf None err_msg))
  ;;
 }
@ -54,11 +61,20 @@ let octa = "0o" ['0'-'7']+
 (* Définition d'un atom
 * aka un string qui représente un char, par exemple "\065" = 'A' *)
-let ascii_table = "\\" ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *)
+let ascii_table = '\\' ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *)
 let ascii_hex = "\\0x" hex_dig hex_dig
 let printable = ['\032'-'\038' '\040'-'\127']
-let escapes = "\\n" | "\\b" | "\\r" | "\\t" | "\\'" | "\\\"" | "\\\\"
+let escapes = "\\n"
-let atom = ascii_table | ascii_hex | printable | escapes
+            | "\\b"
            | "\\r"
            | "\\t"
            | "\\'"
            | "\\\\"
 let atom = ascii_table
         | ascii_hex
         | printable
         | escapes
         | '"'
 (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
 * il faudra le faire à l'analyseur syntaxique.
@ -71,18 +87,26 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
 (* Type-variable identifier: a backquote immediately followed by [ident]. *)
 let type_variable = '`' ident
 (* Integer literals: either [digit]+ with an optional leading '-', or one of
  * the [hexa], [bina], [octa] forms. Concatenation binds tighter than '|',
  * so the optional '-' belongs to the first alternative only. *)
 let int ='-'? digit+
            | hexa
            | bina
            | octa
 (* Littéraux caractères *)
-let letter = (digit | ['A'-'Z'] | ['a'-'z'])
+let char = (digit | ['A'-'Z'] | ['a'-'z'])
-(* tmp *)
+(* Quand le code ascii est trop grand
 * TODO: Ne se déclenche pas pour, par exemple, 270 :( *)
 (* Matches a backslash followed by three or more digits, the first of which
  * is in the range 3-9. *)
 let ascii_trop_grand = '\\' ['3'-'9']['0'-'9'](['0'-'9'])+
 (* Characters accepted inside a string literal: the [ascii_table],
  * [ascii_hex], [printable] and [escapes] forms, plus a bare single quote
  * and a backslash-escaped double quote. *)
 let str_char = ascii_table
             | ascii_hex
             | printable
             | escapes
             | '\''
             | "\\\""
 rule token = parse
  (** Layout *)
  | newline               { next_line_and token lexbuf }
@ -157,16 +181,13 @@ rule token = parse
  | '"'                   { read_string (Buffer.create 16) lexbuf }
  (* Characters *)
-  | "'" (letter as c) "'" { CHAR c                          }
+  | "'" (char as c) "'" { CHAR c                     }
-  | "'" (atom as a) "'"   { match recup_char a with
+  | "'" (atom as a) "'"   { CHAR (recup_char a lexbuf) }
                            | Some c -> CHAR c
                            | None -> error lexbuf None ""  }
-  (** Lexing error *)
+  (** Lexing errors *)
-  (* erreur qui advient pour le test 22-char-literal,
+  (* Erreur qui advient quand un code ASCII est trop grand *)
-   * le code renvoie bizarrement que "Error (during lexing)" *)
+  | "'" ascii_trop_grand "'" { error lexbuf None ""                      }
-  | "'" ascii_trop_grand "'" { error lexbuf None "" }
+  | _  as _c                 { error lexbuf None (* (Some _c) *) err_msg }
  | _  as _c                 { error lexbuf None (* (Some _c) *) "unexpected character." }
 (* TODO: Gérer les imbrications de commentaires *)
 and commentary = parse
@ -186,19 +207,16 @@ and commentary_line = parse
 and read_string buffer = parse
  (** End of string *)
-  | '"'    { STRING (Buffer.contents buffer)                        }
+  | '"'           { STRING (Buffer.contents buffer)          }
-  (** Escape *)
+  (** Escape  *)
-  | "\\n"  { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
+  | "\\\""        { Buffer.add_char buffer '\"'
-  | "\\b"  { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
+                  ; read_string buffer lexbuf                }
-  | "\\r"  { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
+
-  | "\\t"  { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
+  (** String characters *)
-  | "\\'"  { Buffer.add_char buffer '\''; read_string buffer lexbuf }
+  | str_char as s { let c = recup_char s lexbuf
-  | "\\\"" { Buffer.add_char buffer '"'; read_string buffer lexbuf  }
+                    in Buffer.add_char buffer c
-  | "\\\\" { Buffer.add_char buffer '\\'; read_string buffer lexbuf }
+                    ; read_string buffer lexbuf              }
  (** Error *)
-  | eof    { error lexbuf None "Unterminated string."               }
+  | eof           { error lexbuf None "Unterminated string." }
  (** String content *)
  | _ as c { Buffer.add_char buffer c; read_string buffer lexbuf    }