strings!

2023-10-24 21:44:29 +02:00 · 2023-10-24 21:44:29 +02:00 · 05c2d703bf
commit 05c2d703bf
parent d9fd965b3b
1 changed files with 70 additions and 52 deletions
--- a/flap/src/hopix/hopixLexer.mll
+++ b/flap/src/hopix/hopixLexer.mll
@@ -7,36 +7,43 @@
  let next_line_and f lexbuf  =
    Lexing.new_line lexbuf;
    f lexbuf
+  ;;

  let error lexbuf c =
-  let msg =
-    "during lexing"
-    ^
-    match c with
-    | Some c -> Printf.sprintf " at `%c`" c
-    | None -> ""
-  in
+    let msg =
+      "during lexing"
+      ^
+      match c with
+      | Some c -> Printf.sprintf " at `%c`" c
+      | None -> ""
+    in
    error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
+  ;;

+  let err_msg = "unexpected character."

  (* Fonction qui convertie une chaîne de caractère ascii en vrai caractère.
-   * Notamment les escapes : "\n" ou "\000" *)
-  let recup_char data =
-    match data with
-    | "\\n" -> Some '\n'
-    | "\\b" -> Some '\b'
-    | "\\r" -> Some '\r'
-    | "\\t" -> Some '\t'
-    | "\\'" -> Some '\''
-    | "\\\"" -> Some '"'
-    | "\\\\" -> Some '\\'
-    | _ ->
-      (try
-         let caractere = String.sub data 1 (String.length data - 1) in
-         let ascii_code = int_of_string caractere in
-         Some (Char.chr ascii_code)
-       with
-       | _ -> None)
+   * Notamment les escapes : "\n" ou "\000"
+   * En plus de préserver les caractères "normaux" *)
+  let recup_char data lexbuf =
+    let length = String.length data in
+    if length == 1
+    then String.get data 0
+    else (
+      match data with
+      | "\\n" -> '\n'
+      | "\\b" -> '\b'
+      | "\\r" -> '\r'
+      | "\\t" -> '\t'
+      | "\\\'" -> '\''
+      | "\\\\" -> '\\'
+      | _ ->
+        (try
+           let caractere = String.sub data 1 (length - 1) in
+           let ascii_code = int_of_string caractere in
+           Char.chr ascii_code
+         with
+         | _ -> error lexbuf None err_msg))
  ;;
 }

@@ -54,11 +61,20 @@ let octa = "0o" ['0'-'7']+

 (* Définition d'un atom
 * aka un string qui représente un char, par exemple "\065" = 'A' *)
-let ascii_table = "\\" ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *)
+let ascii_table = '\\' ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *)
 let ascii_hex = "\\0x" hex_dig hex_dig
 let printable = ['\032'-'\038' '\040'-'\127']
-let escapes = "\\n" | "\\b" | "\\r" | "\\t" | "\\'" | "\\\"" | "\\\\"
-let atom = ascii_table | ascii_hex | printable | escapes
+let escapes = "\\n"
+            | "\\b"
+            | "\\r"
+            | "\\t"
+            | "\\'"
+            | "\\\\"
+let atom = ascii_table
+         | ascii_hex
+         | printable
+         | escapes
+         | '"'

 (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
 * il faudra le faire à l'analyseur syntaxique.
@@ -71,18 +87,26 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
 (* Identificateur de variables de type *)
 let type_variable = '`' ident
 (* Littéraux entiers *)
-
 let int ='-'? digit+
            | hexa
            | bina
            | octa

 (* Littéraux caractères *)
-let letter = (digit | ['A'-'Z'] | ['a'-'z'])
+let char = (digit | ['A'-'Z'] | ['a'-'z'])

-(* tmp *)
+(* Quand le code ascii est trop grand
+ * TODO: Ne se déclenche pas pour, par exemple, 270 :( *)
 let ascii_trop_grand = '\\' ['3'-'9']['0'-'9'](['0'-'9'])+

+(* Caractères d'un string *)
+let str_char = ascii_table
+             | ascii_hex
+             | printable
+             | escapes
+             | '\''
+             | "\\\""
+
 rule token = parse
  (** Layout *)
  | newline               { next_line_and token lexbuf }
@@ -157,16 +181,13 @@ rule token = parse
  | '"'                   { read_string (Buffer.create 16) lexbuf }

  (* Characters *)
-  | "'" (letter as c) "'" { CHAR c                          }
-  | "'" (atom as a) "'"   { match recup_char a with
-                            | Some c -> CHAR c
-                            | None -> error lexbuf None ""  }
+  | "'" (char as c) "'" { CHAR c                     }
+  | "'" (atom as a) "'"   { CHAR (recup_char a lexbuf) }

-  (** Lexing error *)
-  (* erreur qui advient pour le test 22-char-literal,
-   * le code renvoie bizarrement que "Error (during lexing)" *)
-  | "'" ascii_trop_grand "'" { error lexbuf None "" }
-  | _  as _c                 { error lexbuf None (* (Some _c) *) "unexpected character." }
+  (** Lexing errors *)
+  (* Erreur qui advient quand un code ASCII est trop grand *)
+  | "'" ascii_trop_grand "'" { error lexbuf None ""                      }
+  | _  as _c                 { error lexbuf None (* (Some _c) *) err_msg }

 (* TODO: Gérer les imbrications de commentaires *)
 and commentary = parse
@@ -186,19 +207,16 @@ and commentary_line = parse

 and read_string buffer = parse
  (** End of string *)
-  | '"'    { STRING (Buffer.contents buffer)                        }
+  | '"'           { STRING (Buffer.contents buffer)          }

-  (** Escape *)
-  | "\\n"  { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
-  | "\\b"  { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
-  | "\\r"  { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
-  | "\\t"  { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
-  | "\\'"  { Buffer.add_char buffer '\''; read_string buffer lexbuf }
-  | "\\\"" { Buffer.add_char buffer '"'; read_string buffer lexbuf  }
-  | "\\\\" { Buffer.add_char buffer '\\'; read_string buffer lexbuf }
+  (** Escape  *)
+  | "\\\""        { Buffer.add_char buffer '\"'
+                  ; read_string buffer lexbuf                }
+
+  (** String characters *)
+  | str_char as s { let c = recup_char s lexbuf
+                    in Buffer.add_char buffer c
+                    ; read_string buffer lexbuf              }

  (** Error *)
-  | eof    { error lexbuf None "Unterminated string."               }
-
-  (** String content *)
-  | _ as c { Buffer.add_char buffer c; read_string buffer lexbuf    }
+  | eof           { error lexbuf None "Unterminated string." }