From 05c2d703bfcca81299cdb4f9906c6c66fd1b7b34 Mon Sep 17 00:00:00 2001
From: Mylloon <kennel.anri@tutanota.com>
Date: Tue, 24 Oct 2023 21:44:29 +0200
Subject: [PATCH] strings!

---
 flap/src/hopix/hopixLexer.mll | 122 +++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 52 deletions(-)

diff --git a/flap/src/hopix/hopixLexer.mll b/flap/src/hopix/hopixLexer.mll
index 1b86638..0b9c1d1 100644
--- a/flap/src/hopix/hopixLexer.mll
+++ b/flap/src/hopix/hopixLexer.mll
@@ -7,36 +7,43 @@
   let next_line_and f lexbuf  =
     Lexing.new_line lexbuf;
     f lexbuf
+  ;;
 
   let error lexbuf c =
-  let msg =
-    "during lexing"
-    ^
-    match c with
-    | Some c -> Printf.sprintf " at `%c`" c
-    | None -> ""
-  in
+    let msg =
+      "during lexing"
+      ^
+      match c with
+      | Some c -> Printf.sprintf " at `%c`" c
+      | None -> ""
+    in
     error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
+  ;;
 
+  let err_msg = "unexpected character." (* shared by recup_char and the catch-all rule of token *)
 
   (* Fonction qui convertie une chaîne de caractère ascii en vrai caractère.
-   * Notamment les escapes : "\n" ou "\000" *)
-  let recup_char data =
-    match data with
-    | "\\n" -> Some '\n'
-    | "\\b" -> Some '\b'
-    | "\\r" -> Some '\r'
-    | "\\t" -> Some '\t'
-    | "\\'" -> Some '\''
-    | "\\\"" -> Some '"'
-    | "\\\\" -> Some '\\'
-    | _ ->
-      (try
-         let caractere = String.sub data 1 (String.length data - 1) in
-         let ascii_code = int_of_string caractere in
-         Some (Char.chr ascii_code)
-       with
-       | _ -> None)
+   * Notamment les escapes : "\n" ou "\000"
+   * En plus de préserver les caractères "normaux" *)
+  (* [recup_char data lexbuf] turns the lexeme [data] of one character into
+   * that character; an invalid escape is reported through [error]. *)
+  let recup_char data lexbuf =
+    let length = String.length data in
+    if length = 1 then data.[0] (* plain character, kept as is *)
+    else (
+      match data with
+      | "\\n" -> '\n'
+      | "\\b" -> '\b'
+      | "\\r" -> '\r'
+      | "\\t" -> '\t'
+      | "\\'" -> '\''
+      | "\\\"" -> '"'
+      | "\\\\" -> '\\'
+      | _ ->
+        (* Numeric escape: the text after the backslash is decimal or 0x-hex;
+         * a non-number or a code outside 0..255 is rejected. *)
+        (try Char.chr (int_of_string (String.sub data 1 (length - 1))) with
+         | Failure _ | Invalid_argument _ -> error lexbuf None err_msg))
   ;;
 }
 
@@ -54,11 +61,20 @@ let octa = "0o" ['0'-'7']+
 
 (* Définition d'un atom
  * aka un string qui représente un char, par exemple "\065" = 'A' *)
-let ascii_table = "\\" ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: on déborde de 255 à 299 :( *)
+let ascii_table = '\\' ['0'-'2'] ['0'-'9'] ['0'-'9'] (* TODO: also matches 256 to 299; recup_char rejects those codes *)
 let ascii_hex = "\\0x" hex_dig hex_dig
 let printable = ['\032'-'\038' '\040'-'\127']
-let escapes = "\\n" | "\\b" | "\\r" | "\\t" | "\\'" | "\\\"" | "\\\\"
-let atom = ascii_table | ascii_hex | printable | escapes
+let escapes = "\\n"   (* two-character backslash escapes *)
+            | "\\b"
+            | "\\r"
+            | "\\t"
+            | "\\'"
+            | "\\\\"
+let atom = ascii_table  (* lexeme accepted between single quotes by the character rule of token *)
+         | ascii_hex
+         | printable
+         | escapes
+         | '"'          (* bare double quote, no escape needed in a character literal *)
 
 (* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
  * il faudra le faire à l'analyseur syntaxique.
@@ -71,18 +87,26 @@ let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
 (* Identificateur de variables de type *)
 let type_variable = '`' ident
 (* Littéraux entiers *)
-
 let int ='-'? digit+
             | hexa
             | bina
             | octa
 
 (* Littéraux caractères *)
-let letter = (digit | ['A'-'Z'] | ['a'-'z'])
+let char = (digit | ['A'-'Z'] | ['a'-'z']) (* one letter or digit, turned directly into a CHAR by token *)
 
-(* tmp *)
+(* Numeric escape whose code is too large: first digit 3-9, three digits or more.
+ * TODO: does not fire for codes such as 270, whose first digit is 2 :( *)
 let ascii_trop_grand = '\\' ['3'-'9']['0'-'9'](['0'-'9'])+
 
+(* Characters allowed inside a string literal *)
+let str_char = ascii_table
+             | ascii_hex
+             | printable
+             | escapes
+             | '\''    (* bare single quote, which printable leaves out *)
+             | "\\\""  (* escaped double quote *)
+
 rule token = parse
   (** Layout *)
   | newline               { next_line_and token lexbuf }
@@ -157,16 +181,13 @@ rule token = parse
   | '"'                   { read_string (Buffer.create 16) lexbuf }
 
   (* Characters *)
-  | "'" (letter as c) "'" { CHAR c                          }
-  | "'" (atom as a) "'"   { match recup_char a with
-                            | Some c -> CHAR c
-                            | None -> error lexbuf None ""  }
+  | "'" (char as c) "'" { CHAR c                     } (* letter or digit, used as is *)
+  | "'" (atom as a) "'"   { CHAR (recup_char a lexbuf) } (* any atom, decoded by recup_char *)
 
-  (** Lexing error *)
-  (* erreur qui advient pour le test 22-char-literal,
-   * le code renvoie bizarrement que "Error (during lexing)" *)
-  | "'" ascii_trop_grand "'" { error lexbuf None "" }
-  | _  as _c                 { error lexbuf None (* (Some _c) *) "unexpected character." }
+  (** Lexing errors *)
+  (* Error raised when an ASCII code is too large *)
+  | "'" ascii_trop_grand "'" { error lexbuf None ""                      }
+  | _  as _c                 { error lexbuf None (* (Some _c) *) err_msg }
 
 (* TODO: Gérer les imbrications de commentaires *)
 and commentary = parse
@@ -186,19 +207,16 @@ and commentary_line = parse
 
 and read_string buffer = parse
   (** End of string *)
-  | '"'    { STRING (Buffer.contents buffer)                        }
+  | '"'           { STRING (Buffer.contents buffer)          }
 
-  (** Escape *)
-  | "\\n"  { Buffer.add_char buffer '\n'; read_string buffer lexbuf }
-  | "\\b"  { Buffer.add_char buffer '\b'; read_string buffer lexbuf }
-  | "\\r"  { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
-  | "\\t"  { Buffer.add_char buffer '\r'; read_string buffer lexbuf }
-  | "\\'"  { Buffer.add_char buffer '\''; read_string buffer lexbuf }
-  | "\\\"" { Buffer.add_char buffer '"'; read_string buffer lexbuf  }
-  | "\\\\" { Buffer.add_char buffer '\\'; read_string buffer lexbuf }
+  (** Escaped double quote, listed before [str_char], which also matches it *)
+  | "\\\""        { Buffer.add_char buffer '"';
+                    read_string buffer lexbuf                }
+
+  (** Any other string character, decoded by [recup_char] *)
+  | str_char as s { let decoded = recup_char s lexbuf in
+                    Buffer.add_char buffer decoded;
+                    read_string buffer lexbuf                }
 
   (** Error *)
-  | eof    { error lexbuf None "Unterminated string."               }
-
-  (** String content *)
-  | _ as c { Buffer.add_char buffer c; read_string buffer lexbuf    }
+  | eof           { error lexbuf None "Unterminated string." }