From 5a1dbcf6de34cf24e919c41bc8ce3a535f772f39 Mon Sep 17 00:00:00 2001
From: Mylloon <kennel.anri@tutanota.com>
Date: Sat, 21 Oct 2023 15:33:19 +0200
Subject: [PATCH] refactor

---
 flap/src/hopix/hopixLexer.mll | 84 ++++++++++++++++++-----------------
 1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/flap/src/hopix/hopixLexer.mll b/flap/src/hopix/hopixLexer.mll
index 67c82d3..b9fe033 100644
--- a/flap/src/hopix/hopixLexer.mll
+++ b/flap/src/hopix/hopixLexer.mll
@@ -13,7 +13,7 @@
     "during lexing"
     ^
     match c with
-    | Some c -> Printf.sprintf " at '%c'" c
+    | Some c -> Printf.sprintf " at `%c`" c
     | None -> ""
   in
     error msg (lex_join lexbuf.lex_start_p lexbuf.lex_curr_p)
@@ -22,37 +22,36 @@
 let newline = ('\010' | '\013' | "\013\010")
 let blank   = [' ' '\009' '\012']
 
+let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
+
 let digit = ['0'-'9']
-let hexa = "0x" ['0'-'9' 'a'-'f' 'A'-'F']
+let hexa = "0x" hex_dig
 let bina = "0b" ['0'-'1']
 let octa = "0o" ['0'-'7']
 
-let int = '-'? (digit+ | hexa+ | bina+ | octa+)
 
-
-(* On ne peut pas différencier au niveau du lexer var_id label_id et type_con,
- * il faudra le faire à l'analyseur syntaxique. On fait donc un "ident" pour
- * identificateur *)
-
-(* identificateur *)
-let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
-let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
-let type_variable = '`' ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
-
-(* littéraux caractères et chaînes de caractères *)
-let ascii_c =  ['\000'-'\255']
-let hex_dig = ['0'-'9' 'a'-'f' 'A'-'F']
+(* An atom is the source text for one character, e.g. "\065" stands for 'A'.
+ * NOTE(review): ascii_table matches any single byte, not a \ddd escape. *)
+let ascii_table = ['\000'-'\255']
 let ascii_hex = "\\0x" hex_dig hex_dig
 let printable = ['\032'-'\038' '\040'-'\127']
+let escapes = "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
+let atom = ascii_table | ascii_hex | printable | escapes
 
-let atom = ascii_c | ascii_hex | printable | "\\\\" | "\\'" | "\\n" | "\\t" | "\\b" | "\\r"
-let char = '\'' atom '\''
+(* The lexer cannot tell var_id, label_id and type_con apart;
+ * that distinction is left to the parser.
+ * They therefore share a single 'ident' (identifier) pattern. *)
+
+(* Identifiers shared by var_id, label_id and type_con *)
+let ident = ['a'-'z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
+(* Data constructor identifiers *)
+let constr_id = ['A'-'Z']['A'-'Z' 'a'-'z' '0'-'9' '_']*
+(* Type variable identifiers *)
+let type_variable = '`' ident
+(* Integer literals: optional '-', then decimal digits or a 0x / 0b / 0o prefix followed by one or more digits of that base (the prefix is not repeated) *)
+let int = '-'? (digit+ | "0x" hex_dig+ | "0b" ['0'-'1']+ | "0o" ['0'-'7']+)
+(* Littéraux caractères *)
 let letter = (digit | ['A'-'Z'] | ['a'-'z'])
-(* pas sûr pour str *)
-let str = '\"' (atom | '\'' | "\\\"")* '\"'
-
-
 
 rule token = parse
   (** Layout *)
@@ -80,18 +79,6 @@ rule token = parse
   | "and"                 { AND_KW }
   | "for"                 { FOR    }
 
-  (** Opérateurs binaires  *)
-  | "+"                   { PLUS         }
-  | "-"                   { MINUS        }
-  | "/"                   { SLASH        }
-  | "&&"                  { D_AND        }
-  | "||"                  { D_OR         }
-  | "=?"                  { EQUAL_OP     }
-  | "<=?"                 { INF_EQUAL_OP }
-  | ">=?"                 { SUP_EQUAL_OP }
-  | "<?"                  { INF_OP       }
-  | ">?"                  { SUP_OP       }
-
 
   (** Ponctuation *)
   | '='                   { EQUAL     }
@@ -116,18 +103,35 @@ rule token = parse
   | ":="                  { ASSIGN    }
   | '!'                   { EXCLA     }
 
-  (** Strings *)
-  | '"'                   { read_string (Buffer.create 16) lexbuf }
+  (* Opérateurs binaires *)
+  | "+"                   { PLUS         }
+  | "-"                   { MINUS        }
+  | "/"                   { SLASH        }
+  | "&&"                  { D_AND        }
+  | "||"                  { D_OR         }
+  | "=?"                  { EQUAL_OP     }
+  | "<=?"                 { INF_EQUAL_OP }
+  | ">=?"                 { SUP_EQUAL_OP }
+  | "<?"                  { INF_OP       }
+  | ">?"                  { SUP_OP       }
 
-  (** Values *)
-  | int as i              { INT (Mint.of_string i) }
+  (** Identificateurs *)
   | ident as s            { ID s                   }
   | type_variable as s    { TID s                  }
   | constr_id as s        { CID s                  }
 
-  (** Characters *)
-  (* On en manque surement plein ici *)
+  (* Integers *)
+  | int as i              { INT (Mint.of_string i) }
+
+  (* Strings *)
+  | '"'                   { read_string (Buffer.create 16) lexbuf }
+
+  (* Characters *)
   | "'" (letter as c) "'" { CHAR c }
+  (* | "'" (atom as c) "'"   { (* On retire le \ du début
+                             * TODO: fix *)
+                            let code = int_of_string (String.sub c 1 ((String.length c) - 2))
+                            in CHAR (Char.chr (code)) } *)
 
   (** Lexing error *)
   | _  as _c              { error lexbuf None (* (Some _c) *) "unexpected character." }