From cc33d514d179e4dbb8ed4a5470b8ae43cde2c8a6 Mon Sep 17 00:00:00 2001 From: Rudy Ges Date: Mon, 18 Nov 2024 14:40:57 +0100 Subject: [PATCH] String interpolation vs utf8 --- src/context/formatString.ml | 199 +++++++++++++----- tests/display/src/cases/VsHaxeIssue648.hx | 14 ++ tests/misc/projects/VshaxeIssue648/Main.hx | 13 ++ .../projects/VshaxeIssue648/compile-fail.hxml | 4 + .../VshaxeIssue648/compile-fail.hxml.stderr | 64 ++++++ .../misc/projects/VshaxeIssue648/compile.hxml | 5 + .../VshaxeIssue648/compile.hxml.stdout | 8 + 7 files changed, 250 insertions(+), 57 deletions(-) create mode 100644 tests/display/src/cases/VsHaxeIssue648.hx create mode 100644 tests/misc/projects/VshaxeIssue648/Main.hx create mode 100644 tests/misc/projects/VshaxeIssue648/compile-fail.hxml create mode 100644 tests/misc/projects/VshaxeIssue648/compile-fail.hxml.stderr create mode 100644 tests/misc/projects/VshaxeIssue648/compile.hxml create mode 100644 tests/misc/projects/VshaxeIssue648/compile.hxml.stdout diff --git a/src/context/formatString.ml b/src/context/formatString.ml index 69d255f58fd..bcbfe6e2341 100644 --- a/src/context/formatString.ml +++ b/src/context/formatString.ml @@ -1,84 +1,168 @@ +open Extlib_leftovers open Globals open Ast let format_string defines s p process_expr = + let len = String.length s in + let get_next i = + if i >= len then raise End_of_file else + (UTF8.look s i, UTF8.next s i) + in + + let read_char = ref 0 in + let char_len = ref 0 in + + let get_next_char i = + let (chr, next) = try get_next i + with Invalid_argument _ -> + raise End_of_file + in + + try + let c = UCharExt.char_of chr in + incr read_char; + c, (fun buf -> + incr char_len; + UTF8.Buf.add_char buf chr + ), next + with UCharExt.Out_of_range -> + let get i = + let ch = String.unsafe_get s i in + (ch, int_of_char ch) + in + let (ch, c) = get !read_char in + + let buf = Buffer.create 0 in + Common.utf16_add buf c; + let len = Buffer.length buf in + + read_char := !read_char + len; + + ch, (fun buf -> + (* UTF16 handling *) + if c >= 0x80 && c < 0x800 then begin + let b = Buffer.create 0 in + let add c = Buffer.add_char b (char_of_int (c land 0xFF)) in + let c' = c lor (snd (get (i + 1)) lsl 8) in + add c'; + add (c' lsr 8); + + let s' = Buffer.contents b in + + (* ok but why? *) + if c' lsr 8 < 0x80 then char_len := !char_len + 2 + else if c' < 0xDFFF then incr char_len; + + UTF8.Buf.add_string buf s' + end else + die "" __LOC__; + ), i+len + in + + let buf = UTF8.Buf.create len in let e = ref None in let pmin = ref p.pmin in let min = ref (p.pmin + 1) in - let add_expr (enext,p) len = - min := !min + len; + + let add_expr (enext,p) = + min := !min + !char_len; + char_len := 0; let enext = process_expr enext p in match !e with | None -> e := Some enext | Some prev -> e := Some (EBinop (OpAdd,prev,enext),punion (pos prev) p) in - let add enext len = - let p = { p with pmin = !min; pmax = !min + len } in - add_expr (enext,p) len + + let add enext = + let p = { p with pmin = !min; pmax = !min + !char_len } in + add_expr (enext,p) in - let add_sub start pos = - let len = pos - start in - if len > 0 || !e = None then add (EConst (String (String.sub s start len,SDoubleQuotes))) len + + let add_sub () = + let s = UTF8.Buf.contents buf in + UTF8.Buf.clear buf; + if !char_len > 0 || !e = None then add (EConst (String (s,SDoubleQuotes))) in - let len = String.length s in - let rec parse start pos = - if pos = len then add_sub start pos else - let c = String.unsafe_get s pos in - let pos = pos + 1 in - if c = '\'' then begin - incr pmin; - incr min; - end; - if c <> '$' || pos = len then parse start pos else - match String.unsafe_get s pos with - | '$' -> - (* double $ *) - add_sub start pos; - parse (pos + 1) (pos + 1) - | '{' -> - parse_group start pos '{' '}' "brace" - | 'a'..'z' | 'A'..'Z' | '_' -> - add_sub start (pos - 1); - incr min; - let rec loop i = - if i = len then i else - let c = String.unsafe_get s i in + + let rec parse pos' = + try begin + let (c, store', pos) = get_next_char pos' in + + if c = '\'' then begin + incr pmin; + incr min; + end; + + if c <> '$' || pos >= len then begin + store' buf; + parse pos + end else + let (c, store, pos) = get_next_char pos in match c with - | 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' -> loop (i+1) - | _ -> i - in - let iend = loop (pos + 1) in - let len = iend - pos in - add (EConst (Ident (String.sub s pos len))) len; - parse (pos + len) (pos + len) - | _ -> - (* keep as-it *) - parse start pos - and parse_group start pos gopen gclose gname = - add_sub start (pos - 1); + | '$' -> + (* double $ *) + store buf; + add_sub (); + parse pos + | '{' -> + add_sub (); + parse_group pos' pos '{' '}' "brace" + | 'a'..'z' | 'A'..'Z' | '_' -> + add_sub (); + incr min; + let buf = UTF8.Buf.create len in + store buf; + let rec loop i = + if i = len then i else + let (c,store,next) = get_next_char i in + + match c with + | 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' -> + store buf; + loop next + | _ -> i + in + let iend = loop pos in + let id = UTF8.Buf.contents buf in + add (EConst (Ident id)); + parse iend + | _ -> + (* keep as-is *) + store' buf; + store buf; + parse pos + end with End_of_file -> add_sub () + + and parse_group prev pos gopen gclose gname = + let buf = UTF8.Buf.create len in let rec loop groups i = if i = len then match groups with | [] -> die "" __LOC__ | g :: _ -> Error.raise_typing_error ("Unclosed " ^ gname) { p with pmin = !pmin + g + 1; pmax = !pmin + g + 2 } else - let c = String.unsafe_get s i in - if c = gopen then - loop (i :: groups) (i + 1) - else if c = gclose then begin + let (c, store, pos) = get_next_char i in + if c = gopen then begin + store buf; + loop (i :: groups) pos + end else if c = gclose then begin let groups = List.tl groups in - if groups = [] then i else loop groups (i + 1) - end else - loop groups (i + 1) + if groups = [] then pos else begin + store buf; + loop groups pos + end + end else begin + store buf; + loop groups pos + end in - let send = loop [pos] (pos + 1) in - let slen = send - pos - 1 in - let scode = String.sub s (pos + 1) slen in + let send = loop [prev] pos in + let scode = UTF8.Buf.contents buf in min := !min + 2; begin let e = - let ep = { p with pmin = !pmin + pos + 2; pmax = !pmin + send + 1 } in + let ep = { p with pmin = !pmin + pos + 2; pmax = !pmin + send } in let error msg pos = if Lexer.string_is_whitespace scode then Error.raise_typing_error "Expression cannot be empty" ep else Error.raise_typing_error msg pos @@ -87,12 +171,13 @@ let format_string defines s p process_expr = | ParseSuccess(data,_,_) -> data | ParseError(_,(msg,p),_) -> error (Parser.error_msg msg) p in - add_expr e slen + add_expr e end; min := !min + 1; - parse (send + 1) (send + 1) + parse send in - parse 0 0; + + parse 0; match !e with | None -> die "" __LOC__ | Some e -> e diff --git a/tests/display/src/cases/VsHaxeIssue648.hx b/tests/display/src/cases/VsHaxeIssue648.hx new file mode 100644 index 00000000000..4d220ee7ae1 --- /dev/null +++ b/tests/display/src/cases/VsHaxeIssue648.hx @@ -0,0 +1,14 @@ +package cases; + +class VsHaxeIssue648 extends DisplayTestCase { + /** + trace('Jeremy in $ci{-1-}ty'); + trace('Jérémy in $ci{-2-}ty'); + **/ + @:funcCode function test() { + var diag = diagnostics().filter(d -> d.kind == DiagnosticKind.DKUnresolvedIdentifier); + eq(2, diag.length); + eq(diag[0].range.start.character, diag[1].range.start.character); + eq(diag[0].range.end.character, diag[1].range.end.character); + } +} diff --git a/tests/misc/projects/VshaxeIssue648/Main.hx b/tests/misc/projects/VshaxeIssue648/Main.hx new file mode 100644 index 00000000000..f80a110a80f --- /dev/null +++ b/tests/misc/projects/VshaxeIssue648/Main.hx @@ -0,0 +1,13 @@ +function main() { + #if nofail + var test = "test"; + #end + trace('Jeremy $test'); + trace('Jérémy $test'); + trace('名 字 $test'); + trace('zя���� $test abcdefghijk'); + trace('���� $test abcdefghijk'); + trace('zя $test abcdefghijk'); + trace('😀 😀 $test abcdefghijk'); + trace('😀 😀 zя���� $test abcdefghijk'); +} diff --git a/tests/misc/projects/VshaxeIssue648/compile-fail.hxml b/tests/misc/projects/VshaxeIssue648/compile-fail.hxml new file mode 100644 index 00000000000..5fdfe6ffcd5 --- /dev/null +++ b/tests/misc/projects/VshaxeIssue648/compile-fail.hxml @@ -0,0 +1,4 @@ +--main Main +--interp +-D message.reporting=pretty +-D message.no-color diff --git a/tests/misc/projects/VshaxeIssue648/compile-fail.hxml.stderr b/tests/misc/projects/VshaxeIssue648/compile-fail.hxml.stderr new file mode 100644 index 00000000000..01ef17e35b6 --- /dev/null +++ b/tests/misc/projects/VshaxeIssue648/compile-fail.hxml.stderr @@ -0,0 +1,64 @@ +[ERROR] Main.hx:5: characters 17-21 + + 5 | trace('Jeremy $test'); + | ^^^^ + | Unknown identifier : test + + | For function argument 'v' + +[ERROR] Main.hx:6: characters 17-21 + + 6 | trace('Jérémy $test'); + | ^^^^ + | Unknown identifier : test + + | For function argument 'v' + +[ERROR] Main.hx:7: characters 16-20 + + 7 | trace('名 字 $test'); + | ^^^^ + | Unknown identifier : test + + | For function argument 'v' + +[ERROR] Main.hx:8: characters 17-21 + + 8 | trace('zя���� $test abcdefghijk'); + | ^^^^ + | Unknown identifier : test + + | For function argument 'v' + +[ERROR] Main.hx:9: characters 15-19 + + 9 | trace('���� $test abcdefghijk'); + | ^^^^ + | Unknown identifier : test + + | For function argument 'v' + +[ERROR] Main.hx:10: characters 13-17 + + 10 | trace('zя $test abcdefghijk'); + | ^^^^ + | Unknown identifier : test + + | For function argument 'v' + +[ERROR] Main.hx:11: characters 16-20 + + 11 | trace('😀 😀 $test abcdefghijk'); + | ^^^^ + | Unknown identifier : test + + | For function argument 'v' + +[ERROR] Main.hx:12: characters 23-27 + + 12 | trace('😀 😀 zя���� $test abcdefghijk'); + | ^^^^ + | Unknown identifier : test + + | For function argument 'v' + diff --git a/tests/misc/projects/VshaxeIssue648/compile.hxml b/tests/misc/projects/VshaxeIssue648/compile.hxml new file mode 100644 index 00000000000..d52d69de768 --- /dev/null +++ b/tests/misc/projects/VshaxeIssue648/compile.hxml @@ -0,0 +1,5 @@ +--main Main +--interp +-D message.reporting=pretty +-D message.no-color +-D nofail diff --git a/tests/misc/projects/VshaxeIssue648/compile.hxml.stdout b/tests/misc/projects/VshaxeIssue648/compile.hxml.stdout new file mode 100644 index 00000000000..0ce0c73ce1d --- /dev/null +++ b/tests/misc/projects/VshaxeIssue648/compile.hxml.stdout @@ -0,0 +1,8 @@ +Main.hx:5: Jeremy test +Main.hx:6: Jérémy test +Main.hx:7: 名 字 test +Main.hx:8: zя���� test abcdefghijk +Main.hx:9: ���� test abcdefghijk +Main.hx:10: zя test abcdefghijk +Main.hx:11: 😀 😀 test abcdefghijk +Main.hx:12: 😀 😀 zя���� test abcdefghijk