Skip to content

Commit

Permalink
String interpolation vs utf8
Browse files Browse the repository at this point in the history
  • Loading branch information
kLabz committed Nov 19, 2024
1 parent 4360c60 commit cc33d51
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 57 deletions.
199 changes: 142 additions & 57 deletions src/context/formatString.ml
Original file line number Diff line number Diff line change
@@ -1,84 +1,168 @@
open Extlib_leftovers
open Globals
open Ast

let format_string defines s p process_expr =
let len = String.length s in
let get_next i =
if i >= len then raise End_of_file else
(UTF8.look s i, UTF8.next s i)
in

let read_char = ref 0 in
let char_len = ref 0 in

let get_next_char i =
let (chr, next) = try get_next i
with Invalid_argument _ ->
raise End_of_file
in

try
let c = UCharExt.char_of chr in
incr read_char;
c, (fun buf ->
incr char_len;
UTF8.Buf.add_char buf chr
), next
with UCharExt.Out_of_range ->
let get i =
let ch = String.unsafe_get s i in
(ch, int_of_char ch)
in
let (ch, c) = get !read_char in

let buf = Buffer.create 0 in
Common.utf16_add buf c;
let len = Buffer.length buf in

read_char := !read_char + len;

ch, (fun buf ->
(* UTF16 handling *)
if c >= 0x80 && c < 0x800 then begin
let b = Buffer.create 0 in
let add c = Buffer.add_char b (char_of_int (c land 0xFF)) in
let c' = c lor (snd (get (i + 1)) lsl 8) in
add c';
add (c' lsr 8);

let s' = Buffer.contents b in

(* ok but why? *)
if c' lsr 8 < 0x80 then char_len := !char_len + 2
else if c' < 0xDFFF then incr char_len;

UTF8.Buf.add_string buf s'
end else
die "" __LOC__;
), i+len
in

let buf = UTF8.Buf.create len in
let e = ref None in
let pmin = ref p.pmin in
let min = ref (p.pmin + 1) in
let add_expr (enext,p) len =
min := !min + len;

let add_expr (enext,p) =
min := !min + !char_len;
char_len := 0;
let enext = process_expr enext p in
match !e with
| None -> e := Some enext
| Some prev ->
e := Some (EBinop (OpAdd,prev,enext),punion (pos prev) p)
in
let add enext len =
let p = { p with pmin = !min; pmax = !min + len } in
add_expr (enext,p) len

let add enext =
let p = { p with pmin = !min; pmax = !min + !char_len } in
add_expr (enext,p)
in
let add_sub start pos =
let len = pos - start in
if len > 0 || !e = None then add (EConst (String (String.sub s start len,SDoubleQuotes))) len

let add_sub () =
let s = UTF8.Buf.contents buf in
UTF8.Buf.clear buf;
if !char_len > 0 || !e = None then add (EConst (String (s,SDoubleQuotes)))
in
let len = String.length s in
let rec parse start pos =
if pos = len then add_sub start pos else
let c = String.unsafe_get s pos in
let pos = pos + 1 in
if c = '\'' then begin
incr pmin;
incr min;
end;
if c <> '$' || pos = len then parse start pos else
match String.unsafe_get s pos with
| '$' ->
(* double $ *)
add_sub start pos;
parse (pos + 1) (pos + 1)
| '{' ->
parse_group start pos '{' '}' "brace"
| 'a'..'z' | 'A'..'Z' | '_' ->
add_sub start (pos - 1);
incr min;
let rec loop i =
if i = len then i else
let c = String.unsafe_get s i in

let rec parse pos' =
try begin
let (c, store', pos) = get_next_char pos' in

if c = '\'' then begin
incr pmin;
incr min;
end;

if c <> '$' || pos >= len then begin
store' buf;
parse pos
end else
let (c, store, pos) = get_next_char pos in
match c with
| 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' -> loop (i+1)
| _ -> i
in
let iend = loop (pos + 1) in
let len = iend - pos in
add (EConst (Ident (String.sub s pos len))) len;
parse (pos + len) (pos + len)
| _ ->
(* keep as-it *)
parse start pos
and parse_group start pos gopen gclose gname =
add_sub start (pos - 1);
| '$' ->
(* double $ *)
store buf;
add_sub ();
parse pos
| '{' ->
add_sub ();
parse_group pos' pos '{' '}' "brace"
| 'a'..'z' | 'A'..'Z' | '_' ->
add_sub ();
incr min;
let buf = UTF8.Buf.create len in
store buf;
let rec loop i =
if i = len then i else
let (c,store,next) = get_next_char i in

match c with
| 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' ->
store buf;
loop next
| _ -> i
in
let iend = loop pos in
let id = UTF8.Buf.contents buf in
add (EConst (Ident id));
parse iend
| _ ->
(* keep as-is *)
store' buf;
store buf;
parse pos
end with End_of_file -> add_sub ()

and parse_group prev pos gopen gclose gname =
let buf = UTF8.Buf.create len in
let rec loop groups i =
if i = len then
match groups with
| [] -> die "" __LOC__
| g :: _ -> Error.raise_typing_error ("Unclosed " ^ gname) { p with pmin = !pmin + g + 1; pmax = !pmin + g + 2 }
else
let c = String.unsafe_get s i in
if c = gopen then
loop (i :: groups) (i + 1)
else if c = gclose then begin
let (c, store, pos) = get_next_char i in
if c = gopen then begin
store buf;
loop (i :: groups) pos
end else if c = gclose then begin
let groups = List.tl groups in
if groups = [] then i else loop groups (i + 1)
end else
loop groups (i + 1)
if groups = [] then pos else begin
store buf;
loop groups pos
end
end else begin
store buf;
loop groups pos
end
in
let send = loop [pos] (pos + 1) in
let slen = send - pos - 1 in
let scode = String.sub s (pos + 1) slen in
let send = loop [prev] pos in
let scode = UTF8.Buf.contents buf in
min := !min + 2;
begin
let e =
let ep = { p with pmin = !pmin + pos + 2; pmax = !pmin + send + 1 } in
let ep = { p with pmin = !pmin + pos + 2; pmax = !pmin + send } in
let error msg pos =
if Lexer.string_is_whitespace scode then Error.raise_typing_error "Expression cannot be empty" ep
else Error.raise_typing_error msg pos
Expand All @@ -87,12 +171,13 @@ let format_string defines s p process_expr =
| ParseSuccess(data,_,_) -> data
| ParseError(_,(msg,p),_) -> error (Parser.error_msg msg) p
in
add_expr e slen
add_expr e
end;
min := !min + 1;
parse (send + 1) (send + 1)
parse send
in
parse 0 0;

parse 0;
match !e with
| None -> die "" __LOC__
| Some e -> e
14 changes: 14 additions & 0 deletions tests/display/src/cases/VsHaxeIssue648.hx
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package cases;

// Display test: an unresolved identifier inside an interpolated string must be
// reported at the same character range whether or not a non-ASCII character
// precedes it in the string.
// NOTE(review): the {-1-} / {-2-} tokens in the fixture below are presumably
// position markers consumed by the display test harness — confirm.
class VsHaxeIssue648 extends DisplayTestCase {
/**
trace('Jeremy in $ci{-1-}ty');
trace('Jérémy in $ci{-2-}ty');
**/
@:funcCode function test() {
// Keep only the "unresolved identifier" diagnostics.
var diag = diagnostics().filter(d -> d.kind == DiagnosticKind.DKUnresolvedIdentifier);
// Exactly two are expected (the fixture has two trace lines).
eq(2, diag.length);
// The two fixture lines differ only by "e" vs "é" before the interpolation,
// so both diagnostics must start and end at the same character.
eq(diag[0].range.start.character, diag[1].range.start.character);
eq(diag[0].range.end.character, diag[1].range.end.character);
}
}
13 changes: 13 additions & 0 deletions tests/misc/projects/VshaxeIssue648/Main.hx
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
function main() { // Traces '$test' after prefixes of varying UTF-8 width (ASCII, accented, CJK, emoji).
#if nofail
var test = "test"; // Only declared with -D nofail (compile.hxml); without it every $test is an unknown identifier.
#end
trace('Jeremy $test');
trace('Jérémy $test');
trace('名 字 $test');
trace('zя���� $test abcdefghijk');
trace('���� $test abcdefghijk');
trace('zя $test abcdefghijk');
trace('😀 😀 $test abcdefghijk');
trace('😀 😀 zя���� $test abcdefghijk');
} // Keep lines 5-12 on their current line numbers and free of trailing comments: compile-fail.hxml.stderr quotes them verbatim.
4 changes: 4 additions & 0 deletions tests/misc/projects/VshaxeIssue648/compile-fail.hxml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
--main Main
--interp
-D message.reporting=pretty
-D message.no-color
64 changes: 64 additions & 0 deletions tests/misc/projects/VshaxeIssue648/compile-fail.hxml.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
[ERROR] Main.hx:5: characters 17-21

5 | trace('Jeremy $test');
| ^^^^
| Unknown identifier : test

| For function argument 'v'

[ERROR] Main.hx:6: characters 17-21

6 | trace('Jérémy $test');
| ^^^^
| Unknown identifier : test

| For function argument 'v'

[ERROR] Main.hx:7: characters 16-20

7 | trace('名 字 $test');
| ^^^^
| Unknown identifier : test

| For function argument 'v'

[ERROR] Main.hx:8: characters 17-21

8 | trace('zя���� $test abcdefghijk');
| ^^^^
| Unknown identifier : test

| For function argument 'v'

[ERROR] Main.hx:9: characters 15-19

9 | trace('���� $test abcdefghijk');
| ^^^^
| Unknown identifier : test

| For function argument 'v'

[ERROR] Main.hx:10: characters 13-17

10 | trace('zя $test abcdefghijk');
| ^^^^
| Unknown identifier : test

| For function argument 'v'

[ERROR] Main.hx:11: characters 16-20

11 | trace('😀 😀 $test abcdefghijk');
| ^^^^
| Unknown identifier : test

| For function argument 'v'

[ERROR] Main.hx:12: characters 23-27

12 | trace('😀 😀 zя���� $test abcdefghijk');
| ^^^^
| Unknown identifier : test

| For function argument 'v'

5 changes: 5 additions & 0 deletions tests/misc/projects/VshaxeIssue648/compile.hxml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
--main Main
--interp
-D message.reporting=pretty
-D message.no-color
-D nofail
8 changes: 8 additions & 0 deletions tests/misc/projects/VshaxeIssue648/compile.hxml.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Main.hx:5: Jeremy test
Main.hx:6: Jérémy test
Main.hx:7: 名 字 test
Main.hx:8: zя���� test abcdefghijk
Main.hx:9: ���� test abcdefghijk
Main.hx:10: zя test abcdefghijk
Main.hx:11: 😀 😀 test abcdefghijk
Main.hx:12: 😀 😀 zя���� test abcdefghijk

0 comments on commit cc33d51

Please sign in to comment.