From 8d8bda8fa9a0ff5ac590dd47daa85e8a2ccb88a5 Mon Sep 17 00:00:00 2001
From: Eduardo Silva <eduardo@calyptia.com>
Date: Thu, 5 Dec 2024 17:39:33 -0600
Subject: [PATCH] tests: internal: data: pack: generators: fix json encoding,
 escape utf8 as ascii

Signed-off-by: Eduardo Silva <eduardo@calyptia.com>
---
 tests/internal/data/pack/mixed.py            |  26 +++++++------
 tests/internal/data/pack/mixed_002.json      |   2 +-
 tests/internal/data/pack/mixed_002.mp        | Bin 37 -> 36 bytes
 tests/internal/data/pack/mixed_003.json      |   2 +-
 tests/internal/data/pack/utf8_bell.json      |   2 +-
 tests/internal/data/pack/utf8_copyright.json |   2 +-
 tests/internal/data/pack/utf8_gen.py         |  37 +++++++++++--------
 tests/internal/data/pack/utf8_hokke.json     |   2 +-
 tests/internal/data/pack/utf8_relaxed.json   |   2 +-
 9 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/tests/internal/data/pack/mixed.py b/tests/internal/data/pack/mixed.py
index 7ad0a6149b6..c8859231d98 100644
--- a/tests/internal/data/pack/mixed.py
+++ b/tests/internal/data/pack/mixed.py
@@ -6,22 +6,24 @@
 import json
 import msgpack
 
+
 def gen_json(f):
-    raw = open(f, 'r')
-    data = raw.read()
-    raw.close()
+    # Open the input file in text mode with UTF-8 encoding
+    with open(f, 'r', encoding='utf-8') as raw:
+        data = raw.read()
 
-    out_mp = f[:-4] + ".mp"
-    out_json = f[:-4] + ".json"
+    # Define output filenames
+    base_name = os.path.splitext(f)[0]
+    out_mp = base_name + ".mp"
+    out_json = base_name + ".json"
 
-    # Write messagepack
-    fmp = open(out_mp, 'w')
-    fmp.write(msgpack.packb(data))
-    fmp.close()
+    # Write MessagePack-encoded data in binary mode
+    with open(out_mp, 'wb') as fmp:
+        fmp.write(msgpack.packb(data))
 
-    fjson = open(out_json, 'w')
-    fjson.write(json.dumps(data))
-    fjson.close()
+    # Write JSON-encoded data in text mode
+    with open(out_json, 'w', encoding='utf-8') as fjson:
+        fjson.write(json.dumps(data))
 
 for fn in os.listdir('.'):
      if not os.path.isfile(fn):
diff --git a/tests/internal/data/pack/mixed_002.json b/tests/internal/data/pack/mixed_002.json
index e32314b0aa9..d1afe50ace3 100644
--- a/tests/internal/data/pack/mixed_002.json
+++ b/tests/internal/data/pack/mixed_002.json
@@ -1 +1 @@
-"mixed_002 =>\n\n  áéíóú\n\n\n'\n\\t\n"
\ No newline at end of file
+"mixed_002 =>\n\n  \u00e1\u00e9\u00ed\u00f3\u00fa\n\n\n'\n\\t\n"
\ No newline at end of file
diff --git a/tests/internal/data/pack/mixed_002.mp b/tests/internal/data/pack/mixed_002.mp
index 1bf975535a1c3e87759e44027bf688f83f835be2..4938203d59a1b150b84400a5fa60eb0dcbff5798 100644
GIT binary patch
delta 7
OcmY#YVZ1q!Nf`hKB>~z1

delta 8
PcmY#UWxB;Mkx>}{2vGss

diff --git a/tests/internal/data/pack/mixed_003.json b/tests/internal/data/pack/mixed_003.json
index 167c89b8a06..126945cb3c7 100644
--- a/tests/internal/data/pack/mixed_003.json
+++ b/tests/internal/data/pack/mixed_003.json
@@ -1 +1 @@
-"á\n"
\ No newline at end of file
+"\u00e1\n"
\ No newline at end of file
diff --git a/tests/internal/data/pack/utf8_bell.json b/tests/internal/data/pack/utf8_bell.json
index ced4da0cfe5..d0730c4a7ce 100644
--- a/tests/internal/data/pack/utf8_bell.json
+++ b/tests/internal/data/pack/utf8_bell.json
@@ -1 +1 @@
-"🔔"
\ No newline at end of file
+"\ud83d\udd14"
\ No newline at end of file
diff --git a/tests/internal/data/pack/utf8_copyright.json b/tests/internal/data/pack/utf8_copyright.json
index 4d52a66f3d0..92d937cf7bd 100644
--- a/tests/internal/data/pack/utf8_copyright.json
+++ b/tests/internal/data/pack/utf8_copyright.json
@@ -1 +1 @@
-"©"
\ No newline at end of file
+"\u00a9"
\ No newline at end of file
diff --git a/tests/internal/data/pack/utf8_gen.py b/tests/internal/data/pack/utf8_gen.py
index 606e8cc2d31..9f1eef2b9c2 100644
--- a/tests/internal/data/pack/utf8_gen.py
+++ b/tests/internal/data/pack/utf8_gen.py
@@ -6,27 +6,34 @@
 import msgpack
 
 def gen_json(f):
+    print(f)
 
-    print f
-
-    with io.open(f, 'rb') as raw:
+    with open(f, 'rb') as raw:
         data = raw.read()
 
-    out_mp = f[:-4] + ".mp"
-    out_json = f[:-4] + ".json"
+    out_mp = f"{os.path.splitext(f)[0]}.mp"
+    out_json = f"{os.path.splitext(f)[0]}.json"
+
+    # Decode input bytes to a string
+    try:
+        decoded_data = data.decode('utf-8')
+    except UnicodeDecodeError as e:
+        print(f"Error: Unable to decode file {f} as UTF-8: {e}")
+        return
 
     # Write messagepack
-    fmp = open(out_mp, 'w')
-    fmp.write(msgpack.packb(data))
-    fmp.close()
+    with open(out_mp, 'wb') as fmp:
+        fmp.write(msgpack.packb(decoded_data))
 
-    fjson = open(out_json, 'w')
-    fjson.write(json.dumps(data).encode('utf8'))
-    fjson.close()
+    # Write JSON with properly encoded Unicode escape sequences
+    with open(out_json, 'w', encoding='utf-8') as fjson:
+        # Use json.dumps with ensure_ascii=True for \uXXXX escape sequences
+        escaped_data = json.dumps(decoded_data, ensure_ascii=True)
+        fjson.write(escaped_data)
 
 for fn in os.listdir('.'):
-     if not os.path.isfile(fn):
-         continue
+    if not os.path.isfile(fn):
+        continue
 
-     if fn.startswith('utf8_') and fn.endswith('.txt'):
-         gen_json(fn)
+    if fn.startswith('utf8_') and fn.endswith('.txt'):
+        gen_json(fn)
diff --git a/tests/internal/data/pack/utf8_hokke.json b/tests/internal/data/pack/utf8_hokke.json
index d93624bf21f..37f460c4b35 100644
--- a/tests/internal/data/pack/utf8_hokke.json
+++ b/tests/internal/data/pack/utf8_hokke.json
@@ -1 +1 @@
-"𩸽"
\ No newline at end of file
+"\ud867\ude3d"
\ No newline at end of file
diff --git a/tests/internal/data/pack/utf8_relaxed.json b/tests/internal/data/pack/utf8_relaxed.json
index 4526bf40faf..2402faf9df4 100644
--- a/tests/internal/data/pack/utf8_relaxed.json
+++ b/tests/internal/data/pack/utf8_relaxed.json
@@ -1 +1 @@
-"☺"
\ No newline at end of file
+"\u263a"
\ No newline at end of file