Skip to content

Commit

Permalink
tests: internal: data: pack: generators: fix json encoding ascii utf8
Browse files (browse the repository at this point in the history)
Signed-off-by: Eduardo Silva <[email protected]>
  • Loading branch information
edsiper committed Dec 5, 2024
1 parent d06b3fd commit 8d8bda8
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 33 deletions.
26 changes: 14 additions & 12 deletions tests/internal/data/pack/mixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,24 @@
import json
import msgpack


def gen_json(f):
raw = open(f, 'r')
data = raw.read()
raw.close()
# Open the input file in text mode with UTF-8 encoding
with open(f, 'r', encoding='utf-8') as raw:
data = raw.read()

out_mp = f[:-4] + ".mp"
out_json = f[:-4] + ".json"
# Define output filenames
base_name = os.path.splitext(f)[0]
out_mp = base_name + ".mp"
out_json = base_name + ".json"

# Write messagepack
fmp = open(out_mp, 'w')
fmp.write(msgpack.packb(data))
fmp.close()
# Write MessagePack-encoded data in binary mode
with open(out_mp, 'wb') as fmp:
fmp.write(msgpack.packb(data))

fjson = open(out_json, 'w')
fjson.write(json.dumps(data))
fjson.close()
# Write JSON-encoded data in text mode
with open(out_json, 'w', encoding='utf-8') as fjson:
fjson.write(json.dumps(data))

for fn in os.listdir('.'):
if not os.path.isfile(fn):
Expand Down
2 changes: 1 addition & 1 deletion tests/internal/data/pack/mixed_002.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"mixed_002 =>\n\n áéíóú\n\n\n'\n\\t\n"
"mixed_002 =>\n\n \u00e1\u00e9\u00ed\u00f3\u00fa\n\n\n'\n\\t\n"
Binary file modified tests/internal/data/pack/mixed_002.mp
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/internal/data/pack/mixed_003.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"á\n"
"\u00e1\n"
2 changes: 1 addition & 1 deletion tests/internal/data/pack/utf8_bell.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"🔔"
"\ud83d\udd14"
2 changes: 1 addition & 1 deletion tests/internal/data/pack/utf8_copyright.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"©"
"\u00a9"
37 changes: 22 additions & 15 deletions tests/internal/data/pack/utf8_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,34 @@
import msgpack

def gen_json(f):
print(f)

print f

with io.open(f, 'rb') as raw:
with open(f, 'rb') as raw:
data = raw.read()

out_mp = f[:-4] + ".mp"
out_json = f[:-4] + ".json"
out_mp = f"{os.path.splitext(f)[0]}.mp"
out_json = f"{os.path.splitext(f)[0]}.json"

# Decode input bytes to a string
try:
decoded_data = data.decode('utf-8')
except UnicodeDecodeError as e:
print(f"Error: Unable to decode file {f} as UTF-8: {e}")
return

# Write messagepack
fmp = open(out_mp, 'w')
fmp.write(msgpack.packb(data))
fmp.close()
with open(out_mp, 'wb') as fmp:
fmp.write(msgpack.packb(decoded_data))

fjson = open(out_json, 'w')
fjson.write(json.dumps(data).encode('utf8'))
fjson.close()
# Write JSON with properly encoded Unicode escape sequences
with open(out_json, 'w', encoding='utf-8') as fjson:
# Use json.dumps with ensure_ascii=True for \uXXXX escape sequences
escaped_data = json.dumps(decoded_data, ensure_ascii=True)
fjson.write(escaped_data)

for fn in os.listdir('.'):
if not os.path.isfile(fn):
continue
if not os.path.isfile(fn):
continue

if fn.startswith('utf8_') and fn.endswith('.txt'):
gen_json(fn)
if fn.startswith('utf8_') and fn.endswith('.txt'):
gen_json(fn)
2 changes: 1 addition & 1 deletion tests/internal/data/pack/utf8_hokke.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"𩸽"
"\ud867\ude3d"
2 changes: 1 addition & 1 deletion tests/internal/data/pack/utf8_relaxed.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"☺"
"\u263a"

0 comments on commit 8d8bda8

Please sign in to comment.