Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

address telemetry message truncation bug - bad unicode error #228

Merged
merged 3 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/core/src/service_interfaces/TelemetryWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def __ensure_message_restriction_compliance(self, full_message):
self.composite_logger.log_telemetry_module("Data sent to telemetry will be truncated as it exceeds size limit. [Message={0}]".format(str(formatted_message)))
formatted_message = formatted_message.encode('utf-8')
chars_dropped = len(formatted_message) - message_size_limit_in_chars + Constants.TELEMETRY_BUFFER_FOR_DROPPED_COUNT_MSG_IN_CHARS + Constants.TELEMETRY_EVENT_COUNTER_MSG_SIZE_LIMIT_IN_CHARS
formatted_message = formatted_message[:message_size_limit_in_chars - Constants.TELEMETRY_BUFFER_FOR_DROPPED_COUNT_MSG_IN_CHARS - Constants.TELEMETRY_EVENT_COUNTER_MSG_SIZE_LIMIT_IN_CHARS].decode('utf-8') + '. [{0} chars dropped]'.format(chars_dropped)
formatted_message = formatted_message[:message_size_limit_in_chars - Constants.TELEMETRY_BUFFER_FOR_DROPPED_COUNT_MSG_IN_CHARS - Constants.TELEMETRY_EVENT_COUNTER_MSG_SIZE_LIMIT_IN_CHARS].decode('utf-8', errors='replace') + '. [{0} chars dropped]'.format(chars_dropped)

formatted_message += " [TC={0}]".format(self.__telemetry_event_counter)
return formatted_message
Expand Down
16 changes: 16 additions & 0 deletions src/core/tests/Test_TelemetryWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,22 @@ def test_write_event_msg_size_limit(self):
self.assertTrue("a"*(len(message.encode('utf-8')) - chars_dropped) + ". [{0} chars dropped]".format(chars_dropped) in events[-1]["Message"])
f.close()

def test_write_event_msg_size_limit_char_more_than_1_bytes(self):
""" Perform 1 byte truncation on char that is more than 1 byte, use decode('utf-8', errors='replace') to replace bad unicode with a good 1 byte char (�) """

message = "a€bc"*3074 # €(\xe2\x82\xac) is 3 bytes char can be written in windows console w/o encoding
self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")
latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1]
with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f:
events = json.load(f)
self.assertTrue(events is not None)
self.assertEqual(events[-1]["TaskName"], "Test Task")
self.assertTrue(len(events[-1]["Message"]) < len(message.encode('utf-8')))
chars_dropped = len(message.encode('utf-8')) - Constants.TELEMETRY_MSG_SIZE_LIMIT_IN_CHARS + Constants.TELEMETRY_BUFFER_FOR_DROPPED_COUNT_MSG_IN_CHARS + Constants.TELEMETRY_EVENT_COUNTER_MSG_SIZE_LIMIT_IN_CHARS
self.assertTrue("a€bc" in events[-1]["Message"])
self.assertTrue("a€bc" * (len(message) + 1 - chars_dropped) + ". [{0} chars dropped]".format(chars_dropped) in events[-1]["Message"]) # len(message) + 1 due to bad unicode will be replaced by �
f.close()

# TODO: The following 3 tests cause widespread test suite failures (on master), so leaving it out. And tracking in: Task 10912099: [Bug] Bug in telemetry writer - overwriting prior events in fast execution
# def test_write_event_size_limit(self):
# # will not write to telemetry if event size exceeds limit
Expand Down
Loading