Skip to content

Commit

Permalink
address telemetry message truncation bug - bad unicode error (#228)
Browse files Browse the repository at this point in the history
* add bad char for unit test

* address bad unicode truncation and add unit test

* revert space in __ensure_message_restriction_compliance
  • Loading branch information
feng-j678 authored Jan 4, 2024
1 parent 83961b0 commit dfd4909
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/core/src/service_interfaces/TelemetryWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def __ensure_message_restriction_compliance(self, full_message):
self.composite_logger.log_telemetry_module("Data sent to telemetry will be truncated as it exceeds size limit. [Message={0}]".format(str(formatted_message)))
formatted_message = formatted_message.encode('utf-8')
chars_dropped = len(formatted_message) - message_size_limit_in_chars + Constants.TELEMETRY_BUFFER_FOR_DROPPED_COUNT_MSG_IN_CHARS + Constants.TELEMETRY_EVENT_COUNTER_MSG_SIZE_LIMIT_IN_CHARS
formatted_message = formatted_message[:message_size_limit_in_chars - Constants.TELEMETRY_BUFFER_FOR_DROPPED_COUNT_MSG_IN_CHARS - Constants.TELEMETRY_EVENT_COUNTER_MSG_SIZE_LIMIT_IN_CHARS].decode('utf-8') + '. [{0} chars dropped]'.format(chars_dropped)
formatted_message = formatted_message[:message_size_limit_in_chars - Constants.TELEMETRY_BUFFER_FOR_DROPPED_COUNT_MSG_IN_CHARS - Constants.TELEMETRY_EVENT_COUNTER_MSG_SIZE_LIMIT_IN_CHARS].decode('utf-8', errors='replace') + '. [{0} chars dropped]'.format(chars_dropped)

formatted_message += " [TC={0}]".format(self.__telemetry_event_counter)
return formatted_message
Expand Down
16 changes: 16 additions & 0 deletions src/core/tests/Test_TelemetryWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,22 @@ def test_write_event_msg_size_limit(self):
self.assertTrue("a"*(len(message.encode('utf-8')) - chars_dropped) + ". [{0} chars dropped]".format(chars_dropped) in events[-1]["Message"])
f.close()

def test_write_event_msg_size_limit_char_more_than_1_bytes(self):
    """ Truncating an oversized message in the middle of a multi-byte UTF-8 character must not raise.

    The message repeats "a€bc", which is 6 bytes in UTF-8 because € takes 3 bytes, so a byte-level cut can
    land inside €. The writer decodes the truncated bytes with decode('utf-8', errors='replace'), which turns
    an incomplete trailing sequence into the replacement character (�) instead of raising. """
    message = "a€bc"*3074  # € is \xe2\x82\xac in UTF-8 and can be written in windows console w/o encoding
    self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")

    events_folder_path = self.runtime.telemetry_writer.events_folder_path
    event_files = [file_name for file_name in os.listdir(events_folder_path) if re.search(r'^[0-9]+\.json$', file_name)]
    # os.listdir() returns entries in arbitrary order, so select the highest-numbered file explicitly.
    # NOTE(review): assumes the numeric file name grows over time (e.g. a timestamp) - confirm.
    latest_event_file = max(event_files, key=lambda file_name: int(file_name[:-len('.json')]))
    with open(os.path.join(events_folder_path, latest_event_file), 'r') as f:
        events = json.load(f)

    self.assertIsNotNone(events)
    self.assertEqual(events[-1]["TaskName"], "Test Task")

    encoded_message = message.encode('utf-8')
    self.assertLess(len(events[-1]["Message"]), len(encoded_message))
    self.assertIn("a€bc", events[-1]["Message"])

    # Mirror the writer's truncation: keep the leading bytes that fit under the size limit once room for the
    # dropped-count text and the event counter is reserved, then decode with errors='replace'.
    bytes_kept = Constants.TELEMETRY_MSG_SIZE_LIMIT_IN_CHARS - Constants.TELEMETRY_BUFFER_FOR_DROPPED_COUNT_MSG_IN_CHARS - Constants.TELEMETRY_EVENT_COUNTER_MSG_SIZE_LIMIT_IN_CHARS
    chars_dropped = len(encoded_message) - bytes_kept
    expected_text = encoded_message[:bytes_kept].decode('utf-8', errors='replace')
    # Comparing against the full expected prefix (rather than "a€bc" repeated by a count that mixes characters
    # and bytes) keeps this check meaningful: a zero or negative repeat count would yield an empty string.
    self.assertIn(expected_text + ". [{0} chars dropped]".format(chars_dropped), events[-1]["Message"])

# TODO: The following 3 tests cause widespread test suite failures (on master), so leaving it out. And tracking in: Task 10912099: [Bug] Bug in telemetry writer - overwriting prior events in fast execution
# def test_write_event_size_limit(self):
# # will not write to telemetry if event size exceeds limit
Expand Down

0 comments on commit dfd4909

Please sign in to comment.