diff --git a/Jenkinsfile b/Jenkinsfile
index 534a196f0..3d0d0bbf0 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -65,6 +65,7 @@ def dailyDeviceTest = {
   runPytestDevice("8x8/test_concatenate", "-n 1 --tc 1", "concat_1")
   runPytestDevice("8x8/test_concatenate", "-n 1 --tc 5", "concat_5")
   runPytestDevice("8x8/test_mean", "-n 1 --tc 1", "mean_1")
+  runPytestDevice("16x8/test_mean", "-n 1 --tc 1", "16x8_mean_1")
   runPytestDevice("8x8/test_lstm", "-n 1 --tc 1", "lstm_1")
   runPytestDevice("8x8/test_lstm", "-n 1", "lstm_5")
   runPytestDevice("complex_models/8x8/test_cnn_classifier", "-n 1 --tc 1", "cnn_classifier_1")
diff --git a/integration_tests/models/16x8/test_mean/params.yaml b/integration_tests/models/16x8/test_mean/params.yaml
new file mode 100644
index 000000000..aecf2f6bd
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/params.yaml
@@ -0,0 +1 @@
+MAX_ABS_ERROR: 1.0
diff --git a/integration_tests/models/16x8/test_mean/test_mean_0.tflite b/integration_tests/models/16x8/test_mean/test_mean_0.tflite
new file mode 100644
index 000000000..d6ba76ad0
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_0.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_1.mlir b/integration_tests/models/16x8/test_mean/test_mean_1.mlir
new file mode 100644
index 000000000..86e91f1ab
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_1.mlir
@@ -0,0 +1,5 @@
+func.func @main(%arg0: tensor<1x5x8x16x!quant.uniform<i8:f32, 0.0078426999971270561:-1>> {tf_saved_model.index_path = ["input_2"]}) -> (tensor<1x5x1x16x!quant.uniform<i8:f32, 0.0078426999971270561:-1>> {tf_saved_model.index_path = ["tf.mean_1"]}) attributes {tf.entry_function = {inputs = "serving_default_input_2:0", outputs = "PartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<1xi32>, value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<1x5x8x16x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>, tensor<1xi32>) -> tensor<1x5x1x16x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>
+  return %1 : tensor<1x5x1x16x!quant.uniform<i8:f32, 0.0078426999971270561:-1>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_1.tflite b/integration_tests/models/16x8/test_mean/test_mean_1.tflite
new file mode 100644
index 000000000..063533218
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_1.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_10.mlir b/integration_tests/models/16x8/test_mean/test_mean_10.mlir
new file mode 100644
index 000000000..f49a62393
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_10.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 2nd and 3rd axes of a 4D tensor with consecutive axes and keep_dims = true.
+func.func @main(%arg0: tensor<8x5x10x12x!quant.uniform<i8:f32, 0.008:2>>) -> (tensor<8x1x1x12x!quant.uniform<i8:f32, 0.008:2>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[1, 2]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<8x5x10x12x!quant.uniform<i8:f32, 0.008:2>>, tensor<2xi32>) -> tensor<8x1x1x12x!quant.uniform<i8:f32, 0.008:2>>
+  return %1 : tensor<8x1x1x12x!quant.uniform<i8:f32, 0.008:2>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_10.tflite b/integration_tests/models/16x8/test_mean/test_mean_10.tflite
new file mode 100644
index 000000000..e84cfe0cb
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_10.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_10_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_10_int16.mlir
new file mode 100644
index 000000000..043682ea9
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_10_int16.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 2nd and 3rd axes of a 4D tensor with consecutive axes and keep_dims = true.
+func.func @main(%arg0: tensor<8x5x10x12x!quant.uniform<i16:f32, 0.008>>) -> (tensor<8x1x1x12x!quant.uniform<i16:f32, 0.008>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[1, 2]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<8x5x10x12x!quant.uniform<i16:f32, 0.008>>, tensor<2xi32>) -> tensor<8x1x1x12x!quant.uniform<i16:f32, 0.008>>
+  return %1 : tensor<8x1x1x12x!quant.uniform<i16:f32, 0.008>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_11.mlir b/integration_tests/models/16x8/test_mean/test_mean_11.mlir
new file mode 100644
index 000000000..f49a62393
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_11.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 2nd and 3rd axes of a 4D tensor with consecutive axes and keep_dims = true.
+func.func @main(%arg0: tensor<8x5x10x12x!quant.uniform<i8:f32, 0.008:2>>) -> (tensor<8x1x1x12x!quant.uniform<i8:f32, 0.008:2>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[1, 2]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<8x5x10x12x!quant.uniform<i8:f32, 0.008:2>>, tensor<2xi32>) -> tensor<8x1x1x12x!quant.uniform<i8:f32, 0.008:2>>
+  return %1 : tensor<8x1x1x12x!quant.uniform<i8:f32, 0.008:2>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_11.tflite b/integration_tests/models/16x8/test_mean/test_mean_11.tflite
new file mode 100644
index 000000000..e84cfe0cb
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_11.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_11_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_11_int16.mlir
new file mode 100644
index 000000000..043682ea9
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_11_int16.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 2nd and 3rd axes of a 4D tensor with consecutive axes and keep_dims = true.
+func.func @main(%arg0: tensor<8x5x10x12x!quant.uniform<i16:f32, 0.008>>) -> (tensor<8x1x1x12x!quant.uniform<i16:f32, 0.008>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[1, 2]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<8x5x10x12x!quant.uniform<i16:f32, 0.008>>, tensor<2xi32>) -> tensor<8x1x1x12x!quant.uniform<i16:f32, 0.008>>
+  return %1 : tensor<8x1x1x12x!quant.uniform<i16:f32, 0.008>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_1_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_1_int16.mlir
new file mode 100644
index 000000000..d02569533
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_1_int16.mlir
@@ -0,0 +1,5 @@
+func.func @main(%arg0: tensor<1x5x8x16x!quant.uniform<i16:f32, 0.0078426999971270561>> {tf_saved_model.index_path = ["input_2"]}) -> (tensor<1x5x1x16x!quant.uniform<i16:f32, 0.0078426999971270561>> {tf_saved_model.index_path = ["tf.mean_1"]}) attributes {tf.entry_function = {inputs = "serving_default_input_2:0", outputs = "PartitionedCall:0"}, tf_saved_model.exported_names = ["serving_default"]} {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<1xi32>, value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<1x5x8x16x!quant.uniform<i16:f32, 0.0078426999971270561>>, tensor<1xi32>) -> tensor<1x5x1x16x!quant.uniform<i16:f32, 0.0078426999971270561>>
+  return %1 : tensor<1x5x1x16x!quant.uniform<i16:f32, 0.0078426999971270561>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_2.mlir b/integration_tests/models/16x8/test_mean/test_mean_2.mlir
new file mode 100644
index 000000000..7b006436c
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_2.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 3rd axis of a 4D tensor without keeping dimensions.
+func.func @main(%arg0: tensor<2x3x4x5x!quant.uniform<i8:f32, 0.005:-128>>) -> (tensor<2x3x5x!quant.uniform<i8:f32, 0.006:-127>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<1xi32>, value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = false} : (tensor<2x3x4x5x!quant.uniform<i8:f32, 0.005:-128>>, tensor<1xi32>) -> tensor<2x3x5x!quant.uniform<i8:f32, 0.006:-127>>
+  return %1 : tensor<2x3x5x!quant.uniform<i8:f32, 0.006:-127>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_2.tflite b/integration_tests/models/16x8/test_mean/test_mean_2.tflite
new file mode 100644
index 000000000..bff813a03
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_2.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_2_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_2_int16.mlir
new file mode 100644
index 000000000..7cebc482c
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_2_int16.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 3rd axis of a 4D tensor without keeping dimensions.
+func.func @main(%arg0: tensor<2x3x4x5x!quant.uniform<i16:f32, 0.005>>) -> (tensor<2x3x5x!quant.uniform<i16:f32, 0.006>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<1xi32>, value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = false} : (tensor<2x3x4x5x!quant.uniform<i16:f32, 0.005>>, tensor<1xi32>) -> tensor<2x3x5x!quant.uniform<i16:f32, 0.006>>
+  return %1 : tensor<2x3x5x!quant.uniform<i16:f32, 0.006>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_3.mlir b/integration_tests/models/16x8/test_mean/test_mean_3.mlir
new file mode 100644
index 000000000..1c964c9f2
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_3.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 2nd and 4th axes of a 5D tensor while keeping dimensions.
+func.func @main(%arg0: tensor<4x3x5x7x6x!quant.uniform<i8:f32, 0.0045:0>>) -> (tensor<4x1x5x1x6x!quant.uniform<i8:f32, 0.0045:0>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[1, 3]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<4x3x5x7x6x!quant.uniform<i8:f32, 0.0045:0>>, tensor<2xi32>) -> tensor<4x1x5x1x6x!quant.uniform<i8:f32, 0.0045:0>>
+  return %1 : tensor<4x1x5x1x6x!quant.uniform<i8:f32, 0.0045:0>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_3.tflite b/integration_tests/models/16x8/test_mean/test_mean_3.tflite
new file mode 100644
index 000000000..101a53ba3
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_3.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_3_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_3_int16.mlir
new file mode 100644
index 000000000..1172d1eed
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_3_int16.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 2nd and 4th axes of a 5D tensor while keeping dimensions.
+func.func @main(%arg0: tensor<4x3x5x7x6x!quant.uniform<i16:f32, 0.0045>>) -> (tensor<4x1x5x1x6x!quant.uniform<i16:f32, 0.0045>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[1, 3]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<4x3x5x7x6x!quant.uniform<i16:f32, 0.0045>>, tensor<2xi32>) -> tensor<4x1x5x1x6x!quant.uniform<i16:f32, 0.0045>>
+  return %1 : tensor<4x1x5x1x6x!quant.uniform<i16:f32, 0.0045>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_4.mlir b/integration_tests/models/16x8/test_mean/test_mean_4.mlir
new file mode 100644
index 000000000..595ef06c6
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_4.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 1st axis of a 3D tensor without keeping dimensions.
+func.func @main(%arg0: tensor<10x20x30x!quant.uniform<i8:f32, 0.003:-5>>) -> (tensor<20x30x!quant.uniform<i8:f32, 0.003:-5>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<1xi32>, value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = false} : (tensor<10x20x30x!quant.uniform<i8:f32, 0.003:-5>>, tensor<1xi32>) -> tensor<20x30x!quant.uniform<i8:f32, 0.003:-5>>
+  return %1 : tensor<20x30x!quant.uniform<i8:f32, 0.003:-5>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_4.tflite b/integration_tests/models/16x8/test_mean/test_mean_4.tflite
new file mode 100644
index 000000000..e023e126f
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_4.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_4_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_4_int16.mlir
new file mode 100644
index 000000000..7cd5d0508
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_4_int16.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 1st axis of a 3D tensor without keeping dimensions.
+func.func @main(%arg0: tensor<10x20x30x!quant.uniform<i16:f32, 0.003>>) -> (tensor<20x30x!quant.uniform<i16:f32, 0.003>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<1xi32>, value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = false} : (tensor<10x20x30x!quant.uniform<i16:f32, 0.003>>, tensor<1xi32>) -> tensor<20x30x!quant.uniform<i16:f32, 0.003>>
+  return %1 : tensor<20x30x!quant.uniform<i16:f32, 0.003>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_5.mlir b/integration_tests/models/16x8/test_mean/test_mean_5.mlir
new file mode 100644
index 000000000..bfbc54527
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_5.mlir
@@ -0,0 +1,6 @@
+// This test reduces all axes of a 2D tensor while keeping dimensions.
+func.func @main(%arg0: tensor<5x7x!quant.uniform<i8:f32, 0.002:-3>>) -> (tensor<1x1x!quant.uniform<i8:f32, 0.002:-3>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[0, 1]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<5x7x!quant.uniform<i8:f32, 0.002:-3>>, tensor<2xi32>) -> tensor<1x1x!quant.uniform<i8:f32, 0.002:-3>>
+  return %1 : tensor<1x1x!quant.uniform<i8:f32, 0.002:-3>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_5.tflite b/integration_tests/models/16x8/test_mean/test_mean_5.tflite
new file mode 100644
index 000000000..be35d7504
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_5.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_5_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_5_int16.mlir
new file mode 100644
index 000000000..913a2c3c9
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_5_int16.mlir
@@ -0,0 +1,6 @@
+// This test reduces all axes of a 2D tensor while keeping dimensions.
+func.func @main(%arg0: tensor<5x7x!quant.uniform<i16:f32, 0.002>>) -> (tensor<1x1x!quant.uniform<i16:f32, 0.002>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[0, 1]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = true} : (tensor<5x7x!quant.uniform<i16:f32, 0.002>>, tensor<2xi32>) -> tensor<1x1x!quant.uniform<i16:f32, 0.002>>
+  return %1 : tensor<1x1x!quant.uniform<i16:f32, 0.002>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_6.mlir b/integration_tests/models/16x8/test_mean/test_mean_6.mlir
new file mode 100644
index 000000000..9d8f2be6d
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_6.mlir
@@ -0,0 +1,6 @@
+// This test reduces a 1D tensor to a scalar.
+func.func @main(%arg0: tensor<15x!quant.uniform<i8:f32, 0.009:0>>) -> (tensor<!quant.uniform<i8:f32, 0.009:0>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<1xi32>, value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = false} : (tensor<15x!quant.uniform<i8:f32, 0.009:0>>, tensor<1xi32>) -> tensor<!quant.uniform<i8:f32, 0.009:0>>
+  return %1 : tensor<!quant.uniform<i8:f32, 0.009:0>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_6.tflite b/integration_tests/models/16x8/test_mean/test_mean_6.tflite
new file mode 100644
index 000000000..200c2d60a
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_6.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_6_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_6_int16.mlir
new file mode 100644
index 000000000..57a4a498c
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_6_int16.mlir
@@ -0,0 +1,6 @@
+// This test reduces a 1D tensor to a scalar.
+func.func @main(%arg0: tensor<15x!quant.uniform<i16:f32, 0.009>>) -> (tensor<!quant.uniform<i16:f32, 0.009>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<1xi32>, value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = false} : (tensor<15x!quant.uniform<i16:f32, 0.009>>, tensor<1xi32>) -> tensor<!quant.uniform<i16:f32, 0.009>>
+  return %1 : tensor<!quant.uniform<i16:f32, 0.009>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_7.mlir b/integration_tests/models/16x8/test_mean/test_mean_7.mlir
new file mode 100644
index 000000000..36c6a0739
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_7.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 2nd and 3rd axes of a 3D tensor with different input/output quantization parameters.
+func.func @main(%arg0: tensor<5x6x7x!quant.uniform<i8:f32, 0.004:-2>>) -> (tensor<5x!quant.uniform<i8:f32, 0.0035:-1>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[1, 2]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = false} : (tensor<5x6x7x!quant.uniform<i8:f32, 0.004:-2>>, tensor<2xi32>) -> tensor<5x!quant.uniform<i8:f32, 0.0035:-1>>
+  return %1 : tensor<5x!quant.uniform<i8:f32, 0.0035:-1>>
+}
diff --git a/integration_tests/models/16x8/test_mean/test_mean_7.tflite b/integration_tests/models/16x8/test_mean/test_mean_7.tflite
new file mode 100644
index 000000000..da9ae10b1
Binary files /dev/null and b/integration_tests/models/16x8/test_mean/test_mean_7.tflite differ
diff --git a/integration_tests/models/16x8/test_mean/test_mean_7_int16.mlir b/integration_tests/models/16x8/test_mean/test_mean_7_int16.mlir
new file mode 100644
index 000000000..078b65ca7
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/test_mean_7_int16.mlir
@@ -0,0 +1,6 @@
+// This test reduces the 2nd and 3rd axes of a 3D tensor with different input/output quantization parameters.
+func.func @main(%arg0: tensor<5x6x7x!quant.uniform<i16:f32, 0.004>>) -> (tensor<5x!quant.uniform<i16:f32, 0.0035>>) {
+  %0 = "tfl.pseudo_qconst"() {qtype = tensor<2xi32>, value = dense<[1, 2]> : tensor<2xi32>} : () -> tensor<2xi32>
+  %1 = "tfl.mean"(%arg0, %0) {keep_dims = false} : (tensor<5x6x7x!quant.uniform<i16:f32, 0.004>>, tensor<2xi32>) -> tensor<5x!quant.uniform<i16:f32, 0.0035>>
+  return %1 : tensor<5x!quant.uniform<i16:f32, 0.0035>>
+}
diff --git a/integration_tests/models/16x8/test_mean/translate_mlir.py b/integration_tests/models/16x8/test_mean/translate_mlir.py
new file mode 100644
index 000000000..847e5f432
--- /dev/null
+++ b/integration_tests/models/16x8/test_mean/translate_mlir.py
@@ -0,0 +1,53 @@
+import os
+import re
+
+
+def translate_int8_to_int16_mlir(mlir_code):
+    """Rewrite int8 ``!quant.uniform`` types in MLIR text as int16 types.
+
+    Every ``!quant.uniform<i8:f32, ...>`` type is retyped to ``i16``; afterwards
+    the ``:zero_point`` suffix is dropped from every ``i16`` type, including
+    ones that were already ``i16`` in the input.
+
+    Args:
+        mlir_code (str): MLIR source text to translate.
+
+    Returns:
+        str: The translated MLIR source text.
+    """
+    widen_from = r"!quant\.uniform<i8:f32, ([^>]+)>"
+    strip_zp_from = r"!quant\.uniform<i16:f32, ([^,:]+):[^>]+>"
+    int16_type = r"!quant.uniform<i16:f32, \1>"
+
+    # Pass 1 retypes i8 storage to i16 and keeps the parameters untouched.
+    widened = re.sub(widen_from, int16_type, mlir_code)
+
+    # Pass 2 keeps only the scale (the text before ':') of each i16 type.
+    zero_point_free = re.sub(strip_zp_from, int16_type, widened)
+    return zero_point_free
+
+
+def process_mlir_files_in_directory():
+    """Generate an ``*_int16.mlir`` sibling for each int8 ``.mlir`` file.
+
+    Scans the current working directory, skips names that already end in
+    ``_int16.mlir``, and overwrites any existing output of the same name.
+    """
+    generated_suffix = "_int16.mlir"
+    for source_name in os.listdir("."):
+        is_mlir = source_name.endswith(".mlir")
+        if is_mlir and not source_name.endswith(generated_suffix):
+            with open(source_name, "r") as source_file:
+                int8_text = source_file.read()
+            int16_text = translate_int8_to_int16_mlir(int8_text)
+            # Derive the output name by swapping ".mlir" for "_int16.mlir".
+            stem = os.path.splitext(source_name)[0]
+            target_name = f"{stem}_int16.mlir"
+            with open(target_name, "w") as target_file:
+                target_file.write(int16_text)
+            print(f"Processed {source_name} -> {target_name}")
+
+
+# Run the translation only when this file is executed directly as a script.
+if __name__ == "__main__":
+    process_mlir_files_in_directory()
diff --git a/third_party/lib_nn b/third_party/lib_nn
index 52a618797..48ac92d54 160000
--- a/third_party/lib_nn
+++ b/third_party/lib_nn
@@ -1 +1 @@
-Subproject commit 52a618797e3ece7a070e5cc45bac3f3766318c96
+Subproject commit 48ac92d54c6f6d7abc9e3f419b518a7ccfd7d81a
diff --git a/third_party/lib_tflite_micro b/third_party/lib_tflite_micro
index 20a0abe2a..be7deb10f 160000
--- a/third_party/lib_tflite_micro
+++ b/third_party/lib_tflite_micro
@@ -1 +1 @@
-Subproject commit 20a0abe2aa73c614cb184aa026cd4325bd8a45bc
+Subproject commit be7deb10f8f547f0fe5eeca37eb9fdeffd1c0cf9
diff --git a/xformer/IR/XCoreOps.td b/xformer/IR/XCoreOps.td
index f471990d6..bd473b1ef 100644
--- a/xformer/IR/XCoreOps.td
+++ b/xformer/IR/XCoreOps.td
@@ -200,6 +200,23 @@ def XC_MeanOp : XC_Op<"mean", [Pure]> {
   let results = (outs TensorOf<[QI8]> : $output);
 }
 
+def XC_MeanI16Op : XC_Op<"meani16", [Pure]> {
+  let summary = "Mean int16 op";
+
+  let description = [{Mean int16 op.}];
+
+  let arguments = (ins // no zero-point attrs: the int16 rewrite passes none
+    TensorOf<[QI16]>:$input, // quantized int16 tensor to reduce
+
+    I32Attr:$start,     // beginDims from ReplaceMean/ReplaceSum; flexbuffer key "s"
+    I32Attr:$mean,      // meanDims (sumDims for Sum); flexbuffer key "m"
+    I32Attr:$end,       // endDims; flexbuffer key "e"
+    F32Attr:$scale_mul  // in/out scale ratio (Mean also divides by meanDims); key "sm"
+  );
+
+  let results = (outs TensorOf<[QI16]> : $output);
+}
+
 def XC_MulOp : XC_Op<"mul", [Pure, XC_MemoryOverlappable]> {
   let summary = "Mul op";
 
diff --git a/xformer/Transforms/ReplaceMean.cpp b/xformer/Transforms/ReplaceMean.cpp
index 59c20b387..a5dc86b28 100644
--- a/xformer/Transforms/ReplaceMean.cpp
+++ b/xformer/Transforms/ReplaceMean.cpp
@@ -1,6 +1,3 @@
-// Copyright 2021 XMOS LIMITED. This Software is subject to the terms of the
-// XMOS Public License: Version 1
-
 #include "IR/XCoreOps.h"
 #include "Utils/Util.h"
 
@@ -16,7 +13,7 @@ extern "C" {
 namespace mlir::xcore {
 
 namespace {
-// Replace TFL Mean with Mean for XCore.
+// Replace TFL Mean with Mean or Mean16 for XCore.
 struct ReplaceMean
     : public PassWrapper<ReplaceMean, OperationPass<func::FuncOp>> {
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ReplaceMean)
@@ -26,7 +23,7 @@ struct ReplaceMean
   }
   StringRef getArgument() const final { return "xcore-replace-mean"; }
   StringRef getDescription() const final {
-    return "Replace TFL Mean with Mean for XCore.";
+    return "Replace TFL Mean with Mean or Mean16 for XCore.";
   }
   void runOnOperation() override;
 };
@@ -41,7 +38,9 @@ struct ReplaceMeanPattern : public OpRewritePattern<TFL::MeanOp> {
     auto output = meanOp.getOutput();
 
     DenseElementsAttr axisAttr;
-    matchPattern(meanOp.getAxis(), m_Constant(&axisAttr));
+    if (!matchPattern(meanOp.getAxis(), m_Constant(&axisAttr))) {
+      return failure();
+    }
     auto axisValues = axisAttr.getValues<int32_t>();
     std::vector<int32_t> axis(axisValues.begin(), axisValues.end());
     int32_t minAxis = *std::min_element(axis.begin(), axis.end());
@@ -52,14 +51,19 @@ struct ReplaceMeanPattern : public OpRewritePattern<TFL::MeanOp> {
 
     auto inputType = input.getType().cast<ShapedType>();
     auto outputType = output.getType().cast<ShapedType>();
-    if (!utils::isNBitSignedQType<8>(inputType.getElementType()) ||
-        !utils::isNBitSignedQType<8>(outputType.getElementType())) {
+
+    // Check if input and output are either int8 or int16.
+    bool isInt8 = utils::isNBitSignedQType<8>(inputType.getElementType()) &&
+                  utils::isNBitSignedQType<8>(outputType.getElementType());
+
+    bool isInt16 = utils::isNBitSignedQType<16>(inputType.getElementType()) &&
+                   utils::isNBitSignedQType<16>(outputType.getElementType());
+
+    if (!(isInt8 || isInt16)) {
       return failure();
     }
 
     auto inputShape = inputType.getShape();
-    auto outputShape = outputType.getShape();
-
     int rank = inputShape.size();
 
     int beginDims = 1;
@@ -80,23 +84,32 @@ struct ReplaceMeanPattern : public OpRewritePattern<TFL::MeanOp> {
     auto inputQType = utils::getQType(input);
     auto outputQType = utils::getQType(output);
 
-    float inZeroPoint = static_cast<float>(inputQType.getZeroPoint());
-    float outZeroPoint = static_cast<float>(outputQType.getZeroPoint());
     float scaleMul = inputQType.getScale() / outputQType.getScale() /
                      static_cast<float>(meanDims);
+    auto scaleMulAttr = rewriter.getF32FloatAttr(scaleMul);
 
     auto beginDimsAttr = rewriter.getI32IntegerAttr(beginDims);
     auto endDimsAttr = rewriter.getI32IntegerAttr(endDims);
     auto meanDimsAttr = rewriter.getI32IntegerAttr(meanDims);
-    auto inZeroPointAttr = rewriter.getF32FloatAttr(inZeroPoint);
-    auto outZeroPointAttr = rewriter.getF32FloatAttr(outZeroPoint);
-    auto scaleMulAttr = rewriter.getF32FloatAttr(scaleMul);
 
-    auto xcMeanOp = rewriter.create<MeanOp>(
-        meanOp.getLoc(), meanOp.getType(), meanOp.getInput(), beginDimsAttr,
-        meanDimsAttr, endDimsAttr, inZeroPointAttr, outZeroPointAttr,
-        scaleMulAttr);
-    rewriter.replaceOp(meanOp, xcMeanOp.getOutput());
+    if (isInt8) {
+      float inZeroPoint = static_cast<float>(inputQType.getZeroPoint());
+      float outZeroPoint = static_cast<float>(outputQType.getZeroPoint());
+      auto inZeroPointAttr = rewriter.getF32FloatAttr(inZeroPoint);
+      auto outZeroPointAttr = rewriter.getF32FloatAttr(outZeroPoint);
+
+      auto xcMeanOp = rewriter.create<MeanOp>(
+          meanOp.getLoc(), meanOp.getType(), meanOp.getInput(), beginDimsAttr,
+          meanDimsAttr, endDimsAttr, inZeroPointAttr, outZeroPointAttr,
+          scaleMulAttr);
+      rewriter.replaceOp(meanOp, xcMeanOp.getOutput());
+    } else { // isInt16
+      // Zero points are always zero for int16 and are not passed to MeanI16Op.
+      auto xcMeanOp = rewriter.create<MeanI16Op>(
+          meanOp.getLoc(), meanOp.getType(), meanOp.getInput(), beginDimsAttr,
+          meanDimsAttr, endDimsAttr, scaleMulAttr);
+      rewriter.replaceOp(meanOp, xcMeanOp.getOutput());
+    }
 
     return success();
   }
diff --git a/xformer/Transforms/ReplaceSum.cpp b/xformer/Transforms/ReplaceSum.cpp
index 6415b6a62..b5a788401 100644
--- a/xformer/Transforms/ReplaceSum.cpp
+++ b/xformer/Transforms/ReplaceSum.cpp
@@ -1,6 +1,3 @@
-// Copyright 2021 XMOS LIMITED. This Software is subject to the terms of the
-// XMOS Public License: Version 1
-
 #include "IR/XCoreOps.h"
 #include "Utils/Util.h"
 
@@ -16,7 +13,7 @@ extern "C" {
 namespace mlir::xcore {
 
 namespace {
-// Replace TFL Sum with Mean for XCore.
+// Replace TFL Sum with Mean or Mean16 for XCore.
 struct ReplaceSum
     : public PassWrapper<ReplaceSum, OperationPass<func::FuncOp>> {
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ReplaceSum)
@@ -26,7 +23,7 @@ struct ReplaceSum
   }
   StringRef getArgument() const final { return "xcore-replace-sum"; }
   StringRef getDescription() const final {
-    return "Replace TFL Sum with mean for XCore.";
+    return "Replace TFL Sum with Mean or Mean16 for XCore.";
   }
   void runOnOperation() override;
 };
@@ -52,8 +49,15 @@ struct ReplaceSumPattern : public OpRewritePattern<TFL::SumOp> {
 
     auto inputType = input.getType().cast<ShapedType>();
     auto outputType = output.getType().cast<ShapedType>();
-    if (!utils::isNBitSignedQType<8>(inputType.getElementType()) ||
-        !utils::isNBitSignedQType<8>(outputType.getElementType())) {
+
+    // Check if input and output are either int8 or int16.
+    bool isInt8 = utils::isNBitSignedQType<8>(inputType.getElementType()) &&
+                  utils::isNBitSignedQType<8>(outputType.getElementType());
+
+    bool isInt16 = utils::isNBitSignedQType<16>(inputType.getElementType()) &&
+                   utils::isNBitSignedQType<16>(outputType.getElementType());
+
+    if (!(isInt8 || isInt16)) {
       return failure();
     }
 
@@ -80,22 +84,31 @@ struct ReplaceSumPattern : public OpRewritePattern<TFL::SumOp> {
     auto inputQType = utils::getQType(input);
     auto outputQType = utils::getQType(output);
 
-    float inZeroPoint = static_cast<float>(inputQType.getZeroPoint());
-    float outZeroPoint = static_cast<float>(outputQType.getZeroPoint());
     float scaleMul = inputQType.getScale() / outputQType.getScale();
+    auto scaleMulAttr = rewriter.getF32FloatAttr(scaleMul);
 
     auto beginDimsAttr = rewriter.getI32IntegerAttr(beginDims);
     auto endDimsAttr = rewriter.getI32IntegerAttr(endDims);
     auto meanDimsAttr = rewriter.getI32IntegerAttr(sumDims);
-    auto inZeroPointAttr = rewriter.getF32FloatAttr(inZeroPoint);
-    auto outZeroPointAttr = rewriter.getF32FloatAttr(outZeroPoint);
-    auto scaleMulAttr = rewriter.getF32FloatAttr(scaleMul);
 
-    auto xcSumOp = rewriter.create<MeanOp>(
-        sumOp.getLoc(), sumOp.getType(), sumOp.getInput(), beginDimsAttr,
-        meanDimsAttr, endDimsAttr, inZeroPointAttr, outZeroPointAttr,
-        scaleMulAttr);
-    rewriter.replaceOp(sumOp, xcSumOp.getOutput());
+    if (isInt8) {
+      float inZeroPoint = static_cast<float>(inputQType.getZeroPoint());
+      float outZeroPoint = static_cast<float>(outputQType.getZeroPoint());
+      auto inZeroPointAttr = rewriter.getF32FloatAttr(inZeroPoint);
+      auto outZeroPointAttr = rewriter.getF32FloatAttr(outZeroPoint);
+
+      auto xcSumOp = rewriter.create<MeanOp>(
+          sumOp.getLoc(), sumOp.getType(), sumOp.getInput(), beginDimsAttr,
+          meanDimsAttr, endDimsAttr, inZeroPointAttr, outZeroPointAttr,
+          scaleMulAttr);
+      rewriter.replaceOp(sumOp, xcSumOp.getOutput());
+    } else { // isInt16
+      // Zero points are always zero for int16 and are not passed to MeanI16Op.
+      auto xcSumOp = rewriter.create<MeanI16Op>(
+          sumOp.getLoc(), sumOp.getType(), sumOp.getInput(), beginDimsAttr,
+          meanDimsAttr, endDimsAttr, scaleMulAttr);
+      rewriter.replaceOp(sumOp, xcSumOp.getOutput());
+    }
 
     return success();
   }
diff --git a/xformer/Transforms/TranslateToCustomOp.cpp b/xformer/Transforms/TranslateToCustomOp.cpp
index 3b46b91f6..df30083d1 100644
--- a/xformer/Transforms/TranslateToCustomOp.cpp
+++ b/xformer/Transforms/TranslateToCustomOp.cpp
@@ -81,6 +81,18 @@ std::vector<uint8_t> MeanOp::buildCustomOptions() {
   return fbb.GetBuffer();
 }
 
+std::vector<uint8_t> MeanI16Op::buildCustomOptions() {
+  flexbuffers::Builder builder; // holds the "s"/"m"/"e"/"sm" option map
+  const auto mapStart = builder.StartMap();
+  builder.Int("s", static_cast<int32_t>(getStart()));
+  builder.Int("m", static_cast<int32_t>(getMean()));
+  builder.Int("e", static_cast<int32_t>(getEnd()));
+  builder.IndirectFloat("sm", getScaleMul().convertToFloat());
+  builder.EndMap(mapStart);
+  builder.Finish();
+  return builder.GetBuffer();
+}
+
 std::vector<uint8_t> SliceOp::buildCustomOptions() {
   flexbuffers::Builder fbb;
   auto rootMap = fbb.StartMap();
@@ -268,6 +280,7 @@ void TranslateToCustomOp::runOnOperation() {
   patterns.insert<RewriteToCustomOp<BatchedSoftmaxOp>>(ctx);
   patterns.insert<RewriteToCustomOp<MulOp>>(ctx);
   patterns.insert<RewriteToCustomOp<MeanOp>>(ctx);
+  patterns.insert<RewriteToCustomOp<MeanI16Op>>(ctx);
   patterns.insert<RewriteToCustomOp<Pad3To4Op>>(ctx);
   patterns.insert<RewriteToCustomOp<SliceOp>>(ctx);
   patterns.insert<RewriteToCustomOp<BroadcastOp>>(ctx);
diff --git a/xformer/lib_tflite_micro.BUILD b/xformer/lib_tflite_micro.BUILD
index 28bbd26ed..5b736d498 100644
--- a/xformer/lib_tflite_micro.BUILD
+++ b/xformer/lib_tflite_micro.BUILD
@@ -42,6 +42,7 @@ filegroup(
         "lib_tflite_micro/src/tflite-xcore-kernels/xcore_broadcast.cc",
         "lib_tflite_micro/src/tflite-xcore-kernels/xcore_mul.cc",
         "lib_tflite_micro/src/tflite-xcore-kernels/xcore_mean.cc",
+        "lib_tflite_micro/src/tflite-xcore-kernels/xcore_meani16.cc",
         "lib_tflite_micro/src/tflite-xcore-kernels/xcore_expand_8_to_16.cc",
         "lib_tflite_micro/src/tflite-xcore-kernels/xcore_binaryi16.cc",
         "lib_tflite_micro/src/tflite-xcore-kernels/xcore_unaryi16.cc",