Fix security vulnerabilities (#11260)

Summary: IBM code scan found a few issues. - Use of uninitialized variables: It is always safe to initialize variables. Most instances fixed in the PR get initialized in a function but depending on a function is unsafe. - Detect and handle memory allocation errors - Bitwise operations (~, <<, >>, &, ^ and |) and their combinations with assign operator (<<=, >>=, &=, ^= and |=) are not normally meaningful on signed integers. Bitwise operators should be used only with unsigned integer operands, as the result of bitwise operations on signed integers are implementation-defined. Pull Request resolved: #11260 Reviewed By: pedroerp Differential Revision: D64630405 Pulled By: kgpai fbshipit-source-id: da1cc18bf9934f7bfa426204f4ca93a5a4a7189b
facebookincubator · Oct 28, 2024 · 878388f · 878388f
1 parent c8ac4e3
commit 878388f
Show file tree

Hide file tree

Showing 13 changed files with 72 additions and 50 deletions.
diff --git a/velox/common/base/VeloxException.cpp b/velox/common/base/VeloxException.cpp
@@ -34,7 +34,7 @@ std::exception_ptr toVeloxException(const std::exception_ptr& exceptionPtr) {
 }
 
 int64_t& threadNumVeloxThrow() {
-  thread_local int64_t numThrow;
+  thread_local int64_t numThrow = 0;
   return numThrow;
 }
 

diff --git a/velox/common/process/ProcessBase.cpp b/velox/common/process/ProcessBase.cpp
@@ -63,7 +63,7 @@ std::string getAppName() {
  * This machine's name.
  */
 std::string getHostName() {
-  char hostbuf[_POSIX_HOST_NAME_MAX + 1];
+  char hostbuf[_POSIX_HOST_NAME_MAX + 1] = {0};
   if (gethostname(hostbuf, _POSIX_HOST_NAME_MAX + 1) < 0) {
     return "";
   } else {

diff --git a/velox/dwio/dwrf/writer/Writer.cpp b/velox/dwio/dwrf/writer/Writer.cpp
@@ -449,7 +449,7 @@ void Writer::flushStripe(bool close) {
   uint64_t offset = 0;
   const auto addStream = [&](const DwrfStreamIdentifier& stream,
                              const auto& out) {
-    uint32_t currentIndex;
+    uint32_t currentIndex = 0;
     const auto nodeId = stream.encodingKey().node();
     proto::Stream* s = encodingManager.addStreamToFooter(nodeId, currentIndex);
 

diff --git a/velox/dwio/parquet/writer/arrow/EncryptionInternal.cpp b/velox/dwio/parquet/writer/arrow/EncryptionInternal.cpp
@@ -37,15 +37,19 @@ constexpr int kCtrMode = 1;
 constexpr int kCtrIvLength = 16;
 constexpr int kBufferSizeLength = 4;
 
-#define ENCRYPT_INIT(CTX, ALG)                                        \
-  if (1 != EVP_EncryptInit_ex(CTX, ALG, nullptr, nullptr, nullptr)) { \
-    throw ParquetException("Couldn't init ALG encryption");           \
-  }
-
-#define DECRYPT_INIT(CTX, ALG)                                        \
-  if (1 != EVP_DecryptInit_ex(CTX, ALG, nullptr, nullptr, nullptr)) { \
-    throw ParquetException("Couldn't init ALG decryption");           \
-  }
+#define ENCRYPT_INIT(CTX, ALG)                                          \
+  do {                                                                  \
+    if (1 != EVP_EncryptInit_ex(CTX, ALG, nullptr, nullptr, nullptr)) { \
+      throw ParquetException("Couldn't init ALG encryption");           \
+    }                                                                   \
+  } while (0)
+
+#define DECRYPT_INIT(CTX, ALG)                                          \
+  do {                                                                  \
+    if (1 != EVP_DecryptInit_ex(CTX, ALG, nullptr, nullptr, nullptr)) { \
+      throw ParquetException("Couldn't init ALG decryption");           \
+    }                                                                   \
+  } while (0)
 
 class AesEncryptor::AesEncryptorImpl {
  public:
@@ -233,7 +237,7 @@ int AesEncryptor::AesEncryptorImpl::GcmEncrypt(
     const uint8_t* aad,
     int aad_len,
     uint8_t* ciphertext) {
-  int len;
+  int len = 0;
   int ciphertext_len;
 
   uint8_t tag[kGcmTagLength];
@@ -281,7 +285,7 @@ int AesEncryptor::AesEncryptorImpl::GcmEncrypt(
   }
 
   // Copying the buffer size, nonce and tag to ciphertext
-  int buffer_size = kNonceLength + ciphertext_len + kGcmTagLength;
+  uint32_t buffer_size = kNonceLength + ciphertext_len + kGcmTagLength;
   if (length_buffer_length_ > 0) {
     ciphertext[3] = static_cast<uint8_t>(0xff & (buffer_size >> 24));
     ciphertext[2] = static_cast<uint8_t>(0xff & (buffer_size >> 16));
@@ -304,7 +308,7 @@ int AesEncryptor::AesEncryptorImpl::CtrEncrypt(
     int key_len,
     const uint8_t* nonce,
     uint8_t* ciphertext) {
-  int len;
+  int len = 0;
   int ciphertext_len;
 
   // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
@@ -346,7 +350,7 @@ int AesEncryptor::AesEncryptorImpl::CtrEncrypt(
   ciphertext_len += len;
 
   // Copying the buffer size and nonce to ciphertext
-  int buffer_size = kNonceLength + ciphertext_len;
+  uint32_t buffer_size = kNonceLength + ciphertext_len;
   if (length_buffer_length_ > 0) {
     ciphertext[3] = static_cast<uint8_t>(0xff & (buffer_size >> 24));
     ciphertext[2] = static_cast<uint8_t>(0xff & (buffer_size >> 16));
@@ -593,7 +597,7 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(
     const uint8_t* aad,
     int aad_len,
     uint8_t* plaintext) {
-  int len;
+  int len = 0;
   int plaintext_len;
 
   uint8_t tag[kGcmTagLength];
@@ -603,7 +607,7 @@ int AesDecryptor::AesDecryptorImpl::GcmDecrypt(
 
   if (length_buffer_length_ > 0) {
     // Extract ciphertext length
-    int written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) |
+    uint32_t written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) |
         ((ciphertext[2] & 0xff) << 16) | ((ciphertext[1] & 0xff) << 8) |
         ((ciphertext[0] & 0xff));
 
@@ -672,15 +676,15 @@ int AesDecryptor::AesDecryptorImpl::CtrDecrypt(
     const uint8_t* key,
     int key_len,
     uint8_t* plaintext) {
-  int len;
+  int len = 0;
   int plaintext_len;
 
   uint8_t iv[kCtrIvLength];
   memset(iv, 0, kCtrIvLength);
 
   if (length_buffer_length_ > 0) {
     // Extract ciphertext length
-    int written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) |
+    uint32_t written_ciphertext_len = ((ciphertext[3] & 0xff) << 24) |
         ((ciphertext[2] & 0xff) << 16) | ((ciphertext[1] & 0xff) << 8) |
         ((ciphertext[0] & 0xff));
 
@@ -757,8 +761,9 @@ int AesDecryptor::AesDecryptorImpl::Decrypt(
 static std::string ShortToBytesLe(int16_t input) {
   int8_t output[2];
   memset(output, 0, 2);
-  output[1] = static_cast<int8_t>(0xff & (input >> 8));
-  output[0] = static_cast<int8_t>(0xff & (input));
+  uint16_t in = static_cast<uint16_t>(input);
+  output[1] = static_cast<int8_t>(0xff & (in >> 8));
+  output[0] = static_cast<int8_t>(0xff & (in));
 
   return std::string(reinterpret_cast<char const*>(output), 2);
 }

diff --git a/velox/exec/AggregateInfo.h b/velox/exec/AggregateInfo.h
@@ -49,7 +49,7 @@ struct AggregateInfo {
   bool distinct{false};
 
   /// Index of the result column in the output RowVector.
-  column_index_t output;
+  column_index_t output{0};
 
   /// Type of intermediate results. Used for spilling.
   TypePtr intermediateType;

diff --git a/velox/exec/ContainerRowSerde.cpp b/velox/exec/ContainerRowSerde.cpp
@@ -315,7 +315,7 @@ void deserializeOne<TypeKind::ARRAY>(
   if (array->size() <= index) {
     array->resize(index + 1);
   }
-  vector_size_t offset;
+  vector_size_t offset = 0;
   auto size = deserializeArray(in, *array->elements(), offset);
   array->setOffsetAndSize(index, offset, size);
   result.setNull(index, false);
@@ -331,9 +331,9 @@ void deserializeOne<TypeKind::MAP>(
   if (map->size() <= index) {
     map->resize(index + 1);
   }
-  vector_size_t keyOffset;
+  vector_size_t keyOffset = 0;
   auto keySize = deserializeArray(in, *map->mapKeys(), keyOffset);
-  vector_size_t valueOffset;
+  vector_size_t valueOffset = 0;
   auto valueSize = deserializeArray(in, *map->mapValues(), valueOffset);
   VELOX_CHECK_EQ(keySize, valueSize);
   VELOX_CHECK_EQ(keyOffset, valueOffset);

diff --git a/velox/exec/HashAggregation.cpp b/velox/exec/HashAggregation.cpp
@@ -162,8 +162,8 @@ void HashAggregation::addInput(RowVectorPtr input) {
 void HashAggregation::updateRuntimeStats() {
   // Report range sizes and number of distinct values for the group-by keys.
   const auto& hashers = groupingSet_->hashLookup().hashers;
-  uint64_t asRange;
-  uint64_t asDistinct;
+  uint64_t asRange{0};
+  uint64_t asDistinct{0};
   const auto hashTableStats = groupingSet_->hashTableStats();
 
   auto lockedStats = stats_.wlock();

diff --git a/velox/exec/HashBuild.cpp b/velox/exec/HashBuild.cpp
@@ -893,8 +893,8 @@ void HashBuild::addRuntimeStats() {
   // Report range sizes and number of distinct values for the join keys.
   const auto& hashers = table_->hashers();
   const auto hashTableStats = table_->stats();
-  uint64_t asRange;
-  uint64_t asDistinct;
+  uint64_t asRange{0};
+  uint64_t asDistinct{0};
   auto lockedStats = stats_.wlock();
 
   lockedStats->addInputTiming.add(table_->offThreadBuildTiming());

diff --git a/velox/exec/OutputBuffer.h b/velox/exec/OutputBuffer.h
@@ -40,8 +40,8 @@ using DataAvailableCallback = std::function<void(
 using DataConsumerActiveCheckCallback = std::function<bool()>;
 
 struct DataAvailable {
-  DataAvailableCallback callback;
-  int64_t sequence;
+  DataAvailableCallback callback{nullptr};
+  int64_t sequence{0};
   std::vector<std::unique_ptr<folly::IOBuf>> data;
   std::vector<int64_t> remainingBytes;
 
@@ -140,7 +140,7 @@ class DestinationBuffer {
 
     /// Whether the result is returned immediately without invoking the `notify'
     /// callback.
-    bool immediate;
+    bool immediate{false};
   };
 
   /// Returns a shallow copy (folly::IOBuf::clone) of the data starting at

diff --git a/velox/tpch/gen/dbgen/bm_utils.cpp b/velox/tpch/gen/dbgen/bm_utils.cpp
@@ -91,7 +91,7 @@
 /* End of lines added by Chuck McDevitt for WIN32 support */
 #include "dbgen/dsstypes.h" // @manual
 
-static char alpha_num[65] =
+static const char* alpha_num =
     "0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ,";
 
 #if defined(__STDC__) || defined(__cplusplus)

diff --git a/velox/tpch/gen/dbgen/text.cpp b/velox/tpch/gen/dbgen/text.cpp
@@ -1,3 +1,18 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 /*
  * Copyright owned by the Transaction Processing Performance Council.
  *
@@ -245,6 +260,8 @@ void init_text_pool(long bSize, DBGenContext* ctx) {
 
   txtBufferSize = bSize;
   szTextPool = (char*)malloc(bSize + 1 + 100);
+  MALLOC_CHECK(szTextPool);
+  memset(szTextPool, 0, bSize + 1 + 100);
 
   char* ptr = szTextPool;
   char* endptr = szTextPool + bSize + 1;

diff --git a/velox/vector/DecodedVector.cpp b/velox/vector/DecodedVector.cpp
@@ -21,7 +21,7 @@
 
 namespace facebook::velox {
 
-uint64_t DecodedVector::constantNullMask_;
+uint64_t DecodedVector::constantNullMask_{0};
 
 namespace {
 std::vector<vector_size_t> makeConsecutiveIndices(size_t size) {

diff --git a/velox/vector/arrow/Abi.h b/velox/vector/arrow/Abi.h
@@ -36,13 +36,13 @@ extern "C" {
 
 struct ArrowSchema {
   // Array type description
-  const char* format;
-  const char* name;
-  const char* metadata;
-  int64_t flags;
-  int64_t n_children;
-  struct ArrowSchema** children;
-  struct ArrowSchema* dictionary;
+  const char* format{nullptr};
+  const char* name{nullptr};
+  const char* metadata{nullptr};
+  int64_t flags{0};
+  int64_t n_children{0};
+  struct ArrowSchema** children{nullptr};
+  struct ArrowSchema* dictionary{nullptr};
 
   // Release callback
   void (*release)(struct ArrowSchema*);
@@ -52,14 +52,14 @@ struct ArrowSchema {
 
 struct ArrowArray {
   // Array data description
-  int64_t length;
-  int64_t null_count;
-  int64_t offset;
-  int64_t n_buffers;
-  int64_t n_children;
-  const void** buffers;
-  struct ArrowArray** children;
-  struct ArrowArray* dictionary;
+  int64_t length{0};
+  int64_t null_count{0};
+  int64_t offset{0};
+  int64_t n_buffers{0};
+  int64_t n_children{0};
+  const void** buffers{nullptr};
+  struct ArrowArray** children{nullptr};
+  struct ArrowArray* dictionary{nullptr};
 
   // Release callback
   void (*release)(struct ArrowArray*);