
Commit

Section 4
zjhellofss committed Jul 1, 2024
1 parent a2df727 commit 06ef5d5
Showing 4 changed files with 203 additions and 47 deletions.
57 changes: 20 additions & 37 deletions kuiper/include/tensor/tensor.h
@@ -13,22 +13,30 @@ class Tensor {
explicit Tensor() = default;

explicit Tensor(base::DataType data_type, int32_t dim0, bool need_alloc = false,
std::shared_ptr<base::DeviceAllocator> alloc = nullptr);
std::shared_ptr<base::DeviceAllocator> alloc = nullptr,
void* ptr = nullptr);

explicit Tensor(base::DataType data_type, int32_t dim0, int32_t dim1,
bool need_alloc = false,
std::shared_ptr<base::DeviceAllocator> alloc = nullptr);
std::shared_ptr<base::DeviceAllocator> alloc = nullptr,
void* ptr = nullptr);

explicit Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim2,
bool need_alloc = false,
std::shared_ptr<base::DeviceAllocator> alloc = nullptr);
std::shared_ptr<base::DeviceAllocator> alloc = nullptr,
void* ptr = nullptr);

explicit Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim2,
int32_t dim3, bool need_alloc = false,
std::shared_ptr<base::DeviceAllocator> alloc = nullptr);
std::shared_ptr<base::DeviceAllocator> alloc = nullptr,
void* ptr = nullptr);

explicit Tensor(base::DataType data_type, std::vector<int32_t> dims);

// void to_cpu();

// void to_cuda();

bool is_empty() const;

template <typename T>
@@ -39,6 +47,8 @@ class Tensor {

void reshape(const std::vector<int32_t>& dims);

std::shared_ptr<base::Buffer> get_buffer() const;

size_t size() const;

size_t byte_size() const;
@@ -76,8 +86,7 @@ class Tensor {
template <typename T>
const T& index(int64_t offset) const;

template <typename T>
void transpose_dim12(Tensor dst);
tensor::Tensor clone() const;

private:
size_t size_ = 0;
@@ -88,12 +97,16 @@

template <typename T>
T& Tensor::index(int64_t offset) {
CHECK_GE(offset, 0);
CHECK_LT(offset, this->size());
T& val = *(reinterpret_cast<T*>(buffer_->ptr()) + offset);
return val;
}

template <typename T>
const T& Tensor::index(int64_t offset) const {
CHECK_GE(offset, 0);
CHECK_LT(offset, this->size());
const T& val = *(reinterpret_cast<T*>(buffer_->ptr()) + offset);
return val;
}
@@ -127,35 +140,5 @@ const T* Tensor::ptr(int64_t index) const {
<< "The data area buffer of this tensor is empty or it points to a null pointer.";
return reinterpret_cast<const T*>(buffer_->ptr()) + index;
}

template <typename T>
void Tensor::transpose_dim12(Tensor dst) {
CHECK_EQ(dims_size(), 3);
CHECK_EQ(is_empty(), false);
CHECK_EQ(dst.dims_size(), 3);
CHECK_EQ(dst.is_empty(), false);
CHECK_EQ(get_dim(0), dst.get_dim(0));
CHECK_EQ(get_dim(1), dst.get_dim(2));
CHECK_EQ(get_dim(2), dst.get_dim(1));
CHECK(device_type() == dst.device_type());
CHECK(device_type() == base::DeviceType::kDeviceCPU);

int32_t src_ch = this->get_dim(0);
int32_t src_row = this->get_dim(1);
int32_t src_col = this->get_dim(2);
int32_t dst_row = dst.get_dim(1);
int32_t dst_col = dst.get_dim(2);
int32_t plane_size = src_col * src_row;

T* src_ptr = this->ptr<T>();
T* dst_ptr = dst.ptr<T>();
for (int32_t ch = 0; ch < src_ch; ++ch) {
T* src_ch_ptr = src_ptr + ch * plane_size;
T* dst_ch_ptr = dst_ptr + ch * plane_size;
arma::Mat<T> src_mat = arma::Mat<T>(src_ch_ptr, src_col, src_row, false, true);
arma::Mat<T> dst_mat = arma::Mat<T>(dst_ch_ptr, dst_col, dst_row, false, true);
dst_mat = src_mat.t();
}
}
} // namespace tensor
#endif // KUIPER_INCLUDE_TENSOR_TENSOR_H_
#endif // KUIPER_INCLUDE_TENSOR_TENSOR_H_
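
To make the new constructor signature concrete, here is a minimal usage sketch (not part of the commit). The include paths are assumptions, and it assumes `base::CPUDeviceAllocatorFactory` (referenced in the commented-out `to_cpu` code further down) returns a CPU allocator.

```cpp
#include <vector>

#include "base/alloc.h"     // assumed include path for the allocator factory
#include "tensor/tensor.h"

int main() {
  // (a) Let the tensor allocate its own storage through a device allocator.
  auto alloc = base::CPUDeviceAllocatorFactory::get_instance();
  tensor::Tensor owned(base::DataType::kDataTypeFp32, /*dim0=*/32, /*dim1=*/64,
                       /*need_alloc=*/true, alloc);

  // (b) Wrap caller-owned memory: need_alloc stays false and ptr is passed in,
  //     so the tensor references this buffer instead of allocating a new one.
  std::vector<float> external(32 * 64, 0.f);
  tensor::Tensor wrapped(base::DataType::kDataTypeFp32, 32, 64,
                         /*need_alloc=*/false, /*alloc=*/nullptr,
                         /*ptr=*/external.data());

  owned.index<float>(0) = 1.f;
  float first = wrapped.index<float>(0);  // reads external[0]
  (void)first;
  return 0;
}
```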
146 changes: 137 additions & 9 deletions kuiper/source/tensor/tensor.cpp
@@ -4,50 +4,110 @@

namespace tensor {
template <typename T, typename Tp>
static inline size_t ReduceDimension(T begin, T end, Tp init) {
static size_t reduce_dimension(T begin, T end, Tp init) {
if (begin >= end) {
return 0;
}
size_t size = std::accumulate(begin, end, init, std::multiplies<>());
return size;
}

static size_t data_type_size(base::DataType data_type) {
switch (data_type) {
case base::DataType::kDataTypeFp32: {
return 4;
}
case base::DataType::kDataTypeInt8: {
return 1;
}
case base::DataType::kDataTypeInt32: {
return 4;
}
default: {
LOG(FATAL) << "Unknown data type size for " << int(data_type);
return 0;
}
}
}

Tensor::Tensor(base::DataType data_type, int32_t dim0, bool need_alloc,
std::shared_ptr<base::DeviceAllocator> alloc)
std::shared_ptr<base::DeviceAllocator> alloc, void* ptr)
: data_type_(data_type) {
dims_.push_back(dim0);
size_ = dim0;
if (need_alloc && alloc) {
allocate(alloc);
} else {
if (ptr != nullptr) {
CHECK(need_alloc == false)
<< "The need_alloc is is true when ptr parameter is not a null pointer.";
if (!alloc) {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, nullptr, ptr, true);
this->buffer_ = buffer;
} else {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, alloc, ptr, false);
this->buffer_ = buffer;
}
}
}
}

Tensor::Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, bool need_alloc,
std::shared_ptr<base::DeviceAllocator> alloc)
std::shared_ptr<base::DeviceAllocator> alloc, void* ptr)
: data_type_(data_type) {
dims_.push_back(dim0);
dims_.push_back(dim1);
size_ = dim0 * dim1;
if (need_alloc && alloc) {
allocate(alloc);
} else {
if (ptr != nullptr) {
CHECK(need_alloc == false)
<< "The need_alloc is is true when ptr parameter is not a null pointer.";
if (!alloc) {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, nullptr, ptr, true);
this->buffer_ = buffer;
} else {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, alloc, ptr, false);
this->buffer_ = buffer;
}
}
}
}

Tensor::Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim2,
bool need_alloc, std::shared_ptr<base::DeviceAllocator> alloc)
bool need_alloc, std::shared_ptr<base::DeviceAllocator> alloc, void* ptr)
: data_type_(data_type) {
dims_.push_back(dim0);
dims_.push_back(dim1);
dims_.push_back(dim2);
size_ = dim0 * dim1 * dim2;
if (need_alloc && alloc) {
allocate(alloc);
} else {
if (ptr != nullptr) {
CHECK(need_alloc == false)
<< "The need_alloc is is true when ptr parameter is not a null pointer.";
if (!alloc) {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, nullptr, ptr, true);
this->buffer_ = buffer;
} else {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, alloc, ptr, false);
this->buffer_ = buffer;
}
}
}
}

Tensor::Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim2,
int32_t dim3, bool need_alloc,
std::shared_ptr<base::DeviceAllocator> alloc)
std::shared_ptr<base::DeviceAllocator> alloc, void* ptr)
: data_type_(data_type) {
dims_.push_back(dim0);
dims_.push_back(dim1);
@@ -56,14 +116,62 @@ Tensor::Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim
size_ = dim0 * dim1 * dim2 * dim3;
if (need_alloc && alloc) {
allocate(alloc);
} else {
if (ptr != nullptr) {
CHECK(need_alloc == false)
<< "The need_alloc is is true when ptr parameter is not a null pointer.";
if (!alloc) {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, nullptr, ptr, true);
this->buffer_ = buffer;
} else {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, alloc, ptr, false);
this->buffer_ = buffer;
}
}
}
}

Tensor::Tensor(base::DataType data_type, std::vector<int32_t> dims)
: dims_(std::move(dims)), data_type_(data_type) {
size_ = ReduceDimension(dims_.begin(), dims_.end(), 1);
size_ = reduce_dimension(dims_.begin(), dims_.end(), 1);
}

// void Tensor::to_cuda() {
// CHECK_NE(buffer_, nullptr);
// const base::DeviceType device_type = this->device_type();
// if (device_type == base::DeviceType::kDeviceUnknown) {
// LOG(ERROR) << "The device type of the tensor is unknown.";
// } else if (device_type == base::DeviceType::kDeviceCPU) {
// size_t byte_size = this->byte_size();
// auto cu_alloc = base::CUDADeviceAllocatorFactory::get_instance();
// auto cu_buffer = std::make_shared<base::Buffer>(byte_size, cu_alloc);
// cu_alloc->memcpy(buffer_->ptr(), cu_buffer->ptr(), byte_size,
// base::MemcpyKind::kMemcpyCPU2CUDA);
// this->buffer_ = cu_buffer;
// } else {
// LOG(INFO) << "The device type of the tensor is already cuda.";
// }
// }

// void Tensor::to_cpu() {
// CHECK_NE(buffer_, nullptr);
// const base::DeviceType device_type = this->device_type();
//
// if (device_type == base::DeviceType::kDeviceUnknown) {
// LOG(ERROR) << "The device type of the tensor is unknown.";
// } else if (device_type == base::DeviceType::kDeviceCUDA) {
// size_t byte_size = this->byte_size();
// auto cpu_alloc = base::CPUDeviceAllocatorFactory::get_instance();
// auto cpu_buffer = std::make_shared<base::Buffer>(byte_size, cpu_alloc);
// cpu_alloc->memcpy(buffer_->ptr(), cpu_buffer->ptr(), byte_size,
// base::MemcpyKind::kMemcpyCUDA2CPU);
// this->buffer_ = cpu_buffer;
// } else {
// LOG(INFO) << "The device type of the tensor is already cpu.";
// }
// }

size_t Tensor::size() const {
return this->size_;
}
@@ -86,6 +194,12 @@ bool Tensor::assign(std::shared_ptr<base::Buffer> buffer) {
LOG(ERROR) << "The buffer parameter in the assign function is null pointer!";
return false;
}
if (buffer_) {
if (buffer_->device_type() != buffer->device_type()) {
LOG(ERROR)
<< "The device type of the new buffer is different from the original one.";
}
}

size_t byte_size = this->byte_size();
if (byte_size > buffer->byte_size()) {
@@ -137,7 +251,7 @@ void Tensor::set_device_type(base::DeviceType device_type) {
void Tensor::reset(base::DataType data_type, const std::vector<int32_t>& dims) {
this->data_type_ = data_type;
this->dims_ = dims;
this->size_ = ReduceDimension(dims.begin(), dims.end(), 1);
this->size_ = reduce_dimension(dims.begin(), dims.end(), 1);
this->buffer_ = nullptr;
}

@@ -150,7 +264,7 @@ base::DataType Tensor::data_type() const {
}

void Tensor::reshape(const std::vector<int32_t>& dims) {
size_t size = ReduceDimension(dims.begin(), dims.end(), 1);
size_t size = reduce_dimension(dims.begin(), dims.end(), 1);
if (!buffer_) {
this->dims_ = dims;
this->size_ = size;
@@ -168,6 +282,20 @@ void Tensor::reshape(const std::vector<int32_t>& dims) {
this->size_ = size;
}

std::shared_ptr<base::Buffer> Tensor::get_buffer() const {
return buffer_;
}

Tensor Tensor::clone() const {
Tensor new_tensor = *this;
size_t byte_size = this->byte_size();

auto allocator = buffer_->allocator();
new_tensor.buffer_ = std::make_shared<base::Buffer>(byte_size, allocator);
new_tensor.buffer_->copy_from(buffer_.get());
return new_tensor;
}

size_t Tensor::byte_size() const {
return this->size() * DataTypeSize(data_type_);
}
@@ -176,7 +304,7 @@ std::vector<size_t> Tensor::strides() const {
std::vector<size_t> strides;
if (!dims_.empty()) {
for (int32_t i = 0; i < dims_.size() - 1; ++i) {
size_t stride = ReduceDimension(dims_.begin() + i + 1, dims_.end(), 1);
size_t stride = reduce_dimension(dims_.begin() + i + 1, dims_.end(), 1);
strides.push_back(stride);
}
strides.push_back(1);
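
As a hedged illustration of the copy semantics that `clone` and `get_buffer` introduce (not part of the commit; the include paths are assumptions, and the glog CHECK macros are assumed available as elsewhere in the repository):

```cpp
#include <glog/logging.h>

#include "base/alloc.h"     // assumed include path for the allocator factory
#include "tensor/tensor.h"

void clone_example() {
  auto alloc = base::CPUDeviceAllocatorFactory::get_instance();
  tensor::Tensor src(base::DataType::kDataTypeFp32, 4, 4, /*need_alloc=*/true, alloc);
  src.index<float>(0) = 3.f;

  // clone() copies the metadata, allocates a fresh buffer of the same byte
  // size through the original buffer's allocator, and copies the data over.
  tensor::Tensor dst = src.clone();
  dst.index<float>(0) = 7.f;

  // The two tensors no longer share storage.
  CHECK_NE(src.get_buffer().get(), dst.get_buffer().get());
  CHECK_EQ(src.index<float>(0), 3.f);
}
```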
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
@@ -2,7 +2,7 @@ find_package(GTest REQUIRED)
find_package(glog REQUIRED)

set(link_ext_lib glog::glog GTest::gtest)
add_executable(test_llm test_main.cpp test_buffer.cpp test_add.cpp test_math.cpp)
add_executable(test_llm test_main.cpp test_tensor.cpp test_buffer.cpp test_add.cpp test_math.cpp)

target_link_libraries(test_llm ${link_ext_lib})
target_include_directories(test_llm PUBLIC ${glog_INCLUDE_DIR})
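
The new test_tensor.cpp registered above is not rendered on this page. Purely as a hypothetical illustration of how the API in this commit could be exercised under GTest (the test name, include paths, and assertions below are assumptions, not the actual file contents; it also assumes `ptr<T>()` returns the wrapped pointer when external memory is passed in):

```cpp
#include <vector>

#include <gtest/gtest.h>

#include "tensor/tensor.h"

TEST(test_tensor, wrap_external_pointer) {
  std::vector<float> data(4 * 8, 1.f);

  // need_alloc is false and no allocator is passed, so the tensor is expected
  // to reference the caller-owned memory rather than allocate its own.
  tensor::Tensor t(base::DataType::kDataTypeFp32, 4, 8,
                   /*need_alloc=*/false, /*alloc=*/nullptr, /*ptr=*/data.data());

  EXPECT_EQ(t.size(), data.size());
  EXPECT_EQ(t.ptr<float>(), data.data());
}
```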
