
Commit

Section 4
zjhellofss committed Jul 1, 2024
1 parent a2df727 commit 06ef5d5
Showing 4 changed files with 203 additions and 47 deletions.
57 changes: 20 additions & 37 deletions kuiper/include/tensor/tensor.h
@@ -13,22 +13,30 @@ class Tensor {
explicit Tensor() = default;

explicit Tensor(base::DataType data_type, int32_t dim0, bool need_alloc = false,
std::shared_ptr<base::DeviceAllocator> alloc = nullptr);
std::shared_ptr<base::DeviceAllocator> alloc = nullptr,
void* ptr = nullptr);

explicit Tensor(base::DataType data_type, int32_t dim0, int32_t dim1,
bool need_alloc = false,
std::shared_ptr<base::DeviceAllocator> alloc = nullptr);
std::shared_ptr<base::DeviceAllocator> alloc = nullptr,
void* ptr = nullptr);

explicit Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim2,
bool need_alloc = false,
std::shared_ptr<base::DeviceAllocator> alloc = nullptr);
std::shared_ptr<base::DeviceAllocator> alloc = nullptr,
void* ptr = nullptr);

explicit Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim2,
int32_t dim3, bool need_alloc = false,
std::shared_ptr<base::DeviceAllocator> alloc = nullptr);
std::shared_ptr<base::DeviceAllocator> alloc = nullptr,
void* ptr = nullptr);

explicit Tensor(base::DataType data_type, std::vector<int32_t> dims);

// void to_cpu();

// void to_cuda();

bool is_empty() const;

template <typename T>
@@ -39,6 +47,8 @@ class Tensor {

void reshape(const std::vector<int32_t>& dims);

std::shared_ptr<base::Buffer> get_buffer() const;

size_t size() const;

size_t byte_size() const;
@@ -76,8 +86,7 @@ class Tensor {
template <typename T>
const T& index(int64_t offset) const;

template <typename T>
void transpose_dim12(Tensor dst);
tensor::Tensor clone() const;

private:
size_t size_ = 0;
@@ -88,12 +97,16 @@

template <typename T>
T& Tensor::index(int64_t offset) {
CHECK_GE(offset, 0);
CHECK_LT(offset, this->size());
T& val = *(reinterpret_cast<T*>(buffer_->ptr()) + offset);
return val;
}

template <typename T>
const T& Tensor::index(int64_t offset) const {
CHECK_GE(offset, 0);
CHECK_LT(offset, this->size());
const T& val = *(reinterpret_cast<T*>(buffer_->ptr()) + offset);
return val;
}
@@ -127,35 +140,5 @@ const T* Tensor::ptr(int64_t index) const {
<< "The data area buffer of this tensor is empty or it points to a null pointer.";
return reinterpret_cast<const T*>(buffer_->ptr()) + index;
}

template <typename T>
void Tensor::transpose_dim12(Tensor dst) {
CHECK_EQ(dims_size(), 3);
CHECK_EQ(is_empty(), false);
CHECK_EQ(dst.dims_size(), 3);
CHECK_EQ(dst.is_empty(), false);
CHECK_EQ(get_dim(0), dst.get_dim(0));
CHECK_EQ(get_dim(1), dst.get_dim(2));
CHECK_EQ(get_dim(2), dst.get_dim(1));
CHECK(device_type() == dst.device_type());
CHECK(device_type() == base::DeviceType::kDeviceCPU);

int32_t src_ch = this->get_dim(0);
int32_t src_row = this->get_dim(1);
int32_t src_col = this->get_dim(2);
int32_t dst_row = dst.get_dim(1);
int32_t dst_col = dst.get_dim(2);
int32_t plane_size = src_col * src_row;

T* src_ptr = this->ptr<T>();
T* dst_ptr = dst.ptr<T>();
for (int32_t ch = 0; ch < src_ch; ++ch) {
T* src_ch_ptr = src_ptr + ch * plane_size;
T* dst_ch_ptr = dst_ptr + ch * plane_size;
arma::Mat<T> src_mat = arma::Mat<T>(src_ch_ptr, src_col, src_row, false, true);
arma::Mat<T> dst_mat = arma::Mat<T>(dst_ch_ptr, dst_col, dst_row, false, true);
dst_mat = src_mat.t();
}
}
} // namespace tensor
#endif // KUIPER_INCLUDE_TENSOR_TENSOR_H_
#endif // KUIPER_INCLUDE_TENSOR_TENSOR_H_
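
To make the new constructor signature concrete, here is a minimal usage sketch (not part of the commit). The include paths are assumptions, and it assumes `base::CPUDeviceAllocatorFactory` (referenced in the commented-out `to_cpu` code further down) returns a CPU allocator.

```cpp
#include <vector>

#include "base/alloc.h"     // assumed include path for the allocator factory
#include "tensor/tensor.h"

int main() {
  // (a) Let the tensor allocate its own storage through a device allocator.
  auto alloc = base::CPUDeviceAllocatorFactory::get_instance();
  tensor::Tensor owned(base::DataType::kDataTypeFp32, /*dim0=*/32, /*dim1=*/64,
                       /*need_alloc=*/true, alloc);

  // (b) Wrap caller-owned memory: need_alloc stays false and ptr is passed in,
  //     so the tensor references this buffer instead of allocating a new one.
  std::vector<float> external(32 * 64, 0.f);
  tensor::Tensor wrapped(base::DataType::kDataTypeFp32, 32, 64,
                         /*need_alloc=*/false, /*alloc=*/nullptr,
                         /*ptr=*/external.data());

  owned.index<float>(0) = 1.f;
  float first = wrapped.index<float>(0);  // reads external[0]
  (void)first;
  return 0;
}
```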
146 changes: 137 additions & 9 deletions kuiper/source/tensor/tensor.cpp
@@ -4,50 +4,110 @@

namespace tensor {
template <typename T, typename Tp>
static inline size_t ReduceDimension(T begin, T end, Tp init) {
static size_t reduce_dimension(T begin, T end, Tp init) {
if (begin >= end) {
return 0;
}
size_t size = std::accumulate(begin, end, init, std::multiplies<>());
return size;
}

static size_t data_type_size(base::DataType data_type) {
switch (data_type) {
case base::DataType::kDataTypeFp32: {
return 4;
}
case base::DataType::kDataTypeInt8: {
return 1;
}
case base::DataType::kDataTypeInt32: {
return 4;
}
default: {
LOG(FATAL) << "Unknown data type size for " << int(data_type);
return 0;
}
}
}

Tensor::Tensor(base::DataType data_type, int32_t dim0, bool need_alloc,
std::shared_ptr<base::DeviceAllocator> alloc)
std::shared_ptr<base::DeviceAllocator> alloc, void* ptr)
: data_type_(data_type) {
dims_.push_back(dim0);
size_ = dim0;
if (need_alloc && alloc) {
allocate(alloc);
} else {
if (ptr != nullptr) {
CHECK(need_alloc == false)
<< "The need_alloc is is true when ptr parameter is not a null pointer.";
if (!alloc) {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, nullptr, ptr, true);
this->buffer_ = buffer;
} else {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, alloc, ptr, false);
this->buffer_ = buffer;
}
}
}
}

Tensor::Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, bool need_alloc,
std::shared_ptr<base::DeviceAllocator> alloc)
std::shared_ptr<base::DeviceAllocator> alloc, void* ptr)
: data_type_(data_type) {
dims_.push_back(dim0);
dims_.push_back(dim1);
size_ = dim0 * dim1;
if (need_alloc && alloc) {
allocate(alloc);
} else {
if (ptr != nullptr) {
CHECK(need_alloc == false)
<< "The need_alloc is is true when ptr parameter is not a null pointer.";
if (!alloc) {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, nullptr, ptr, true);
this->buffer_ = buffer;
} else {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, alloc, ptr, false);
this->buffer_ = buffer;
}
}
}
}

Tensor::Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim2,
bool need_alloc, std::shared_ptr<base::DeviceAllocator> alloc)
bool need_alloc, std::shared_ptr<base::DeviceAllocator> alloc, void* ptr)
: data_type_(data_type) {
dims_.push_back(dim0);
dims_.push_back(dim1);
dims_.push_back(dim2);
size_ = dim0 * dim1 * dim2;
if (need_alloc && alloc) {
allocate(alloc);
} else {
if (ptr != nullptr) {
CHECK(need_alloc == false)
<< "The need_alloc is is true when ptr parameter is not a null pointer.";
if (!alloc) {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, nullptr, ptr, true);
this->buffer_ = buffer;
} else {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, alloc, ptr, false);
this->buffer_ = buffer;
}
}
}
}

Tensor::Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim2,
int32_t dim3, bool need_alloc,
std::shared_ptr<base::DeviceAllocator> alloc)
std::shared_ptr<base::DeviceAllocator> alloc, void* ptr)
: data_type_(data_type) {
dims_.push_back(dim0);
dims_.push_back(dim1);
@@ -56,14 +116,62 @@ Tensor::Tensor(base::DataType data_type, int32_t dim0, int32_t dim1, int32_t dim
size_ = dim0 * dim1 * dim2 * dim3;
if (need_alloc && alloc) {
allocate(alloc);
} else {
if (ptr != nullptr) {
CHECK(need_alloc == false)
<< "The need_alloc is is true when ptr parameter is not a null pointer.";
if (!alloc) {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, nullptr, ptr, true);
this->buffer_ = buffer;
} else {
std::shared_ptr<base::Buffer> buffer = std::make_shared<base::Buffer>(
data_type_size(data_type) * size_, alloc, ptr, false);
this->buffer_ = buffer;
}
}
}
}

Tensor::Tensor(base::DataType data_type, std::vector<int32_t> dims)
: dims_(std::move(dims)), data_type_(data_type) {
size_ = ReduceDimension(dims_.begin(), dims_.end(), 1);
size_ = reduce_dimension(dims_.begin(), dims_.end(), 1);
}

// void Tensor::to_cuda() {
// CHECK_NE(buffer_, nullptr);
// const base::DeviceType device_type = this->device_type();
// if (device_type == base::DeviceType::kDeviceUnknown) {
// LOG(ERROR) << "The device type of the tensor is unknown.";
// } else if (device_type == base::DeviceType::kDeviceCPU) {
// size_t byte_size = this->byte_size();
// auto cu_alloc = base::CUDADeviceAllocatorFactory::get_instance();
// auto cu_buffer = std::make_shared<base::Buffer>(byte_size, cu_alloc);
// cu_alloc->memcpy(buffer_->ptr(), cu_buffer->ptr(), byte_size,
// base::MemcpyKind::kMemcpyCPU2CUDA);
// this->buffer_ = cu_buffer;
// } else {
// LOG(INFO) << "The device type of the tensor is already cuda.";
// }
// }

// void Tensor::to_cpu() {
// CHECK_NE(buffer_, nullptr);
// const base::DeviceType device_type = this->device_type();
//
// if (device_type == base::DeviceType::kDeviceUnknown) {
// LOG(ERROR) << "The device type of the tensor is unknown.";
// } else if (device_type == base::DeviceType::kDeviceCUDA) {
// size_t byte_size = this->byte_size();
// auto cpu_alloc = base::CPUDeviceAllocatorFactory::get_instance();
// auto cpu_buffer = std::make_shared<base::Buffer>(byte_size, cpu_alloc);
// cpu_alloc->memcpy(buffer_->ptr(), cpu_buffer->ptr(), byte_size,
// base::MemcpyKind::kMemcpyCUDA2CPU);
// this->buffer_ = cpu_buffer;
// } else {
// LOG(INFO) << "The device type of the tensor is already cpu.";
// }
// }

size_t Tensor::size() const {
return this->size_;
}
@@ -86,6 +194,12 @@ bool Tensor::assign(std::shared_ptr<base::Buffer> buffer) {
LOG(ERROR) << "The buffer parameter in the assign function is null pointer!";
return false;
}
if (buffer_) {
if (buffer_->device_type() != buffer->device_type()) {
LOG(ERROR)
<< "The device type of the new buffer is different from the original one.";
}
}

size_t byte_size = this->byte_size();
if (byte_size > buffer->byte_size()) {
@@ -137,7 +251,7 @@ void Tensor::set_device_type(base::DeviceType device_type) {
void Tensor::reset(base::DataType data_type, const std::vector<int32_t>& dims) {
this->data_type_ = data_type;
this->dims_ = dims;
this->size_ = ReduceDimension(dims.begin(), dims.end(), 1);
this->size_ = reduce_dimension(dims.begin(), dims.end(), 1);
this->buffer_ = nullptr;
}

@@ -150,7 +264,7 @@ base::DataType Tensor::data_type() const {
}

void Tensor::reshape(const std::vector<int32_t>& dims) {
size_t size = ReduceDimension(dims.begin(), dims.end(), 1);
size_t size = reduce_dimension(dims.begin(), dims.end(), 1);
if (!buffer_) {
this->dims_ = dims;
this->size_ = size;
@@ -168,6 +282,20 @@ void Tensor::reshape(const std::vector<int32_t>& dims) {
this->size_ = size;
}

std::shared_ptr<base::Buffer> Tensor::get_buffer() const {
return buffer_;
}

Tensor Tensor::clone() const {
Tensor new_tensor = *this;
size_t byte_size = this->byte_size();

auto allocator = buffer_->allocator();
new_tensor.buffer_ = std::make_shared<base::Buffer>(byte_size, allocator);
new_tensor.buffer_->copy_from(buffer_.get());
return new_tensor;
}

size_t Tensor::byte_size() const {
return this->size() * DataTypeSize(data_type_);
}
@@ -176,7 +304,7 @@ std::vector<size_t> Tensor::strides() const {
std::vector<size_t> strides;
if (!dims_.empty()) {
for (int32_t i = 0; i < dims_.size() - 1; ++i) {
size_t stride = ReduceDimension(dims_.begin() + i + 1, dims_.end(), 1);
size_t stride = reduce_dimension(dims_.begin() + i + 1, dims_.end(), 1);
strides.push_back(stride);
}
strides.push_back(1);
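
As a hedged illustration of the copy semantics that `clone` and `get_buffer` introduce (not part of the commit; the include paths are assumptions, and the glog CHECK macros are assumed available as elsewhere in the repository):

```cpp
#include <glog/logging.h>

#include "base/alloc.h"     // assumed include path for the allocator factory
#include "tensor/tensor.h"

void clone_example() {
  auto alloc = base::CPUDeviceAllocatorFactory::get_instance();
  tensor::Tensor src(base::DataType::kDataTypeFp32, 4, 4, /*need_alloc=*/true, alloc);
  src.index<float>(0) = 3.f;

  // clone() copies the metadata, allocates a fresh buffer of the same byte
  // size through the original buffer's allocator, and copies the data over.
  tensor::Tensor dst = src.clone();
  dst.index<float>(0) = 7.f;

  // The two tensors no longer share storage.
  CHECK_NE(src.get_buffer().get(), dst.get_buffer().get());
  CHECK_EQ(src.index<float>(0), 3.f);
}
```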
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
@@ -2,7 +2,7 @@ find_package(GTest REQUIRED)
find_package(glog REQUIRED)

set(link_ext_lib glog::glog GTest::gtest)
add_executable(test_llm test_main.cpp test_buffer.cpp test_add.cpp test_math.cpp)
add_executable(test_llm test_main.cpp test_tensor.cpp test_buffer.cpp test_add.cpp test_math.cpp)

target_link_libraries(test_llm ${link_ext_lib})
target_include_directories(test_llm PUBLIC ${glog_INCLUDE_DIR})
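
The new test_tensor.cpp registered above is not rendered on this page. Purely as a hypothetical illustration of how the API in this commit could be exercised under GTest (the test name, include paths, and assertions below are assumptions, not the actual file contents; it also assumes `ptr<T>()` returns the wrapped pointer when external memory is passed in):

```cpp
#include <vector>

#include <gtest/gtest.h>

#include "tensor/tensor.h"

TEST(test_tensor, wrap_external_pointer) {
  std::vector<float> data(4 * 8, 1.f);

  // need_alloc is false and no allocator is passed, so the tensor is expected
  // to reference the caller-owned memory rather than allocate its own.
  tensor::Tensor t(base::DataType::kDataTypeFp32, 4, 8,
                   /*need_alloc=*/false, /*alloc=*/nullptr, /*ptr=*/data.data());

  EXPECT_EQ(t.size(), data.size());
  EXPECT_EQ(t.ptr<float>(), data.data());
}
```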
