Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
enable other dtype in deconvolution
Browse files Browse the repository at this point in the history
  • Loading branch information
vchuravy committed Jun 7, 2016
1 parent 55ced9e commit 2568407
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 71 deletions.
51 changes: 27 additions & 24 deletions src/operator/cudnn_deconvolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,16 @@
namespace mxnet {
namespace op {
#if defined(__CUDACC__) && MXNET_USE_CUDNN == 1
template<typename DType>
class CuDNNDeconvolutionOp : public Operator {
public:
explicit CuDNNDeconvolutionOp(DeconvolutionParam param) {
this->param_ = param;
// convert MB to words
param_.workspace = (param_.workspace << 20) / sizeof(real_t);
param_.workspace = (param_.workspace << 20) / sizeof(DType);
init_cudnn_ = false;
// TODO(xxx): fp16
dtype_ = CUDNN_DATA_FLOAT;
dtype_ = mshadow::DataType<DType>::kCudnnFlag;
}

~CuDNNDeconvolutionOp() {
Expand All @@ -45,20 +46,21 @@ class CuDNNDeconvolutionOp : public Operator {
CHECK_EQ(in_data.size(), expected);
CHECK_EQ(out_data.size(), 1);
Stream<gpu> *s = ctx.get_stream<gpu>();
Tensor<gpu, 4> data = in_data[deconv::kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> wmat = in_data[deconv::kWeight].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> out = out_data[deconv::kOut].get<gpu, 4, real_t>(s);
Tensor<gpu, 4, DType> data = in_data[deconv::kData].get<gpu, 4, DType>(s);
Tensor<gpu, 4, DType> wmat = in_data[deconv::kWeight].get<gpu, 4, DType>(s);
Tensor<gpu, 4, DType> out = out_data[deconv::kOut].get<gpu, 4, DType>(s);
CHECK_EQ(data.CheckContiguous(), true);
CHECK_EQ(wmat.CheckContiguous(), true);
CHECK_EQ(out.CheckContiguous(), true);
if (!init_cudnn_) {
Init(s, in_data, out_data);
}
Tensor<gpu, 1> workspace = ctx.requested[deconv::kTempSpace].get_space<gpu>(
Tensor<gpu, 1, DType> workspace =
ctx.requested[deconv::kTempSpace].get_space_typed<gpu, 1, DType>(
mshadow::Shape1(forward_workspace_), s);
for (uint32_t g = 0; g < param_.num_group; ++g) {
float alpha = 1.0f;
float beta = 0.0f;
typename DataType<DType>::ScaleType alpha = 1.0f;
typename DataType<DType>::ScaleType beta = 0.0f;
#if CUDNN_MAJOR <= 4
CHECK_EQ(cudnnConvolutionBackwardData_v3(s->dnn_handle_,
&alpha,
Expand Down Expand Up @@ -90,7 +92,7 @@ class CuDNNDeconvolutionOp : public Operator {
#endif
if (!param_.no_bias) {
beta = 1.0f;
Tensor<gpu, 1> bias = in_data[deconv::kBias].get<gpu, 1, real_t>(s);
Tensor<gpu, 1, DType> bias = in_data[deconv::kBias].get<gpu, 1, DType>(s);
#if CUDNN_MAJOR >= 4
CHECK_EQ(cudnnAddTensor(s->dnn_handle_,
&alpha,
Expand Down Expand Up @@ -129,18 +131,19 @@ class CuDNNDeconvolutionOp : public Operator {
// TODO(bing): think about how to support add to
CHECK_EQ(req[deconv::kWeight], kWriteTo);
Stream<gpu> *s = ctx.get_stream<gpu>();
Tensor<gpu, 4> grad = out_grad[deconv::kOut].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> wmat = in_data[deconv::kWeight].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> gwmat = in_grad[deconv::kWeight].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> data = in_data[deconv::kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> gdata = in_grad[deconv::kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 1> workspace = ctx.requested[deconv::kTempSpace].get_space<gpu>(
Tensor<gpu, 4, DType> grad = out_grad[deconv::kOut].get<gpu, 4, DType>(s);
Tensor<gpu, 4, DType> wmat = in_data[deconv::kWeight].get<gpu, 4, DType>(s);
Tensor<gpu, 4, DType> gwmat = in_grad[deconv::kWeight].get<gpu, 4, DType>(s);
Tensor<gpu, 4, DType> data = in_data[deconv::kData].get<gpu, 4, DType>(s);
Tensor<gpu, 4, DType> gdata = in_grad[deconv::kData].get<gpu, 4, DType>(s);
Tensor<gpu, 1, DType> workspace =
ctx.requested[deconv::kTempSpace].get_space_typed<gpu, 1, DType>(
mshadow::Shape1(backward_workspace_), s);
for (uint32_t g = 0; g < param_.num_group; ++g) {
float alpha = 1.0f;
float beta = 0.0f;
typename DataType<DType>::ScaleType alpha = 1.0f;
typename DataType<DType>::ScaleType beta = 0.0f;
if (!param_.no_bias) {
Tensor<gpu, 1> gbias = in_grad[deconv::kBias].get<gpu, 1, real_t>(s);
Tensor<gpu, 1, DType> gbias = in_grad[deconv::kBias].get<gpu, 1, DType>(s);
CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_,
&alpha,
out_desc_,
Expand Down Expand Up @@ -208,11 +211,11 @@ class CuDNNDeconvolutionOp : public Operator {
CHECK_EQ(out_data.size(), 1);
if (!init_cudnn_) {
init_cudnn_ = true;
size_t workspace_byte = static_cast<size_t>(param_.workspace * sizeof(real_t));
size_t workspace_byte = static_cast<size_t>(param_.workspace * sizeof(DType));
size_t back_size = 0;
size_t back_size_w = 0;
Tensor<gpu, 4> data = in_data[deconv::kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> out = out_data[deconv::kOut].get<gpu, 4, real_t>(s);
Tensor<gpu, 4, DType> data = in_data[deconv::kData].get<gpu, 4, DType>(s);
Tensor<gpu, 4, DType> out = out_data[deconv::kOut].get<gpu, 4, DType>(s);
data_offset_ = data.shape_[1] / param_.num_group * data.shape_[2] * data.shape_[3];
out_offset_ = out.shape_[1] /param_.num_group * out.shape_[2] * out.shape_[3];
weight_offset_ = data.shape_[1] / param_.num_group * param_.num_filter / param_.num_group
Expand Down Expand Up @@ -267,7 +270,7 @@ class CuDNNDeconvolutionOp : public Operator {
out.shape_[3],
1), CUDNN_STATUS_SUCCESS);
if (!param_.no_bias) {
Tensor<gpu, 1> bias = in_data[deconv::kBias].get<gpu, 1, real_t>(s);
Tensor<gpu, 1, DType> bias = in_data[deconv::kBias].get<gpu, 1, DType>(s);
bias_offset_ = bias.shape_[0] / param_.num_group;
CHECK_EQ(cudnnSetTensor4dDescriptor(bias_desc_,
CUDNN_TENSOR_NCHW,
Expand Down Expand Up @@ -324,8 +327,8 @@ class CuDNNDeconvolutionOp : public Operator {
in_desc_,
algo_,
&forward_workspace_byte_), CUDNN_STATUS_SUCCESS);
forward_workspace_ = forward_workspace_byte_ / sizeof(real_t) + 1;
backward_workspace_ = backward_workspace_byte_ / sizeof(real_t) + 1;
forward_workspace_ = forward_workspace_byte_ / sizeof(DType) + 1;
backward_workspace_ = backward_workspace_byte_ / sizeof(DType) + 1;
}
}

Expand Down
112 changes: 74 additions & 38 deletions src/operator/deconvolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
}
};

template<typename xpu>
template<typename xpu, typename DType>
class DeconvolutionOp : public Operator {
public:
explicit DeconvolutionOp(DeconvolutionParam p) {
Expand All @@ -75,29 +75,33 @@ class DeconvolutionOp : public Operator {
CHECK_EQ(in_data.size(), expected);
CHECK_EQ(out_data.size(), 1);
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 4> data = in_data[deconv::kData].get<xpu, 4, real_t>(s);
Tensor<xpu, 4> out = out_data[deconv::kOut].get<xpu, 4, real_t>(s);
Tensor<xpu, 4, DType> data = in_data[deconv::kData].get<xpu, 4, DType>(s);
Tensor<xpu, 4, DType> out = out_data[deconv::kOut].get<xpu, 4, DType>(s);
Shape<3> wmat_shape =
Shape3(param_.num_group,
data.shape_[1] / param_.num_group,
param_.num_filter / param_.num_group * param_.kernel[0] * param_.kernel[1]);
Tensor<xpu, 3> wmat = in_data[deconv::kWeight].get_with_shape<xpu, 3, real_t>(wmat_shape, s);
Tensor<xpu, 3, DType> wmat =
in_data[deconv::kWeight].get_with_shape<xpu, 3, DType>(wmat_shape, s);
#if defined(__CUDACC__)
CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
<< "Must init CuBLAS handle in stream";
#endif
const index_t nbatch = data.size(0);
Tensor<xpu, 1> workspace = ctx.requested[deconv::kTempSpace].get_space<xpu>(
Shape1(this->InitTemp(out.shape_, data.shape_)), s);
Tensor<xpu, 1, DType> workspace =
ctx.requested[deconv::kTempSpace].get_space_typed<xpu, 1, DType>(
Shape1(this->InitTemp(out.shape_, data.shape_)), s);
for (index_t i = 0; i < nbatch; i += nstep_) {
const index_t step = std::min(nstep_, nbatch - i);
Tensor<xpu, 2> temp_col = Tensor<xpu, 2>(workspace.dptr_,
Shape2(shape_colunit_[0],
shape_colunit_[1] * step), s);
Tensor<xpu, 3> temp_dst = Tensor<xpu, 3>(workspace.dptr_ + temp_col.shape_.Size(),
Shape3(shape_dstunit_[0],
shape_dstunit_[1],
shape_dstunit_[2] * step), s);
Tensor<xpu, 2, DType> temp_col = Tensor<xpu, 2, DType>(
workspace.dptr_,
Shape2(shape_colunit_[0],
shape_colunit_[1] * step), s);
Tensor<xpu, 3, DType> temp_dst = Tensor<xpu, 3, DType>(
workspace.dptr_ + temp_col.shape_.Size(),
Shape3(shape_dstunit_[0],
shape_dstunit_[1],
shape_dstunit_[2] * step), s);
temp_dst = reshape(swapaxis<1, 0>(data.Slice(i, i + step)), temp_dst.shape_);
if (param_.pad[0] == 0 && param_.pad[1] == 0) {
temp_col = unpack_patch2col(out.Slice(i, i + step),
Expand All @@ -117,8 +121,8 @@ class DeconvolutionOp : public Operator {
}
const index_t gstride = temp_col.size(0) / param_.num_group;
for (uint32_t gid = 0; gid < param_.num_group; ++gid) {
mshadow::Tensor<xpu, 2> tmpc = temp_col.Slice(gstride * gid,
gstride * (gid + 1));
mshadow::Tensor<xpu, 2, DType> tmpc = temp_col.Slice(gstride * gid,
gstride * (gid + 1));
tmpc = dot(wmat[gid].T(), temp_dst[gid]);
}
if (param_.pad[0] == 0 && param_.pad[1] == 0) {
Expand All @@ -143,7 +147,7 @@ class DeconvolutionOp : public Operator {
}
if (!param_.no_bias) {
// add bias, broadcast bias to dim 1: channel
Tensor<xpu, 1> bias = in_data[deconv::kBias].get<xpu, 1, real_t>(s);
Tensor<xpu, 1, DType> bias = in_data[deconv::kBias].get<xpu, 1, DType>(s);
out += broadcast<1>(bias, out.shape_);
}
}
Expand All @@ -165,31 +169,36 @@ class DeconvolutionOp : public Operator {
CHECK_EQ(in_data[deconv::kWeight].CheckContiguous(), true);
// get data
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 4> data = in_data[deconv::kData].get<xpu, 4, real_t>(s);
Tensor<xpu, 4> grad = out_grad[deconv::kOut].get<xpu, 4, real_t>(s);
Tensor<xpu, 4> gdata = in_grad[deconv::kData].get<xpu, 4, real_t>(s);
Tensor<xpu, 4, DType> data = in_data[deconv::kData].get<xpu, 4, DType>(s);
Tensor<xpu, 4, DType> grad = out_grad[deconv::kOut].get<xpu, 4, DType>(s);
Tensor<xpu, 4, DType> gdata = in_grad[deconv::kData].get<xpu, 4, DType>(s);
Shape<3> wmat_shape =
Shape3(param_.num_group,
data.shape_[1] / param_.num_group,
param_.num_filter / param_.num_group * param_.kernel[0] * param_.kernel[1]);
Tensor<xpu, 3> wmat = in_data[deconv::kWeight].get_with_shape<xpu, 3, real_t>(wmat_shape, s);
Tensor<xpu, 3> gwmat = in_grad[deconv::kWeight].get_with_shape<xpu, 3, real_t>(wmat_shape, s);
Tensor<xpu, 3, DType> wmat =
in_data[deconv::kWeight].get_with_shape<xpu, 3, DType>(wmat_shape, s);
Tensor<xpu, 3, DType> gwmat =
in_grad[deconv::kWeight].get_with_shape<xpu, 3, DType>(wmat_shape, s);
#if defined(__CUDACC__)
CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
<< "Must init CuBLAS handle in stream";
#endif
const index_t nbatch = data.size(0);
Tensor<xpu, 1> workspace = ctx.requested[deconv::kTempSpace].get_space<xpu>(
Shape1(this->InitTemp(grad.shape_, data.shape_)), s);
Tensor<xpu, 1, DType> workspace =
ctx.requested[deconv::kTempSpace].get_space_typed<xpu, 1, DType>(
Shape1(this->InitTemp(grad.shape_, data.shape_)), s);
for (index_t i = 0; i < nbatch; i += nstep_) {
const index_t step = std::min(nstep_, nbatch - i);
Tensor<xpu, 2> temp_col = Tensor<xpu, 2>(workspace.dptr_,
Shape2(shape_colunit_[0],
shape_colunit_[1] * step), s);
Tensor<xpu, 3> temp_dst = Tensor<xpu, 3>(workspace.dptr_ + temp_col.shape_.Size(),
Shape3(shape_dstunit_[0],
shape_dstunit_[1],
shape_dstunit_[2] * step), s);
Tensor<xpu, 2, DType> temp_col = Tensor<xpu, 2, DType>(
workspace.dptr_,
Shape2(shape_colunit_[0],
shape_colunit_[1] * step), s);
Tensor<xpu, 3, DType> temp_dst = Tensor<xpu, 3, DType>(
workspace.dptr_ + temp_col.shape_.Size(),
Shape3(shape_dstunit_[0],
shape_dstunit_[1],
shape_dstunit_[2] * step), s);
temp_dst = reshape(swapaxis<1, 0>(data.Slice(i, i + step)), temp_dst.shape_);
if (param_.pad[0] == 0 && param_.pad[1] == 0) {
temp_col = unpack_patch2col(grad.Slice(i, i + step),
Expand All @@ -208,17 +217,17 @@ class DeconvolutionOp : public Operator {
}
const index_t gstride = temp_col.size(0) / param_.num_group;
for (uint32_t gid = 0; gid < param_.num_group; ++gid) {
Tensor<xpu, 2> tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1));
Tensor<xpu, 2, DType> tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1));
if (i == 0) {
Tensor<xpu, 2> tmp_gwmat = gwmat[gid];
Tensor<xpu, 2, DType> tmp_gwmat = gwmat[gid];
Assign(tmp_gwmat, req[deconv::kWeight], dot(temp_dst[gid], tmpc.T()));
} else {
gwmat[gid] += dot(temp_dst[gid], tmpc.T());
}
}
if (req[deconv::kData] == kWriteTo || req[deconv::kData] == kWriteInplace) {
for (uint32_t gid = 0; gid < param_.num_group; ++gid) {
Tensor<xpu, 2> tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1));
Tensor<xpu, 2, DType> tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1));
temp_dst[gid] = dot(wmat[gid], tmpc);
}
gdata.Slice(i, i + step) = swapaxis<1, 0>(reshape(temp_dst,
Expand All @@ -229,7 +238,7 @@ class DeconvolutionOp : public Operator {
}
}
if (!param_.no_bias) {
Tensor<xpu, 1> gbias = in_grad[deconv::kBias].get<xpu, 1, real_t>(s);
Tensor<xpu, 1, DType> gbias = in_grad[deconv::kBias].get<xpu, 1, DType>(s);
Assign(gbias, req[deconv::kBias], sumall_except_dim<1>(grad));
}
}
Expand Down Expand Up @@ -259,8 +268,8 @@ class DeconvolutionOp : public Operator {
shape_dstunit_[2] * nstep_);
index_t required_size = scol.Size() + sdst.Size();
CHECK_GE(param_.workspace, required_size)
<< "\nMinimum workspace size: " << required_size * sizeof(real_t) << " Bytes\n"
<< "Given: " << param_.workspace * sizeof(real_t);
<< "\nMinimum workspace size: " << required_size * sizeof(DType) << " Bytes\n"
<< "Given: " << param_.workspace * sizeof(DType);
return required_size;
}

Expand All @@ -271,7 +280,7 @@ class DeconvolutionOp : public Operator {
}; // class DeconvolutionOp

template<typename xpu>
Operator* CreateOp(DeconvolutionParam param);
Operator* CreateOp(DeconvolutionParam param, int dtype);

#if DMLC_USE_CXX11
class DeconvolutionProp : public OperatorProperty {
Expand Down Expand Up @@ -332,6 +341,26 @@ class DeconvolutionProp : public OperatorProperty {
return true;
}

bool InferType(std::vector<int> *in_type,
               std::vector<int> *out_type,
               std::vector<int> *aux_type) const override {
  // Deconvolution requires one uniform dtype across all inputs. The dtype of
  // the first (data) input is authoritative; any input still unspecified (-1)
  // is filled in with it, and any conflicting dtype is a hard error.
  CHECK_GE(in_type->size(), 1);
  const int dtype = (*in_type)[0];
  CHECK_NE(dtype, -1) << "First input must have specified type";
  for (index_t i = 0; i < in_type->size(); ++i) {
    int &cur = (*in_type)[i];
    if (cur == -1) {
      cur = dtype;  // propagate the data dtype to unspecified arguments
    } else {
      CHECK_EQ(cur, dtype) << "This layer requires uniform type. "
                           << "Expected " << dtype << " v.s. given "
                           << cur << " at " << ListArguments()[i];
    }
  }
  // Single output, carrying the common input dtype.
  out_type->clear();
  out_type->push_back(dtype);
  return true;
}

OperatorProperty* Copy() const override {
auto ptr = new DeconvolutionProp();
ptr->param_ = param_;
Expand Down Expand Up @@ -359,7 +388,14 @@ class DeconvolutionProp : public OperatorProperty {
return {ResourceRequest::kTempSpace};
}

Operator* CreateOperator(Context ctx) const override;
// Type-unaware creation is intentionally unsupported for this operator:
// dtype dispatch happens in CreateOperatorEx, which is the entry point
// the executor uses once input types are known.
Operator* CreateOperator(Context ctx) const override {
  LOG(FATAL) << "Not Implemented";
  return nullptr;  // unreachable; nullptr preferred over NULL in C++11 code
}

Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
std::vector<int> *in_type) const override;


private:
DeconvolutionParam param_;
Expand Down
17 changes: 13 additions & 4 deletions src/operator/deconvolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,21 @@
namespace mxnet {
namespace op {
// NOTE(review): the diff rendering fused the deleted pre-commit signature with
// the new one, leaving two overlapping function bodies; this is the
// reconstructed post-commit version.
//
// CPU factory: dispatches on the runtime dtype and instantiates the matching
// DeconvolutionOp<cpu, DType> specialization.
template<>
Operator* CreateOp<cpu>(DeconvolutionParam param, int dtype) {
  Operator *op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
    op = new DeconvolutionOp<cpu, DType>(param);
  });
  return op;
}

// NOTE(review): the diff rendering fused the deleted CreateOperator overload
// with the new CreateOperatorEx; this is the reconstructed post-commit
// version.
//
// Extended creation hook: runs type and shape inference on the supplied
// inputs, then binds a dtype-specialized operator via CreateOp using the
// dtype of the first input.
Operator* DeconvolutionProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                                              std::vector<int> *in_type) const {
  std::vector<TShape> out_shape, aux_shape;
  std::vector<int> out_type, aux_type;
  // Inference must succeed before binding; CHECK aborts on failure.
  CHECK(InferType(in_type, &out_type, &aux_type));
  CHECK(InferShape(in_shape, &out_shape, &aux_shape));
  DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0));
}

DMLC_REGISTER_PARAMETER(DeconvolutionParam);
Expand Down
10 changes: 7 additions & 3 deletions src/operator/deconvolution.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@
namespace mxnet {
namespace op {
// NOTE(review): the diff rendering interleaved the deleted pre-commit lines
// (old signature and `return new ...` statements) with the new ones; this is
// the reconstructed post-commit version.
//
// GPU factory: dispatches on the runtime dtype; uses the cuDNN-backed
// implementation when MXNet is built with cuDNN, otherwise the generic
// GPU DeconvolutionOp.
template<>
Operator* CreateOp<gpu>(DeconvolutionParam param, int dtype) {
  Operator *op = NULL;
  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
#if MXNET_USE_CUDNN == 1
    op = new CuDNNDeconvolutionOp<DType>(param);
#else
    op = new DeconvolutionOp<gpu, DType>(param);
#endif  // MXNET_USE_CUDNN
  });
  return op;
}

} // namespace op
Expand Down
2 changes: 1 addition & 1 deletion src/operator/upsampling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Operator *CreateOp<cpu>(UpSamplingParam param) {
p.stride = TShape(shape, shape + 2);
shape[0] = shape[1] = pad;
p.pad = TShape(shape, shape + 2);
return new DeconvolutionOp<cpu>(p);
return new DeconvolutionOp<cpu, real_t>(p);
} else {
LOG(FATAL) << "Unknown sample type";
return NULL;
Expand Down
Loading

0 comments on commit 2568407

Please sign in to comment.