Merge branch 'develop' into cderb/fin_in_miopen
cderb authored Dec 4, 2023
2 parents 23e0d28 + fdaa341 commit 6198f52
Showing 15 changed files with 1,921 additions and 5 deletions.
5 changes: 3 additions & 2 deletions docs/MIOpen_Porting_Guide.md
@@ -3,9 +3,10 @@


 ## The key differences between MIOpen and cuDNN:
-* MIOpen only supports 4-D tensors in the NCHW storage format. This means all the __\*Nd\*__ APIs in cuDNN do not have a corresponding API in MIOpen.
+* MIOpen only supports 4-D tensors in the NCHW and NHWC storage format. This means all the __\*Nd\*__ APIs in cuDNN do not have a corresponding API in MIOpen.
 * MIOpen only supports __`float(fp32)`__ data-type.
-* MIOpen only supports __2D Convolutions__ and __2D Pooling__.
+* MIOpen supports __2D Convolutions__ and __3D Convolutions__.
+* MIOpen only supports __2D Pooling__.
 * Calling miopenFindConvolution*Algorithm() is *mandatory* before calling any Convolution API.
 * Typical calling sequence for Convolution APIs for MIOpen is:
     * miopenConvolution*GetWorkSpaceSize() // returns the workspace size required by Find()
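The sequence truncated above amounts to a workspace query, the mandatory Find call, and the convolution itself. A minimal forward-direction sketch (assuming the handle, tensor descriptors, convolution descriptor, and device buffers are already set up elsewhere; error checking omitted):

```cpp
#include <hip/hip_runtime.h>
#include <miopen/miopen.h>

// Sketch only: handle, xDesc/x, wDesc/w, convDesc, and yDesc/y are assumed
// to be created and filled elsewhere; all status codes are ignored here.
void RunForwardConv(miopenHandle_t handle,
                    miopenTensorDescriptor_t xDesc, const void* x,
                    miopenTensorDescriptor_t wDesc, const void* w,
                    miopenConvolutionDescriptor_t convDesc,
                    miopenTensorDescriptor_t yDesc, void* y)
{
    // 1. Ask how much workspace Find() needs.
    size_t workspace_size = 0;
    miopenConvolutionForwardGetWorkSpaceSize(
        handle, wDesc, xDesc, convDesc, yDesc, &workspace_size);

    void* workspace = nullptr;
    hipMalloc(&workspace, workspace_size);

    // 2. Find() is mandatory: it benchmarks applicable solvers for this problem.
    int returned_algo_count = 0;
    miopenConvAlgoPerf_t perf;
    miopenFindConvolutionForwardAlgorithm(
        handle, xDesc, x, wDesc, w, convDesc, yDesc, y,
        1, &returned_algo_count, &perf,
        workspace, workspace_size, /*exhaustiveSearch=*/false);

    // 3. Run the convolution with the algorithm Find() selected.
    const float alpha = 1.0f, beta = 0.0f;
    miopenConvolutionForward(handle, &alpha, xDesc, x, wDesc, w, convDesc,
                             perf.fwd_algo, &beta, yDesc, y,
                             workspace, workspace_size);

    hipFree(workspace);
}
```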
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.in
@@ -1 +1 @@
-rocm-docs-core==0.29.0
+rocm-docs-core==0.30.0
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.txt
@@ -100,7 +100,7 @@ requests==2.31.0
     # via
     #   pygithub
     #   sphinx
-rocm-docs-core==0.29.0
+rocm-docs-core==0.30.0
     # via -r requirements.in
 smmap==5.0.0
     # via gitdb
2 changes: 1 addition & 1 deletion include/miopen/miopen.h
@@ -634,7 +634,7 @@ MIOPEN_EXPORT miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor

 /*! @brief Set shape of 4D tensor
  *
- * Interface for setting 4-D tensor shape. MIOpen currently only implements NCHW layout.
+ * Interface for setting 4-D tensor shape. MIOpen currently implements NCHW and NHWC layout.
  *
  * @param tensorDesc Tensor descriptor (input/output)
  * @param dataType   MIOpen datatype (input)
3 changes: 3 additions & 0 deletions src/CMakeLists.txt
@@ -207,6 +207,9 @@ set( MIOpen_Source
     solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp
     solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp
     solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp
+    solver/conv_hip_implicit_gemm_f16f8f16_fwd_xdlops.cpp
+    solver/conv_hip_implicit_gemm_f16f8f16_bwd_xdlops.cpp
+    solver/conv_hip_implicit_gemm_f16f8f16_wrw_xdlops.cpp
     solver/conv_hip_implicit_gemm_nonxdlops_common.cpp
     solver/conv_hip_implicit_gemm_wrw_v4r4.cpp
     solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp
226 changes: 226 additions & 0 deletions src/include/miopen/solver.hpp
@@ -4771,6 +4771,232 @@ struct ConvHipImplicitGemm3DGroupBwdXdlops final
bool CheckCKApplicability(const miopen::conv::ProblemDescription&) const;
};

struct PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops
: PerfConfigBaseCK<PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops>
{
int index = 0;
std::string kernel_id = "";
std::vector<std::string> valid_kernels;

PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops(int idx, std::string kernl_id)
: index(idx), kernel_id(kernl_id)
{
}

PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops() = default;

explicit PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops(bool)
: PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops(0, "")
{
}
void HeuristicInit(const miopen::conv::ProblemDescription&);
bool SetNextValue(const miopen::conv::ProblemDescription&);
bool IsValidValue() const;
bool IsValid(const ExecutionContext&, const miopen::conv::ProblemDescription& problem) const
{
return IsValid(problem);
}
bool IsValid(const miopen::conv::ProblemDescription&) const;
bool operator==(const PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops& other) const;

private:
template <typename DataType, typename ComputeType>
void Init(const miopen::conv::ProblemDescription&);
template <typename DataType, typename ComputeType>
bool CheckIsSupportCKArgs(const miopen::conv::ProblemDescription&) const;
};

struct ConvHipImplicitGemmF16F8F16FwdXdlops final
: ConvTunableSolver<PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops>
{
const std::string& SolverDbId() const override
{
return GetSolverDbId<ConvHipImplicitGemmF16F8F16FwdXdlops>();
}

PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops
GetDefaultPerformanceConfig(const ExecutionContext&,
const miopen::conv::ProblemDescription&) const override;
bool IsValidPerformanceConfig(
const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops&) const override;
PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops
Search(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const AnyInvokeParams& invoke_ctx) const override;
bool IsApplicable(const ExecutionContext&,
const miopen::conv::ProblemDescription&) const override;
bool IsDynamic() const override { return true; }
ConvSolution
GetSolution(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops&) const override;
/// \ref igemm_get_wti_magic_number
float GetWti(const ExecutionContext&, const miopen::conv::ProblemDescription&) const override
{
return 0.02f;
};

private:
template <typename DataType, typename ComputeType>
bool CheckCKApplicability(const miopen::conv::ProblemDescription&) const;
};

struct PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops
: PerfConfigBase<PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops>
{
int index;
std::string kernel_id;
std::vector<std::string> valid_kernels;
PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops(int idx, std::string kernl_id)
: index(idx), kernel_id(kernl_id)
{
}
PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops()
: PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops(0, "")
{
}
PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops(bool)
: PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops(0, "")
{
}
void HeuristicInit(const miopen::conv::ProblemDescription&);
bool SetNextValue(const miopen::conv::ProblemDescription&);
bool IsValidValue() const;
bool IsValid(const ExecutionContext&, const miopen::conv::ProblemDescription& problem) const
{
return IsValid(problem);
}
bool IsValid(const miopen::conv::ProblemDescription&) const;
template <typename Self, typename F>
static void Visit(Self&& s, F f)
{
f(s.kernel_id, "kernel_id");
}
bool operator==(const PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops& other) const;

private:
template <typename DataType, typename OutComputeType, typename WeiComputeType>
void Init(const miopen::conv::ProblemDescription&);
template <typename DataType, typename OutComputeType, typename WeiComputeType>
bool CheckIsSupportCKArgs(const miopen::conv::ProblemDescription&) const;
};

struct ConvHipImplicitGemmF16F8F16BwdXdlops final
: ConvTunableSolver<PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops>
{
const std::string& SolverDbId() const override
{
return GetSolverDbId<ConvHipImplicitGemmF16F8F16BwdXdlops>();
}

PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops
GetDefaultPerformanceConfig(const ExecutionContext&,
const miopen::conv::ProblemDescription&) const override;
bool IsValidPerformanceConfig(
const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops&) const override;
PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops
Search(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const AnyInvokeParams& invoke_ctx) const override;
bool IsApplicable(const ExecutionContext&,
const miopen::conv::ProblemDescription&) const override;
bool IsDynamic() const override { return true; }
ConvSolution
GetSolution(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const PerformanceConfigHipImplicitGemmF16F8F16BwdXdlops&) const override;
/// \ref igemm_get_wti_magic_number
float GetWti(const ExecutionContext&, const miopen::conv::ProblemDescription&) const override
{
return 0.02f;
};

private:
template <typename DataType, typename OutComputeType, typename WeiComputeType>
bool CheckCKApplicability(const miopen::conv::ProblemDescription&) const;
};

struct PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops
: PerfConfigBase<PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops>
{
int index;
std::string kernel_id;
std::vector<std::string> valid_kernels;
PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops(int idx, std::string kernl_id)
: index(idx), kernel_id(kernl_id)
{
}
PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops()
: PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops(0, "")
{
}
PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops(bool)
: PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops(0, "")
{
}
void HeuristicInit(const miopen::conv::ProblemDescription&);
bool SetNextValue(const miopen::conv::ProblemDescription&);
bool IsValidValue() const;
bool IsValid(const ExecutionContext&, const miopen::conv::ProblemDescription& problem) const
{
return IsValid(problem);
}
bool IsValid(const miopen::conv::ProblemDescription&) const;
template <typename Self, typename F>
static void Visit(Self&& s, F f)
{
f(s.kernel_id, "kernel_id");
}
bool operator==(const PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops& other) const;

private:
template <typename DataType, typename OutComputeType, typename InComputeType>
void Init(const miopen::conv::ProblemDescription&);
template <typename DataType, typename OutComputeType, typename InComputeType>
bool CheckIsSupportCKArgs(const miopen::conv::ProblemDescription&) const;
};

struct ConvHipImplicitGemmF16F8F16WrwXdlops final
: ConvTunableSolver<PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops>
{
const std::string& SolverDbId() const override
{
return GetSolverDbId<ConvHipImplicitGemmF16F8F16WrwXdlops>();
}

PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops
GetDefaultPerformanceConfig(const ExecutionContext&,
const miopen::conv::ProblemDescription&) const override;
bool IsValidPerformanceConfig(
const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops&) const override;
PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops
Search(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const AnyInvokeParams& invoke_ctx) const override;
bool IsApplicable(const ExecutionContext&,
const miopen::conv::ProblemDescription&) const override;
bool IsDynamic() const override { return true; }
ConvSolution
GetSolution(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
const PerformanceConfigHipImplicitGemmF16F8F16WrwXdlops&) const override;
/// \ref igemm_get_wti_magic_number
float GetWti(const ExecutionContext&, const miopen::conv::ProblemDescription&) const override
{
return 0.02f;
};

private:
template <typename DataType, typename OutComputeType, typename InComputeType>
bool CheckCKApplicability(const miopen::conv::ProblemDescription&) const;
};

} // namespace conv

// Use struct as a syntactic sugar to make the intent as clear as possible.
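All three F16F8F16 solver/perf-config pairs added above follow the same tunable-solver shape: HeuristicInit() seeds a starting point, SetNextValue() steps through the candidates in valid_kernels (tracked by index and kernel_id), and IsValid() filters candidates before they are benchmarked. A hypothetical enumeration loop over that interface (an illustration only, not MIOpen's actual Search() implementation):

```cpp
// Hypothetical driver for a perf-config such as
// PerformanceConfigHipImplicitGemmF16F8F16FwdXdlops; a real Search() would
// time each candidate kernel and keep the fastest, not the last valid one.
template <typename PerfConfig, typename Problem>
PerfConfig EnumerateValidConfigs(const Problem& problem)
{
    PerfConfig config;
    config.HeuristicInit(problem); // seed index/kernel_id from heuristics
    PerfConfig best = config;
    do
    {
        if(config.IsValidValue() && config.IsValid(problem))
            best = config; // placeholder for "benchmark and compare"
    } while(config.SetNextValue(problem)); // false once the space is exhausted
    return best;
}
```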
3 changes: 3 additions & 0 deletions src/mlo_dir_conv.cpp
@@ -125,6 +125,8 @@ static auto GetImplicitGemmSolvers()
         miopen::solver::conv::ConvHipImplicitGemmGroupFwdXdlops,
         miopen::solver::conv::ConvHipImplicitGemm3DGroupFwdXdlops,
         miopen::solver::conv::ConvHipImplicitGemm3DGroupBwdXdlops,
+        miopen::solver::conv::ConvHipImplicitGemmF16F8F16FwdXdlops,
+        miopen::solver::conv::ConvHipImplicitGemmF16F8F16BwdXdlops,
 #endif // MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL
         miopen::solver::conv::ConvAsmImplicitGemmGTCDynamicFwdDlopsNCHWC>{};
 }

@@ -162,6 +164,7 @@ static auto GetImplicitGemmWrWSolvers()
         miopen::solver::conv::ConvAsmImplicitGemmGTCDynamicWrwXdlops,
 #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL
         miopen::solver::conv::ConvHipImplicitGemm3DGroupWrwXdlops,
+        miopen::solver::conv::ConvHipImplicitGemmF16F8F16WrwXdlops,
 #endif // MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL
         miopen::solver::conv::ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC>{};
 }
12 changes: 12 additions & 0 deletions src/solver.cpp
@@ -611,6 +611,18 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry)
     Register(registry, ++id, Primitive::Normalization, norm::Layernorm4DCKForward{}.SolverDbId());
     Register(registry, ++id, Primitive::Normalization, norm::LayernormForward{}.SolverDbId());
     Register(registry, ++id, Primitive::Reduce, reduce::SumForward{}.SolverDbId());
+    RegisterWithSolver(registry,
+                       ++id,
+                       conv::ConvHipImplicitGemmF16F8F16FwdXdlops{},
+                       miopenConvolutionAlgoImplicitGEMM);
+    RegisterWithSolver(registry,
+                       ++id,
+                       conv::ConvHipImplicitGemmF16F8F16BwdXdlops{},
+                       miopenConvolutionAlgoImplicitGEMM);
+    RegisterWithSolver(registry,
+                       ++id,
+                       conv::ConvHipImplicitGemmF16F8F16WrwXdlops{},
+                       miopenConvolutionAlgoImplicitGEMM);

     // IMPORTANT: New solvers should be added to the end of the function!
 }
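The ids handed out here are sequential, which is why the closing comment insists new solvers be appended: an id, once assigned, can persist in users' find/perf databases and must never be remapped. A conceptual sketch of that invariant (assumed semantics, not the actual registrar):

```cpp
// Conceptual sketch only: models why registration order must be append-only.
#include <cassert>
#include <cstdint>
#include <map>
#include <string>

struct IdRegistry
{
    std::map<uint64_t, std::string> id_to_solver;

    bool Register(uint64_t id, const std::string& solver_db_id)
    {
        // Once assigned, an id may already live in on-disk databases,
        // so it can never be reused for a different solver.
        return id_to_solver.emplace(id, solver_db_id).second;
    }
};

int main()
{
    IdRegistry registry;
    uint64_t id = 0;
    assert(registry.Register(++id, "ConvHipImplicitGemmF16F8F16FwdXdlops"));
    assert(registry.Register(++id, "ConvHipImplicitGemmF16F8F16BwdXdlops"));
    assert(registry.Register(++id, "ConvHipImplicitGemmF16F8F16WrwXdlops"));
    // Appending new solvers at the end leaves every existing id untouched.
}
```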