Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MNN:Sync] Sync Internal 2.6.3 #2550

Merged
merged 2 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ struct GemmBatchedIdentityThreadblockSwizzle {
return GemmCoord(
(problem_size.m() + tile_size.m() - 1) / tile_size.m(),
(problem_size.n() + tile_size.n() - 1) / tile_size.n(),
batch_count % (1 << 16));
batch_count >= 65536 ? 65535 : batch_count);
}

/// Computes CUDA grid dimensions given a size in units of logical tiles
Expand Down
45 changes: 24 additions & 21 deletions codegen/opencl/OpenCLTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ std::string OpenCLTarget::type() {
}
std::string OpenCLTarget::macro() {
return
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
"#define OFFSET_CHECK\\\n"
"\tconst int c = get_global_id(0), w = get_global_id(1), hb = get_global_id(2);\\\n"
"\tif (c >= global_size_dim0 || w >= global_size_dim1 || hb >= global_size_dim2) { return; }\\\n"
Expand Down Expand Up @@ -113,61 +116,61 @@ std::string OpenCLTarget::codegen(std::vector<std::string>& inputs, const Comman
ss << inpName << "=" << operand << " * " << operand;
break;
case UnaryOpOperation_ERF:
ss << inpName << "=erf(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(erf(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_ERFC:
ss << inpName << "=erfc(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(erfc(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_SQRT:
ss << inpName << "=sqrt(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(sqrt(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_RSQRT:
ss << inpName << "=rsqrt(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(rsqrt(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_ABS:
ss << inpName << "=fabs(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(fabs(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_SIN:
ss << inpName << "=sin(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(sin(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_COS:
ss << inpName << "=cos(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(cos(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_SIGN:
ss << inpName << "=sign(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(sign(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_EXP:
ss << inpName << "=exp(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(exp(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_NEG:
ss << inpName << "=-(" << operand << ")";
break;
case UnaryOpOperation_TAN:
ss << inpName << "=tan(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(tan(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_CEIL:
ss << inpName << "=ceil(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(ceil(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_LOG1P:
ss << inpName << "=log1p(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(log1p(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_FLOOR:
ss << inpName << "=floor(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(floor(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_ROUND:
ss << inpName << "=round(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(round(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_SIGMOID:
ss << inpName << "=native_recip((float4)1+native_exp(convert_float4(-" << operand << ")))";
ss << inpName << "=CONVERT_FLOAT4(native_recip((float4)1+native_exp(convert_float4(-" << operand << "))))";
break;
case UnaryOpOperation_TANH:
ss << inpName << "=tanh(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(tanh(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_RECIPROCAL:
ss << inpName << "=native_recip(convert_float4(" << operand << "))";
ss << inpName << "=CONVERT_FLOAT4(native_recip(convert_float4(" << operand << ")))";
break;
case UnaryOpOperation_LOG:
ss << inpName << "=native_log(convert_float4(" << operand << "+(float4)((float)0.0000001)))";
ss << inpName << "=CONVERT_FLOAT4(native_log(convert_float4(" << operand << ")+(float4)((float)0.0000001)))";
break;
default:
MNN_ASSERT(false);
Expand Down Expand Up @@ -198,13 +201,13 @@ std::string OpenCLTarget::codegen(std::vector<std::string>& inputs, const Comman
return ss.str();
}
std::string OpenCLTarget::load(const std::string& base, const std::string& offset, const Command* cmd, std::string& inpName) {
return "FLOAT4 " + inpName + "=read_imagef(" + base + ", SAMPLER, " + offset + ")";
return "FLOAT4 " + inpName + "=RI_F(" + base + ", SAMPLER, " + offset + ")";
}
std::string OpenCLTarget::loadscalar(const std::string& base, std::string& inpName) {
return "FLOAT4 " + inpName + "=((float4)read_imagef(" + base + ", SAMPLER, (int2)(0, 0)).x)";
return "FLOAT4 " + inpName + "=(RI_F(" + base + ", SAMPLER, (int2)(0, 0)).x)";
}
std::string OpenCLTarget::store(const std::string base, const std::string& offset, const std::string& data) {
return "write_imagef(" + base + ", " + offset + ", " + data + ");\n";
return "WI_F(" + base + ", " + offset + ", " + data + ");\n";
}

std::string OpenCLTarget::proto(const std::string& name, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool hasSingleConvertRaster) {
Expand Down
2 changes: 1 addition & 1 deletion codegen/opencl/OpenCLTarget.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class OpenCLTarget : public Target {
std::string store(const std::string base, const std::string& offset, const std::string& data) override;
std::string proto(const std::string& name, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, bool hasSingleConvertRaster = false) override;
template <typename T>
std::string numval(T t) { return "((float4)" + std::to_string(t) + ")"; }
std::string numval(T t) { return "((FLOAT4)" + std::to_string(t) + ")"; }
};

}
154 changes: 53 additions & 101 deletions docs/inference/python.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,59 @@ MNN在C++的基础上,增加了Python扩展。扩展单元包括两个部分
### MNNTools
MNNTools提供目前主要是2个工具,用法可以参考[mnnconvert](../tools/python.html#mnnconvert)和[mnnquant](../tools/python.html#mnnquant)

## 使用Python Session API
## 使用Python Module API
### 数据类型
Python中的`Module API`与C++中的函数名略有区别,用法相似。主要数据类型如下:
- [_Module](../pymnn/_Module.md) 模型实例
- [Var](../pymnn/Var.md) 模型的输入输出
### 推理流程
基本推理流程如下:
- [创建Module](../pymnn/nn.html#load-module-from-file-file-name-input-names-output-names-dynamic-shape-mutable-rearrange-backend-memory-mode-power-mode-precision-mode)
- 创建输入: 使用`expr`或`numpy`函数创建`Var`即可作为输入
- [执行推理](../pymnn/_Module.html#forward-input)
- 获取输出: 输出为`Var`类型,可以通过`expr`或`numpy`函数执行后处理
### 示例
```python
import MNN.nn as nn
import MNN.cv as cv
import MNN.numpy as np
import MNN.expr as expr

# 配置执行后端,线程数,精度等信息;key-value请查看API介绍
config = {}
config['precision'] = 'low' # 当硬件支持(armv8.2)时使用fp16推理
config['backend'] = 0 # CPU
config['numThread'] = 4 # 线程数

rt = nn.create_runtime_manager((config,))
# 加载模型创建_Module
net = nn.load_module_from_file('mobilenet_v1.mnn', ['data'], ['prob'], runtime_manager=rt)

# 读取图片
image = cv.imread('cat.jpg')
# 转换为float32, 形状为[224,224,3]
image = cv.resize(image, (224, 224), mean=[103.94, 116.78, 123.68], norm=[0.017, 0.017, 0.017])
# 增加batch HWC to NHWC
input_var = np.expand_dims(image, 0)
# NHWC to NC4HW4
input_var = expr.convert(input_var, expr.NC4HW4)

# 执行推理
output_var = net.forward(input_var)

# NC4HW4 to NHWC
output_var = expr.convert(output_var, expr.NHWC)
# 打印出分类结果, 282为猫
print("output belong to class: {}".format(np.argmax(output_var)))
# output belong to class: 282
```
其他示例可以参考[示例](../pymnn/RuntimeManager.html#example);也可以参考[示例工程](../start/demo.html#id5)。


## 使用Python Session API *[deprecated]*

不建议使用该API执行推理,建议使用Module API

### 数据类型
Python中`Session API`的函数名与用法与C++基本一样。使用的主要数据类型如下:
- [Interpreter](../pymnn/Interpreter.md) 解释器,持有模型资源
Expand Down Expand Up @@ -118,107 +170,7 @@ print("output belong to class: {}".format(np.argmax(output_var, 1)))
# output belong to class: array([282, 385], dtype=int32)
```
其他示例可以参考[示例](../pymnn/Interpreter.html#example);也可以参考[示例工程](../start/demo.html#session)。
## 使用Python Module API
### 数据类型
Python中的`Module API`与C++中的函数名略有区别,用法相似。主要数据类型如下:
- [_Module](../pymnn/_Module.md) 模型实例
- [Var](../pymnn/Var.md) 模型的输入输出
### 推理流程
基本推理流程如下:
- [创建Module](../pymnn/nn.html#load-module-from-file-file-name-input-names-output-names-dynamic-shape-mutable-rearrange-backend-memory-mode-power-mode-precision-mode)
- 创建输入: 使用`expr`或`numpy`函数创建`Var`即可作为输入
- [执行推理](../pymnn/_Module.html#forward-input)
- 获取输出: 输出为`Var`类型,可以通过`expr`或`numpy`函数执行后处理
### 示例
```python
import MNN.nn as nn
import MNN.cv as cv
import MNN.numpy as np
import MNN.expr as expr

# 配置执行后端,线程数,精度等信息;key-value请查看API介绍
config = {}
config['precision'] = 'low' # 当硬件支持(armv8.2)时使用fp16推理
config['backend'] = 0 # CPU
config['numThread'] = 4 # 线程数

rt = nn.create_runtime_manager((config,))
# 加载模型创建_Module
net = nn.load_module_from_file('mobilenet_v1.mnn', ['data'], ['prob'], runtime_manager=rt)

# 读取图片
image = cv.imread('cat.jpg')
# 转换为float32, 形状为[224,224,3]
image = cv.resize(image, (224, 224), mean=[103.94, 116.78, 123.68], norm=[0.017, 0.017, 0.017])
# 增加batch HWC to NHWC
input_var = np.expand_dims(image, 0)
# NHWC to NC4HW4
input_var = expr.convert(input_var, expr.NC4HW4)

# 执行推理
output_var = net.forward(input_var)

# NC4HW4 to NHWC
output_var = expr.convert(output_var, expr.NHWC)
# 打印出分类结果, 282为猫
print("output belong to class: {}".format(np.argmax(output_var)))
# output belong to class: 282
```
其他示例可以参考[示例](../pymnn/RuntimeManager.html#example);也可以参考[示例工程](../start/demo.html#id5)。

## 使用Python Expr API
### 数据类型
Python的`Expr API`相比C++在命名和使用方式上略有区别,但是功能一致。主要数据类型如下:
- [Var](../pymnn/Var.md) 表达式计算中的变量
### 主要用法
因为`Expr`不仅有模型推理的能力,还具备数值计算的能力。在实际使用中`Expr`被用作构图或者计算的情况更多,实际用来执行模型推理的情况并不多,当`Expr`用作模型推理时的主要流程如下:
- [加载计算图](../pymnn/expr.html#load-as-dict-filename)
- 获取输入输出:直接使用Python中的`dict`的方式获取,如:`net['input']`
- [写入输入数据](../pymnn/Var.html#write-data)
- [读取输出数据](../pymnn/Var.html#read):读取数据不限于`read`,尝试打印和使用都可能触发读取操作
### 示例
`Expr`用作模型推理:
```python
import MNN.cv as cv
import MNN.numpy as np
import MNN.expr as expr

net = expr.load_as_dict('mobilenet_v1.mnn')
input_var = net['data']
output_var = net['prob']

# 读取图片
image = cv.imread('cat.jpg')
# 转换为float32, 形状为[224,224,3]
image = cv.resize(image, (224, 224), mean=[103.94, 116.78, 123.68], norm=[0.017, 0.017, 0.017])
# 增加batch HWC to NHWC
input_data = np.expand_dims(image, 0)
# NHWC to NC4HW4
input_data = expr.convert(input_data, expr.NC4HW4)

input_var.write(input_data.read_as_tuple())

# 打印出分类结果, 282为猫
print("output belong to class: {}".format(np.argmax(output_var)))
```
`Expr`用于数值计算与数据存取:
```python
import MNN.numpy as np
import MNN.expr as expr

x = expr.range(0., 10., 1.)
y = expr.fill([10], 3.1415)
z = expr.sin(x * y + x / y)
expr.save([z], 'z.mnn')
a = expr.load_as_list('z.mnn')[0]
print(a)
'''
array([ 0. , -0.31288275, 0.59434694, -0.8161286 , 0.955958 ,
-0.9997932 , 0.943233 , -0.79195637, 0.561154 , -0.27400237],
dtype=float32)
'''
```
其他示例可以参考[示例](../pymnn/Var.html#example);也可以参考[示例工程](../start/demo.html#id5)。
## 使用cv/numpy API
### 数据类型
Python的`cv`和`numpy`接口,其中`cv`是对C++中`tools/cv`实现的封装;`numpy`则是对`expr`接口的封装;这两个接口主要为了提高MNN的易用性,与`opencv`与`numpy`做到了在接口上的部分兼容,在用法和思路上基本一致。主要数据类型如下:
Expand Down
Loading
Loading