diff --git a/.gitmodules b/.gitmodules index c5e0419..91c7e94 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,4 +4,8 @@ [submodule "lightseq"] path = lightseq - url = https://github.com/thu-coai/lightseq-nat \ No newline at end of file + url = https://github.com/thu-coai/lightseq-nat + +[submodule "cub"] + path = cub + url = https://github.com/NVIDIA/cub \ No newline at end of file diff --git a/README.md b/README.md index 5bfffda..4c12da6 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ This repo is modified from [``fairseq:5175fd``](https://github.com/pytorch/fairs ## Requirements & Installation * Python >= 3.7 -* Pytorch == 1.10.1 +* Pytorch == 1.10.1 (tested with cuda == 10.2 or 11.3) * gcc >= 7.0.0 (for compiling cuda operations. See FAQs if you want to use a lower gcc version) * ``git clone --recurse-submodules https://github.com/thu-coai/DA-Transformer.git && pip install -e .`` * (Optional) Customized LightSeq for NAT (``cd lightseq && pip install -e .``) @@ -43,6 +43,7 @@ Most codes of the framework are from Fairseq. We mainly add the following files. 
fs_plugins ├── criterions │ └── nat_dag_loss.py # DA-Transformer loss +├── cub # NVIDIA CUB headers (git submodule at the repository root), needed to compile the cuda kernels ├── custom_ops # operations implementations and cuda kernels │ ├── dag_best_alignment.cu │ ├── logsoftmax_gather.cu diff --git a/cub b/cub new file mode 160000 index 0000000..618a46c --- /dev/null +++ b/cub @@ -0,0 +1 @@ +Subproject commit 618a46c27764f0e0b86fb3643a572ed039180ad8 diff --git a/fs_plugins/custom_ops/dag_loss.py b/fs_plugins/custom_ops/dag_loss.py index 612324e..fe98a2e 100644 --- a/fs_plugins/custom_ops/dag_loss.py +++ b/fs_plugins/custom_ops/dag_loss.py @@ -16,6 +16,7 @@ import os import math +import sys import torch from torch import nn, Tensor @@ -39,6 +40,7 @@ def get_dag_kernel(): if dag_kernel is not None: return dag_kernel else: + print("Start compiling cuda operations for DA-Transformer...", file=sys.stderr, flush=True) dag_kernel = load( "dag_loss_fn", sources=[ @@ -47,9 +49,11 @@ def get_dag_kernel(): os.path.join(module_path, "dag_best_alignment.cu"), os.path.join(module_path, "logsoftmax_gather.cu"), ], - extra_cflags=['-DOF_SOFTMAX_USE_FAST_MATH'], - extra_cuda_cflags=['-DOF_SOFTMAX_USE_FAST_MATH'], + extra_cflags=['-DOF_SOFTMAX_USE_FAST_MATH', '-O3'], + extra_cuda_cflags=['-DOF_SOFTMAX_USE_FAST_MATH', '-O3'], + extra_include_paths=[os.path.join(module_path, "../../cub")], ) + print("Cuda operations compiling finished", file=sys.stderr, flush=True) return dag_kernel class DagLossFunc(Function): diff --git a/setup.py b/setup.py index 196009c..164624d 100644 --- a/setup.py +++ b/setup.py @@ -224,6 +224,7 @@ def do_setup(package_data): "sacrebleu[ja]", "tqdm", "bitarray", + "ninja" ], dependency_links=dependency_links, packages=find_packages(