[toc]
# Expose the CUDA toolkit and NVIDIA driver libraries/binaries to the shell.
#
# ${VAR:+:$VAR} appends the previous value only when it is set and non-empty,
# so an unset variable does not leave a trailing ':' behind (an empty entry
# in these search paths is interpreted as the current directory).
export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/nvidia/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# Extend LIBRARY_PATH from its own previous value rather than from
# LD_LIBRARY_PATH, so an existing LIBRARY_PATH is preserved and the CUDA
# directories are not listed twice.
export LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/nvidia/lib64${LIBRARY_PATH:+:${LIBRARY_PATH}}"
# Location of the LLVM build tree produced by the build steps in this file.
export LLVM_BUILD_DIR=/project/xingjinglu/llvm-project/build
export PATH="/usr/local/nvidia/bin:${PATH}"
# Build-time dependencies.
pip install ninja cmake wheel

# Clone, configure and build LLVM/MLIR.
# The steps are chained with '&&' so that a failed clone or 'cd' stops the
# sequence instead of running checkout/cmake/ninja in the wrong directory.
# NINJA_JOBS overrides the build parallelism; it defaults to 128.
git clone --recursive https://github.com/llvm/llvm-project.git &&
  cd llvm-project &&
  # Change to the commit given in triton/cmake/llvm-hash.txt.
  git checkout 4017f04e310454ccced4c404a23f7698eec735ca -b for_triton &&
  # -p: do not fail when the build directory already exists.
  mkdir -p build &&
  cd build &&
  cmake -G Ninja ../llvm \
    -DLLVM_ENABLE_PROJECTS="mlir;llvm" \
    -DLLVM_BUILD_EXAMPLES=ON \
    -DLLVM_TARGETS_TO_BUILD="X86;NVPTX;RISCV;AMDGPU" \
    -DMLIR_ENABLE_CUDA_RUNNER=ON \
    -DCMAKE_BUILD_TYPE=Release \
    -DLLVM_ENABLE_ASSERTIONS=ON \
    -DLLVM_ENABLE_RTTI=ON \
    -DLLVM_INSTALL_UTILS=ON \
    -DMLIR_INCLUDE_INTEGRATION_TESTS=ON &&
  ninja -j"${NINJA_JOBS:-128}"
# CUDA toolkit binaries, exported under the TRITON_*_PATH names.
export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
export TRITON_CUOBJDUMP_PATH=/usr/local/cuda/bin/cuobjdump
export TRITON_NVDISASM_PATH=/usr/local/cuda/bin/nvdisasm

# LLVM build tree and the include/lib locations derived from it.
export LLVM_BUILD_DIR=/project/xingjinglu/llvm-project/build
export LLVM_INCLUDE_DIRS="${LLVM_BUILD_DIR}/include"
export LLVM_LIBRARY_DIR="${LLVM_BUILD_DIR}/lib"
export LLVM_SYSPATH="${LLVM_BUILD_DIR}"

# Prepend the LLVM libraries to the loader search path. The previous value is
# appended only when it is non-empty, which avoids a trailing ':' (an empty
# entry is interpreted as the current directory).
export LD_LIBRARY_PATH="${LLVM_LIBRARY_DIR}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# Clone Triton, install it in editable mode and run the vector-add tutorial.
# The steps are chained with '&&' so that a failed clone or 'cd' does not
# lead to 'pip install -e .' running in an unrelated directory.
# NOTE(review): the clone is created in the current directory; when these
# commands are run right after the LLVM build steps, that is
# llvm-project/build -- confirm this is the intended location.
git clone https://github.com/openai/triton.git &&
  cd triton/python &&
  pip install -e . &&
  python tutorials/01-vector-add.py
- The output is shown below:
tensor([1.3713, 1.3076, 0.4940, ..., 0.6724, 1.2141, 0.9733], device='cuda:0')
tensor([1.3713, 1.3076, 0.4940, ..., 0.6724, 1.2141, 0.9733], device='cuda:0')
The maximum difference between torch and triton is 0.0
vector-add-performance:
size Triton Torch
0 4096.0 11.377778 11.130435
1 8192.0 21.787235 23.813955
2 16384.0 44.521738 41.795915
3 32768.0 73.142858 72.710056
4 65536.0 127.336788 127.336788
5 131072.0 199.399583 200.620406
6 262144.0 283.296835 285.767442
7 524288.0 381.023277 371.659727
8 1048576.0 412.608613 416.101597
9 2097152.0 444.311871 449.646643
10 4194304.0 463.766462 468.393097
11 8388608.0 472.615390 479.385543
12 16777216.0 477.602370 484.554523
13 33554432.0 478.037844 484.414634
14 67108864.0 479.979873 488.623552
15 134217728.0 479.870017 489.126924