# Build recipe: install the toolchain and libraries with conda, then run an
# out-of-source CMake build with the DNNL and CUDA/cuDNN options enabled.
# NOTE(review): the CMake options below look like CTranslate2's build
# options -- confirm against the project this file lives in.

# Toolchain: CMake, make and the conda-packaged GCC/G++ for linux-64.
conda install cmake make gxx_linux-64 gcc_linux-64

# Intel MKL (plus the oneDNN CPU development package) from the intel channel.
conda install -c intel mkl-devel mkl-static onednn-devel-cpu-iomp

# CUDA runtime/compiler, cuRAND, cuBLAS and cuDNN 8.1 from the nvidia channel.
conda install -c nvidia cuda-cudart-dev cuda-nvcc libcurand-dev libcublas-dev cudnn=8.1

# Compile.
# The steps are chained with '&&' so that a failure (e.g. 'cd' not succeeding)
# stops the sequence instead of running cmake/make from the wrong directory.
# 'mkdir -p' lets the recipe be re-run when build/ already exists.
# The install prefix is "$PWD/release", evaluated after 'cd build', i.e. it
# resolves to <source>/build/release. "$PWD" is used rather than a command
# substitution because a bare '(pwd)' is a syntax error in bash/POSIX sh.
mkdir -p build && cd build &&
  cmake \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_CXX_FLAGS="-msse4.1" \
    -DBUILD_CLI=OFF \
    -DWITH_DNNL=ON \
    -DOPENMP_RUNTIME=COMP \
    -DWITH_CUDA=ON \
    -DWITH_CUDNN=ON \
    -DCUDA_DYNAMIC_LOADING=ON \
    -DCUDA_NVCC_FLAGS="-Xfatbin=-compress-all" \
    -DCUDA_ARCH_LIST="Common" \
    -DCMAKE_INSTALL_PREFIX="$PWD/release" \
    .. &&
  make -j 10