# Gist by @sparticlesteve, last active January 7, 2020.
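# Minimal test of torch.distributed collectives (broadcast, allreduce) over the
# MPI backend, one process per GPU.
#
# Usage sketch, assuming an MPI-enabled PyTorch build, a CUDA-aware MPI
# implementation (needed for GPU tensors over the MPI backend), 8 GPUs per
# node, and an arbitrary file name test_dist.py:
#
#   mpirun -n 8 python test_dist.py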
import torch
import torch.distributed as dist
# Configuration
ranks_per_node = 8
shape = 2**16 # fails if 2**17
dtype = torch.float32
# Initialize the process group with the MPI backend and map this rank to a GPU
dist.init_process_group(backend='mpi')
rank, n_ranks = dist.get_rank(), dist.get_world_size()
local_rank = rank % ranks_per_node
device = torch.device('cuda', local_rank)
print('MPI rank', rank, 'size', n_ranks, 'device', device)
# Allocate a random tensor on this rank's GPU
x = torch.randn(shape, dtype=dtype).to(device)
print('local result:', x.sum())
# Do a broadcast from rank 0
dist.broadcast(x, 0)
print('broadcast result:', x.sum())
# Do an allreduce (the default reduction op is SUM)
dist.all_reduce(x)
print('allreduce result:', x.sum())
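# Expected relationship between the printed values, assuming the default SUM op:
# after the broadcast every rank holds rank 0's tensor, so on every rank the
# allreduce result should be n_ranks times the broadcast result.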