Skip to content

Instantly share code, notes, and snippets.

@botcs
Created February 23, 2022 11:45
Show Gist options
  • Save botcs/5b2196fddbc327eb2b27dccdcae794b1 to your computer and use it in GitHub Desktop.
Save botcs/5b2196fddbc327eb2b27dccdcae794b1 to your computer and use it in GitHub Desktop.
bottom_up_features, expt_flops, real_flops, gate_tensor = self.backbone(x, step_rate)
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/modeling/dynamic_arch/dynamic_backbone.py", line 300, in forward
self.all_cell_list[layer_index][_cell_index](
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/modeling/dynamic_arch/dynamic_cell.py", line 273, in forward
result_list[2].append(h_l_down * gate_weights_beta_down)
(function _print_stack)
[02/23 11:40:22 dl_lib.engine.hooks]: Total training time: 0:00:33 (0:00:00 on hooks)
Traceback (most recent call last):
File "/home/csbotos/github/fbscript/DynamicRouting/tools/train_net.py", line 174, in <module>
launch(
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/launch.py", line 51, in launch
mp.spawn(
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 230, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 188, in start_processes
while not context.join():
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 150, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 2 terminated with the following error:
Traceback (most recent call last):
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 59, in _wrap
fn(i, *args)
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/launch.py", line 93, in _distributed_worker
main_func(*args)
File "/home/csbotos/github/fbscript/DynamicRouting/tools/train_net.py", line 161, in main
res = trainer.train()
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/custom.py", line 204, in train
super().train(self.start_iter, self.max_iter)
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/train_loop.py", line 136, in train
self.run_step()
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/train_loop.py", line 517, in run_step
losses.backward()
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/_tensor.py", line 307, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/autograd/__init__.py", line 154, in backward
Variable._execution_engine.run_backward(
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 1, 1, 1]], which is output 0 of AsStridedBackward0, is at version 2; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
bottom_up_features, expt_flops, real_flops, gate_tensor = self.backbone(x, step_rate)
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/modeling/dynamic_arch/dynamic_backbone.py", line 300, in forward
self.all_cell_list[layer_index][_cell_index](
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/modeling/dynamic_arch/dynamic_cell.py", line 273, in forward
result_list[2].append(h_l_down * gate_weights_beta_down)
(function _print_stack)
[02/23 11:40:22 dl_lib.engine.hooks]: Total training time: 0:00:33 (0:00:00 on hooks)
Traceback (most recent call last):
File "/home/csbotos/github/fbscript/DynamicRouting/tools/train_net.py", line 174, in <module>
launch(
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/launch.py", line 51, in launch
mp.spawn(
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 230, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 188, in start_processes
while not context.join():
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 150, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 2 terminated with the following error:
Traceback (most recent call last):
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 59, in _wrap
fn(i, *args)
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/launch.py", line 93, in _distributed_worker
main_func(*args)
File "/home/csbotos/github/fbscript/DynamicRouting/tools/train_net.py", line 161, in main
res = trainer.train()
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/custom.py", line 204, in train
super().train(self.start_iter, self.max_iter)
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/train_loop.py", line 136, in train
self.run_step()
File "/home/csbotos/github/fbscript/DynamicRouting/./dl_lib/engine/train_loop.py", line 517, in run_step
losses.backward()
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/_tensor.py", line 307, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/data/engs-tvg-depth/csbotos/miniconda3/envs/dyndet/lib/python3.9/site-packages/torch/autograd/__init__.py", line 154, in backward
Variable._execution_engine.run_backward(
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [1, 1, 1, 1]], which is output 0 of AsStridedBackward0, is at version 2; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment