Created
January 11, 2024 21:37
-
-
Save yifuwang/c4a9fefa3d1e1dc017744e094be57c6c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
num_params=150 world_size=8 mixed=True Param size: 0.059 GB Copy bandwidth: 560.946 GB/s (gpu ms/iter: 0.105, cpu ms/iter 1.066) | |
num_params=54 world_size=8 mixed=True Param size: 1.453 GB Copy bandwidth: 732.657 GB/s (gpu ms/iter: 1.984, cpu ms/iter 0.417) | |
num_params=54 world_size=8 mixed=True Param size: 0.512 GB Copy bandwidth: 753.514 GB/s (gpu ms/iter: 0.679, cpu ms/iter 0.419) | |
num_params=50 world_size=8 mixed=True Param size: 0.200 GB Copy bandwidth: 719.400 GB/s (gpu ms/iter: 0.279, cpu ms/iter 0.410) | |
num_params=3 world_size=8 mixed=True Param size: 0.983 GB Copy bandwidth: 782.121 GB/s (gpu ms/iter: 1.257, cpu ms/iter 0.098) | |
num_params=9 world_size=8 mixed=True Param size: 0.802 GB Copy bandwidth: 766.458 GB/s (gpu ms/iter: 1.047, cpu ms/iter 0.134) | |
num_params=3 world_size=8 mixed=True Param size: 1.573 GB Copy bandwidth: 790.611 GB/s (gpu ms/iter: 1.989, cpu ms/iter 0.099) | |
num_params=9 world_size=8 mixed=True Param size: 2.248 GB Copy bandwidth: 789.754 GB/s (gpu ms/iter: 2.847, cpu ms/iter 0.138) | |
num_params=150 world_size=128 mixed=True Param size: 0.064 GB Copy bandwidth: 565.667 GB/s (gpu ms/iter: 0.113, cpu ms/iter 0.996) | |
num_params=54 world_size=128 mixed=True Param size: 1.458 GB Copy bandwidth: 670.681 GB/s (gpu ms/iter: 2.174, cpu ms/iter 0.289) | |
num_params=54 world_size=128 mixed=True Param size: 0.515 GB Copy bandwidth: 676.135 GB/s (gpu ms/iter: 0.762, cpu ms/iter 0.264) | |
num_params=50 world_size=128 mixed=True Param size: 0.203 GB Copy bandwidth: 662.603 GB/s (gpu ms/iter: 0.306, cpu ms/iter 0.249) | |
num_params=3 world_size=128 mixed=True Param size: 0.983 GB Copy bandwidth: 769.283 GB/s (gpu ms/iter: 1.278, cpu ms/iter 0.078) | |
num_params=9 world_size=128 mixed=True Param size: 0.802 GB Copy bandwidth: 761.057 GB/s (gpu ms/iter: 1.054, cpu ms/iter 0.104) | |
num_params=3 world_size=128 mixed=True Param size: 1.573 GB Copy bandwidth: 774.325 GB/s (gpu ms/iter: 2.031, cpu ms/iter 0.075) | |
num_params=9 world_size=128 mixed=True Param size: 2.248 GB Copy bandwidth: 773.048 GB/s (gpu ms/iter: 2.908, cpu ms/iter 0.099) | |
num_params=150 world_size=1024 mixed=True Param size: 0.202 GB Copy bandwidth: 641.405 GB/s (gpu ms/iter: 0.315, cpu ms/iter 0.616) | |
num_params=54 world_size=1024 mixed=True Param size: 1.524 GB Copy bandwidth: 646.772 GB/s (gpu ms/iter: 2.356, cpu ms/iter 0.276) | |
num_params=54 world_size=1024 mixed=True Param size: 0.575 GB Copy bandwidth: 658.157 GB/s (gpu ms/iter: 0.874, cpu ms/iter 0.278) | |
num_params=50 world_size=1024 mixed=True Param size: 0.246 GB Copy bandwidth: 642.032 GB/s (gpu ms/iter: 0.383, cpu ms/iter 0.245) | |
num_params=3 world_size=1024 mixed=True Param size: 1.007 GB Copy bandwidth: 728.990 GB/s (gpu ms/iter: 1.381, cpu ms/iter 0.080) | |
num_params=9 world_size=1024 mixed=True Param size: 0.818 GB Copy bandwidth: 689.763 GB/s (gpu ms/iter: 1.186, cpu ms/iter 0.102) | |
num_params=3 world_size=1024 mixed=True Param size: 1.611 GB Copy bandwidth: 765.507 GB/s (gpu ms/iter: 2.104, cpu ms/iter 0.078) | |
num_params=9 world_size=1024 mixed=True Param size: 2.248 GB Copy bandwidth: 757.626 GB/s (gpu ms/iter: 2.967, cpu ms/iter 0.106) | |
num_params=150 world_size=8 mixed=False Param size: 0.035 GB Copy bandwidth: 584.272 GB/s (gpu ms/iter: 0.060, cpu ms/iter 0.656) | |
num_params=54 world_size=8 mixed=False Param size: 0.961 GB Copy bandwidth: 728.234 GB/s (gpu ms/iter: 1.319, cpu ms/iter 0.264) | |
num_params=54 world_size=8 mixed=False Param size: 0.282 GB Copy bandwidth: 730.059 GB/s (gpu ms/iter: 0.386, cpu ms/iter 0.279) | |
num_params=50 world_size=8 mixed=False Param size: 0.149 GB Copy bandwidth: 670.899 GB/s (gpu ms/iter: 0.222, cpu ms/iter 0.274) | |
num_params=3 world_size=8 mixed=False Param size: 0.655 GB Copy bandwidth: 775.699 GB/s (gpu ms/iter: 0.845, cpu ms/iter 0.077) | |
num_params=9 world_size=8 mixed=False Param size: 0.634 GB Copy bandwidth: 773.612 GB/s (gpu ms/iter: 0.820, cpu ms/iter 0.112) | |
num_params=3 world_size=8 mixed=False Param size: 1.049 GB Copy bandwidth: 781.395 GB/s (gpu ms/iter: 1.342, cpu ms/iter 0.081) | |
num_params=9 world_size=8 mixed=False Param size: 1.711 GB Copy bandwidth: 789.156 GB/s (gpu ms/iter: 2.169, cpu ms/iter 0.116) | |
num_params=150 world_size=128 mixed=False Param size: 0.038 GB Copy bandwidth: 517.056 GB/s (gpu ms/iter: 0.073, cpu ms/iter 0.632) | |
num_params=54 world_size=128 mixed=False Param size: 0.963 GB Copy bandwidth: 684.246 GB/s (gpu ms/iter: 1.407, cpu ms/iter 0.294) | |
num_params=54 world_size=128 mixed=False Param size: 0.283 GB Copy bandwidth: 680.593 GB/s (gpu ms/iter: 0.416, cpu ms/iter 0.286) | |
num_params=50 world_size=128 mixed=False Param size: 0.151 GB Copy bandwidth: 682.197 GB/s (gpu ms/iter: 0.221, cpu ms/iter 0.255) | |
num_params=3 world_size=128 mixed=False Param size: 0.655 GB Copy bandwidth: 759.470 GB/s (gpu ms/iter: 0.863, cpu ms/iter 0.074) | |
num_params=9 world_size=128 mixed=False Param size: 0.634 GB Copy bandwidth: 765.694 GB/s (gpu ms/iter: 0.829, cpu ms/iter 0.094) | |
num_params=3 world_size=128 mixed=False Param size: 1.049 GB Copy bandwidth: 766.535 GB/s (gpu ms/iter: 1.368, cpu ms/iter 0.075) | |
num_params=9 world_size=128 mixed=False Param size: 1.711 GB Copy bandwidth: 787.608 GB/s (gpu ms/iter: 2.173, cpu ms/iter 0.105) | |
num_params=150 world_size=1024 mixed=False Param size: 0.122 GB Copy bandwidth: 640.203 GB/s (gpu ms/iter: 0.191, cpu ms/iter 0.668) | |
num_params=54 world_size=1024 mixed=False Param size: 1.000 GB Copy bandwidth: 713.947 GB/s (gpu ms/iter: 1.401, cpu ms/iter 0.274) | |
num_params=54 world_size=1024 mixed=False Param size: 0.318 GB Copy bandwidth: 642.855 GB/s (gpu ms/iter: 0.494, cpu ms/iter 0.276) | |
num_params=50 world_size=1024 mixed=False Param size: 0.185 GB Copy bandwidth: 643.297 GB/s (gpu ms/iter: 0.288, cpu ms/iter 0.262) | |
num_params=3 world_size=1024 mixed=False Param size: 0.671 GB Copy bandwidth: 690.626 GB/s (gpu ms/iter: 0.972, cpu ms/iter 0.078) | |
num_params=9 world_size=1024 mixed=False Param size: 0.645 GB Copy bandwidth: 754.431 GB/s (gpu ms/iter: 0.855, cpu ms/iter 0.109) | |
num_params=3 world_size=1024 mixed=False Param size: 1.074 GB Copy bandwidth: 769.985 GB/s (gpu ms/iter: 1.395, cpu ms/iter 0.080) | |
num_params=9 world_size=1024 mixed=False Param size: 1.711 GB Copy bandwidth: 766.337 GB/s (gpu ms/iter: 2.233, cpu ms/iter 0.103) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment