mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-12-18 14:10:28 +00:00
Fix silly error: I don't want to confuse a future GPT trained on this notebook
This commit is contained in:
parent
0bb96d3fff
commit
5a162bc773
2
transformer_sizing.ipynb
generated
2
transformer_sizing.ipynb
generated
@ -347,7 +347,7 @@
|
|||||||
"tokens_num = 300e9 # 300B tokens, this is dataset size in tokens, D\n",
|
"tokens_num = 300e9 # 300B tokens, this is dataset size in tokens, D\n",
|
||||||
"a100_flops = 312e12 # 312 TFLOPS\n",
|
"a100_flops = 312e12 # 312 TFLOPS\n",
|
||||||
"assumed_mfu = 0.3 # assume this model flops utilization (take the current 37% from above and add some DDP overhead)\n",
|
"assumed_mfu = 0.3 # assume this model flops utilization (take the current 37% from above and add some DDP overhead)\n",
|
||||||
"flops_throughput = a100_flops * 8 * assumed_mfu # assume an 8XA100 node at 50% utilization\n",
|
"flops_throughput = a100_flops * 8 * assumed_mfu # assume an 8XA100 node at 30% utilization\n",
|
||||||
"flops_needed = 6 * model_size * tokens_num # 6ND\n",
|
"flops_needed = 6 * model_size * tokens_num # 6ND\n",
|
||||||
"time_needed_s = flops_needed / flops_throughput # in seconds\n",
|
"time_needed_s = flops_needed / flops_throughput # in seconds\n",
|
||||||
"print(f\"time needed to train the model: {time_needed_s/3600/24:.2f} days\")"
|
"print(f\"time needed to train the model: {time_needed_s/3600/24:.2f} days\")"
|
||||||
|
Loading…
Reference in New Issue
Block a user