diff --git a/transformer_sizing.ipynb b/transformer_sizing.ipynb index 53791ae..2e528de 100644 --- a/transformer_sizing.ipynb +++ b/transformer_sizing.ipynb @@ -347,7 +347,7 @@ "tokens_num = 300e9 # 300B tokens, this is dataset size in tokens, D\n", "a100_flops = 312e12 # 312 TFLOPS\n", "assumed_mfu = 0.3 # assume this model flops utilization (take the current 37% from above and add some DDP overhead)\n", - "flops_throughput = a100_flops * 8 * assumed_mfu # assume an 8XA100 node at 50% utilization\n", + "flops_throughput = a100_flops * 8 * assumed_mfu # assume an 8XA100 node at 30% utilization\n", "flops_needed = 6 * model_size * tokens_num # 6ND\n", "time_needed_s = flops_needed / flops_throughput # in seconds\n", "print(f\"time needed to train the model: {time_needed_s/3600/24:.2f} days\")"