diff --git a/scaling_laws.ipynb b/scaling_laws.ipynb index e0e8fbd..946ea83 100644 --- a/scaling_laws.ipynb +++ b/scaling_laws.ipynb @@ -566,7 +566,7 @@ "source": [ "In the plot above, basically the models on the left of best are too small and trained for too long. The models on the right of best are way too large and trained for too little. The model at the red line is just right.\n", "\n", - "Now, the Chinchilla paper says that best model size for this flop budget is 400M params and 9.2B tokens (instead of 316M params 11.65B params) so there is some unresolved disagreement here too..." + "Now, the Chinchilla paper says that the best model size for this flop budget is 400M params and 9.2B tokens (instead of 316M params and 11.65B tokens) so there is some unresolved disagreement here too..." ] }, {