diff --git a/scaling_laws.ipynb b/scaling_laws.ipynb
index c82deb6..e0e8fbd 100644
--- a/scaling_laws.ipynb
+++ b/scaling_laws.ipynb
@@ -5,7 +5,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Trying to reproduce results from [Chinchilla](https://arxiv.org/pdf/2203.15556.pdf):"
+    "Reproducing some scaling laws results from [Chinchilla](https://arxiv.org/pdf/2203.15556.pdf). Can't get the numbers to match exactly, but can still be used as a rough guide to help determine compute-optimal models. Also contains related utilities for calculating flops and param counts."
    ]
   },
   {
@@ -136,51 +136,6 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "our estimated params: 43.7094M, chinchilla params: 44.0000M, d_model: 512, n_heads: 8, n_layers: 8\n",
-      "our estimated params: 57.3287M, chinchilla params: 57.0000M, d_model: 576, n_heads: 9, n_layers: 9\n",
-      "our estimated params: 73.8253M, chinchilla params: 74.0000M, d_model: 640, n_heads: 10, n_layers: 10\n",
-      "our estimated params: 89.8285M, chinchilla params: 90.0000M, d_model: 640, n_heads: 10, n_layers: 13\n",
-      "our estimated params: 105.8317M, chinchilla params: 106.0000M, d_model: 640, n_heads: 10, n_layers: 16\n",
-      "our estimated params: 116.7283M, chinchilla params: 117.0000M, d_model: 768, n_heads: 12, n_layers: 12\n",
-      "our estimated params: 139.7660M, chinchilla params: 140.0000M, d_model: 768, n_heads: 12, n_layers: 15\n",
-      "our estimated params: 162.8037M, chinchilla params: 163.0000M, d_model: 768, n_heads: 12, n_layers: 18\n",
-      "our estimated params: 174.9745M, chinchilla params: 175.0000M, d_model: 896, n_heads: 14, n_layers: 14\n",
-      "our estimated params: 195.8746M, chinchilla params: 196.0000M, d_model: 896, n_heads: 14, n_layers: 16\n",
-      "our estimated params: 216.7747M, chinchilla params: 217.0000M, d_model: 896, n_heads: 14, n_layers: 18\n",
-      "our estimated params: 251.1196M, chinchilla params: 251.0000M, d_model: 1024, n_heads: 16, n_layers: 16\n",
-      "our estimated params: 278.4133M, chinchilla params: 278.0000M, d_model: 1024, n_heads: 16, n_layers: 18\n",
-      "our estimated params: 305.7070M, chinchilla params: 306.0000M, d_model: 1024, n_heads: 16, n_layers: 20\n",
-      "our estimated params: 424.6938M, chinchilla params: 425.0000M, d_model: 1280, n_heads: 10, n_layers: 18\n",
-      "our estimated params: 488.6490M, chinchilla params: 489.0000M, d_model: 1280, n_heads: 10, n_layers: 21\n",
-      "our estimated params: 509.3356M, chinchilla params: 509.0000M, d_model: 1408, n_heads: 11, n_layers: 18\n",
-      "our estimated params: 552.6042M, chinchilla params: 552.0000M, d_model: 1280, n_heads: 10, n_layers: 24\n",
-      "our estimated params: 586.7150M, chinchilla params: 587.0000M, d_model: 1408, n_heads: 11, n_layers: 21\n",
-      "our estimated params: 632.3389M, chinchilla params: 632.0000M, d_model: 1536, n_heads: 12, n_layers: 19\n",
-      "our estimated params: 664.0945M, chinchilla params: 664.0000M, d_model: 1408, n_heads: 11, n_layers: 24\n",
-      "our estimated params: 724.4206M, chinchilla params: 724.0000M, d_model: 1536, n_heads: 12, n_layers: 22\n",
-      "our estimated params: 816.5023M, chinchilla params: 816.0000M, d_model: 1536, n_heads: 12, n_layers: 25\n",
-      "our estimated params: 892.8138M, chinchilla params: 893.0000M, d_model: 1792, n_heads: 14, n_layers: 20\n",
-      "our estimated params: 1018.1338M, chinchilla params: 1018.0000M, d_model: 1792, n_heads: 14, n_layers: 23\n",
-      "our estimated params: 1143.4537M, chinchilla params: 1143.0000M, d_model: 1792, n_heads: 14, n_layers: 26\n",
-      "our estimated params: 1265.7869M, chinchilla params: 1266.0000M, d_model: 2048, n_heads: 16, n_layers: 22\n",
-      "our estimated params: 1424.5576M, chinchilla params: 1424.0000M, d_model: 2176, n_heads: 17, n_layers: 22\n",
-      "our estimated params: 1429.4569M, chinchilla params: 1429.0000M, d_model: 2048, n_heads: 16, n_layers: 25\n",
-      "our estimated params: 1593.1269M, chinchilla params: 1593.0000M, d_model: 2048, n_heads: 16, n_layers: 28\n",
-      "our estimated params: 1609.3196M, chinchilla params: 1609.0000M, d_model: 2176, n_heads: 17, n_layers: 25\n",
-      "our estimated params: 1730.7878M, chinchilla params: 1731.0000M, d_model: 2304, n_heads: 18, n_layers: 24\n",
-      "our estimated params: 1794.0815M, chinchilla params: 1794.0000M, d_model: 2176, n_heads: 17, n_layers: 28\n",
-      "our estimated params: 2006.9637M, chinchilla params: 2007.0000M, d_model: 2304, n_heads: 18, n_layers: 28\n",
-      "our estimated params: 2283.1396M, chinchilla params: 2283.0000M, d_model: 2304, n_heads: 18, n_layers: 32\n",
-      "our estimated params: 2298.0403M, chinchilla params: 2298.0000M, d_model: 2560, n_heads: 20, n_layers: 26\n",
-      "our estimated params: 2638.9811M, chinchilla params: 2639.0000M, d_model: 2560, n_heads: 20, n_layers: 30\n",
-      "our estimated params: 2979.9219M, chinchilla params: 2980.0000M, d_model: 2560, n_heads: 20, n_layers: 34\n",
-      "our estimated params: 3468.9339M, chinchilla params: 3530.0000M, d_model: 2688, n_heads: 22, n_layers: 36\n",
-      "our estimated params: 3802.8109M, chinchilla params: 3802.0000M, d_model: 2816, n_heads: 22, n_layers: 36\n",
-      "our estimated params: 4152.0233M, chinchilla params: 4084.0000M, d_model: 2944, n_heads: 22, n_layers: 36\n",
-      "our estimated params: 4516.5711M, chinchilla params: 4516.0000M, d_model: 3072, n_heads: 24, n_layers: 36\n",
-      "our estimated params: 6796.2747M, chinchilla params: 6796.0000M, d_model: 3584, n_heads: 28, n_layers: 40\n",
-      "our estimated params: 9294.0206M, chinchilla params: 9293.0000M, d_model: 4096, n_heads: 32, n_layers: 42\n",
-      "our estimated params: 11714.6222M, chinchilla params: 11452.0000M, d_model: 4352, n_heads: 32, n_layers: 47\n",
       "our estimated params: 12296.1623M, chinchilla params: 12295.0000M, d_model: 4608, n_heads: 36, n_layers: 44\n",
       "our estimated params: 13124.4826M, chinchilla params: 12569.0000M, d_model: 4608, n_heads: 32, n_layers: 47\n",
       "our estimated params: 14614.4279M, chinchilla params: 13735.0000M, d_model: 4864, n_heads: 32, n_layers: 47\n",
@@ -190,7 +145,7 @@
     }
    ],
    "source": [
-    "for m in chilchilla_models:\n",
+    "for m in chilchilla_models[-5:]: # only print last 5 models of the table\n",
     "    p, d, f, k, h, l = m\n",
     "    nparams = chinchilla_params(seq_len = 1024, vocab_size = 32000, d_model = d, num_heads = h, num_layers = l, ffw_size=f)\n",
     "    print(f\"our estimated params: {nparams/1e6:.4f}M, chinchilla params: {p/1e6:.4f}M, d_model: {d}, n_heads: {h}, n_layers: {l}\")"
@@ -201,7 +156,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We are almost able to reproduce the parameter counts for the Chinchilla models. TODO resolve...\n",
+    "We are almost able to reproduce the parameter counts for the Chinchilla models.\n",
     "\n",
     "Now turning to FLOPs:"
    ]
@@ -472,7 +427,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## scaling laws"
+    "## Scaling Laws: Approach 3\n",
+    "\n",
+    "In their \"Approach 3\", the Chinchilla paper fits a function L(N,D) to approximate the final loss given the model size and the data size. Here is the final fit:"
    ]
   },
   {
@@ -483,7 +440,7 @@
     {
      "data": {
       "text/plain": [
-       "<matplotlib.colorbar.Colorbar at 0x7f9d2e9ba9e0>"
+       "<matplotlib.colorbar.Colorbar at 0x7f1bd262a9e0>"
       ]
      },
      "execution_count": 8,
@@ -555,7 +512,7 @@
      "output_type": "stream",
      "text": [
       "best model size: 316.23M\n",
-      "best dataset size: 10.12B\n"
+      "best dataset size: 11.65B\n"
      ]
     },
     {
@@ -570,7 +527,7 @@
     },
     {
      "data": {
-      "image/png": "",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAS8AAAEqCAYAAABEE9ZrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAuRElEQVR4nO3dd3xUVf7/8dekTdpMGpCEFEACCQETQhRIQEAFqVJ2XV0WBV3Erx1FccV1v7qiht8itoW14E/UXVlAFNgfRdpSFEJvoQiElkIKhJDJJKTN3N8fQ0aiBEKYzJ3yeT4e9/FI7tyZ+SRh3px77rnnaBRFURBCCCfjoXYBQgjRHBJeQginJOElhHBKEl5CCKck4SWEcEoSXkIIpyThJYRwSl5qF2BvZrOZs2fPotPp0Gg0apcjhPgFRVEoLy+nbdu2eHg03r5yu/A6e/YsMTExapchhLiO3NxcoqOjG33c7cJLp9MBll+MXq9XuRoXVlEBbdtavj57FgIC1K1HOA2DwUBMTIz1s9oYtwuv+lNFvV4v4dWSPD1//lqvl/ASN+x63TrSYS+EcEoSXkIIpyThJYRwShJeQginJOElhHBKEl5CCKck4SWEaHGf/3iKZfvyMVTV2uw1JbyEEC2q1mRm1pqjTF6wj5ySSpu9roSXEKJF7cu9SEWNidAAHxIjbTcwXMJLCNGifjh+HoD0jmF4eNhuMgQJLyFEi/rx+DkA7ujUyqavK+ElhGgxhqpa9ueVAdC3U2ubvraElxCixWSeKMFkVrilVQBRwX42fW0JLyFEi/nxcn9XXxufMoKElxCiBf2YbQmvPnESXkIIJ5FXWsmp8xV4emhI6xhm89eX8BJCtIgtl1tdydFB6H29bf76El5CiBbxg7W/y7ZXGetJeAkhbM5sVth6ogSw/fiuehJeQgibO1xg4EJFDYFaL7rHBLfIe0h4CSFsbtMxy6j63reE4u3ZMjEj4SWEsLn68Oof36bF3kPCSwhhU+VVtew5UwpA/xbqrAcJLyGEjW09UULd5VuCYsP8W+x9JLyEEDZVf8rYr3PLtbpAwksIYUOKorDpaH1/l4SXEMJJnDhXQf7FS/h4edC7g+1vCbqShJcQwmbqTxl7dQjFz8ezRd9LwksIYTPWIRIt3N8FEl5CCBupqjWx/aTllqABLdzfBRJeQggb2XayhOo6M1HBfnRsHdji7yfhJYSwiSuHSGg0tlslqDESXkIIm9jwUzFgn/4ucKDwmjFjBhqNhueee67RY7744gs0Gk2DzdfX135FCiGu6uQ5I6dLKvH21LTIfPVX42WXd7mOnTt38sknn5CUlHTdY/V6PUePHrV+b4/mqRDi2v57udXV+5YwArX2iRXVW15Go5Fx48Yxd+5cQkJCrnu8RqMhIiLCuoWHh9uhSiHEtdSH150tOIvEL6keXk899RTDhw9n4MCBTTreaDTSrl07YmJiGDVqFIcOHbrm8dXV1RgMhgabEMJ2yqtq2XHqAgB3JbhJeC1YsIA9e/aQkZHRpOPj4+P5/PPPWbZsGf/6178wm82kp6eTl5fX6HMyMjIICgqybjExMbYqXwiBZW3G+lkk2rcKsNv7qhZeubm5TJ48ma+//rrJne5paWmMHz+e7t27079/f7777jtat27NJ5980uhzpk2bRllZmXXLzc211Y8ghODnU0Z7trpAxQ773bt3U1xcTI8ePaz7TCYTmzdvZvbs2VRXV+Ppee17o7y9vUlJSSE7O7vRY7RaLVqt1mZ1CyF+ZjYrbDjqZuF19913k5WV1WDfI488QkJCAn/605+uG1xgCbusrCyGDRvWUmUKIa4hK7+M80bLQhu3tQ+163urFl46nY5u3bo12BcQEEBYWJh1//jx44mKirL2ib3xxhv07t2buLg4Ll68yMyZMzlz5gyPPvqo3esXQvx8ynhHp1b4eNm3F8ohxnk1JicnBw+Pn38hpaWlTJo0icLCQkJ
CQkhNTWXr1q0kJiaqWKUQ7kutU0YAjaIoit3fVUUGg4GgoCDKysrQ6/Vql+O6Kiog8PLNuUYjBNjvKpSwj2JDFT3fXo9GAzteGUhrnW36lpv6GVV9nJcQwjmtO2JpdXWPCbZZcN0ICS8hRLOsPVwIwMAu6tzlIuElhLhhFdV1bDlhmXjwnkQJLyGEk/jh+Dlq6sy0C/Mnrk3LTzx4NRJeQogbtuZwEQCDuoSrNrOLhJcQ4obUmczWiQcHqnTKCBJeQogbtPtMKaWVtQT7e3Nbu+tPY9VSJLyEEDdk7eVTxrvi2+DlqV6ESHgJIZpMURTWHrnc36XiKSNIeAkhbkB2sZEzJZX4eHpwh50W2miMhJcQosnqrzKmdbTfXPWNkfASQjTZ9wcto+qHdItQuRIJLyFEE+WVVpKVX4aHRv3+LpDwEkI00epDllPG29uH0ipQ/dmJJbyEEE3y/cECwDFOGUHCSwjRBMXlVew6UwrA4K4SXkIIJ7H2cBGKAskxwbQN9lO7HEDCSwjRBNarjA7S6gIJLyHEdZRV1pJ5ee6uwV3Vv8pYT8JLCHFN644UUWdWiA/XcUtrdebuuhoJLyHENa26fMo42EGuMtaT8BJCNKq8qpbNx84BMFTCSwjhLNYdKaLGZKZj6wASInRql9OAhJcQolErDlgGpg5PaqvadM+NkfASQlxV2aVaNh87D8CIpEiVq/k1CS8hxFWtO2w5ZezUJpDO4Y51yggSXkKIRqzIqj9ldLxWF0h4CSGuoqyylh+OW64yDr9VwksI4STWHC6k1mQZmNrJAU8ZQcJLCHEV9aeMwxy01QUSXkKIX7hYWcOPxy1XGYcnOdbA1CtJeAkhGliZVUidWSEhQkdcG8c8ZQQJLyHELyzblw/A6JQolSu5NgkvIYTV2YuX2HH6AgD3JrdVuZprk/ASQlgtP3AWRYGe7UOJcpAZUxsj4SWEsPrP/rMAjOzu2K0ukPASQlyWXWzkYL4BLw+NQw+RqCfhJYQAfm519evcmtAAH5WruT4JLyEEiqLwn8tXGUc5wSkjSHgJIYADeWWcLqnEz9uTgV0cZ5GNa5HwEkKwZK+l1TUoMZwArZfK1TSNhJcQbq7WZLb2d43p4dgDU68k4SWEm9t49BwXKmpordNyR1wrtctpMgkvIdzcd3vyABjdvS1ens4TCQ5T6YwZM9BoNDz33HPXPO6bb74hISEBX19fbr31VlauXGmfAoVwQRcra1h/pBiA3/SIVrmaG+MQ4bVz504++eQTkpKSrnnc1q1bGTt2LBMnTmTv3r2MHj2a0aNHc/DgQTtVKoRrWX6ggBqTmS6RerpE6tUu54aoHl5Go5Fx48Yxd+5cQkJCrnnsBx98wJAhQ5g6dSpdunRh+vTp9OjRg9mzZzf6nOrqagwGQ4NNCGFRf8r4WyfqqK+neng99dRTDB8+nIEDB1732MzMzF8dN3jwYDIzMxt9TkZGBkFBQdYtJibmpmsWwhWcPGdkT85FPD00TnEv4y+pGl4LFixgz549ZGRkNOn4wsJCwsMbDqALDw+nsLCw0edMmzaNsrIy65abm3tTNQvhKurHdvXr1Io2Ol+Vq7lxqo1Gy83NZfLkyaxduxZf35b7xWm1WrRabYu9vhDOyGRW+G6PJbzGOFlHfT3Vwmv37t0UFxfTo0cP6z6TycTmzZuZPXs21dXVeHp6NnhOREQERUVFDfYVFRUREeG482wL4Yi2ZJ8n/+Il9L5e3JPoHLcD/ZJqp4133303WVlZ7Nu3z7rddtttjBs3jn379v0quADS0tJYv359g31r164lLS3NXmUL4RIW7bJ0n4xOicLX+9efNWegWstLp9PRrVu3BvsCAgIICwuz7h8/fjxRUVHWPrHJkyfTv39/Zs2axfDhw1mwYAG7du3i008/tXv9Qjir0ooa1hyynMHcf5vzXsBS/WrjteTk5FBQUGD9Pj09nfnz5/Ppp5+SnJzM4sWLWbp06a9CUAjRuGX78qk
xmUmM1NMtKkjtcppNoyiKonYR9mQwGAgKCqKsrAy93rkG5TmVigoIDLR8bTRCQIC69QjAMm/XsA9/5EiBgb+O7MqE9PZql/QrTf2MOnTLSwhhW4fOGjhSYMDHy8NpJh1sjISXEG5k4U5LR/3grhEE+zv+VM/XIuElhJuoqjVZF5R9wIk76utJeAnhJlYcKMBQVUdUsB/pHcPULuemSXgJ4Sbm78gB4A+9YvHw0Khczc1rVnh9+eWXrFixwvr9Sy+9RHBwMOnp6Zw5c8ZmxQkhbOOnQgO7z5Ti5aHhd7c55+1Av9Ss8Hr77bfx87MsBZ6ZmcmcOXP429/+RqtWrXj++edtWqAQ4ubN325pdQ1KDHfKm7Cvplkj7HNzc4mLiwNg6dKl/Pa3v+Wxxx6jT58+DBgwwJb1CSFuUmVNHUsu34Q9rlc7lauxnWa1vAIDAykpKQFgzZo1DBo0CABfX18uXbpku+qEEDft/+0/S3l1He3C/F2io75es1pegwYN4tFHHyUlJYVjx44xbNgwAA4dOkT79u1tWZ8Q4iZ9ffmU8Q89XaOjvl6zWl5z5swhLS2Nc+fO8e233xIWZknz3bt3M3bsWJsWKIRovqy8Mg7kleHj6cF9qa7RUV+vWS2v4ODgq84b/9e//vWmCxJC2M6/tlmu/g/uFkFYoGtNytmsltf333/Pjz/+aP1+zpw5dO/enT/84Q+UlpbarDghRPNdrKxh6eUR9ePTXKejvl6zwmvq1KnWVXiysrJ44YUXGDZsGKdOnWLKlCk2LVAI0TyLduVSXWeZ+ua2dtdemcsZNeu08dSpUyQmJgLw7bffMmLECN5++2327Nlj7bwXQqjHZFb4KtNyyjghvR0ajet01NdrVsvLx8eHyspKANatW8c999wDQGhoqKyLKIQD2PBTMXmllwjy82ZksvOtydgUzWp59e3blylTptCnTx927NjBwoULATh27BjR0a51RUMIZ/Rl5mkAfn97DH4+zjlH/fU0q+U1e/ZsvLy8WLx4MR999BFRUZZkX7VqFUOGDLFpgUKIG5NdbOSH4+fRaODB3q7XUV+vWS2v2NhYli9f/qv977333k0XJIS4OfXDI+5OaENMqL/K1bScZq8eZDKZWLp0KUeOHAGga9eujBw58qpLlgkh7MNQVcs3l5c1G5/WXt1iWlizwis7O5thw4aRn59PfHw8ABkZGcTExLBixQo6duxo0yKFEE2zcEcuFTUmOrUJ5I5OrdQup0U1q8/r2WefpWPHjuTm5rJnzx727NlDTk4OHTp04Nlnn7V1jUKIJqgzmZm35RQAj97RwSWHR1ypWS2vTZs2sW3bNkJDQ637wsLCmDFjBn369LFZcUKIplt1sJCzZVWEBfgwqrtrDo+4UrNaXlqtlvLy8l/tNxqN+Pg494okQjgjRVH47EdLq+vB3u3w9Xb9vudmhdeIESN47LHH2L59O4qioCgK27Zt4/HHH2fkyJG2rlEIcR17ckrZn3sRHy8Plx4ecaVmhdeHH35Ix44dSUtLw9fXF19fX9LT04mLi+P999+3cYlCiOv57AdLq2tM9yha61xr9ojGNHtKnGXLlpGdnW0dKtGlSxfr1NBCCPvJKalk9aFCACbe0UHlauynyeF1vdkiNmzYYP363XffbX5FQogbMveHk5gV6Ne5NZ3DdWqXYzdNDq+9e/c26ThXvzwrhCM5b6xm0eVBqU/0d6/xlU0OrytbVkIIx/DFltNU15npHhNM71tCr/8EFyIrZgvhpIzVdXx1efaIx/t3dLuzHgkvIZzUv7fnYKiq45bWAdyTGK52OXYn4dWI7GIj0747YP2fTQhHUl1n4rMfTwLweL+OLrWkWVNJeDVi1+kL/HtHLp9uPonJrKhdjhANLN2bT5Ghmgi9L6NS2qpdjiokvBoxOiWKEH9v8kovsfZwkdrlCGFVZzLzj40nAJjYtwNaL9e/FehqJLwa4evtydiesQDWO/WFcAT/2X+WMyWVhAb
4MK53rNrlqEbC6xoeSmuHp4eG7acucOhsmdrlCIHJrDD7v9mAZdobf59mzyfq9CS8riEyyI+h3SIAmLfltLrFCAEsP3CWk+crCPb3dvmZUq9Hwus6HuljuVfsP/vOct5YrXI1wp2Zr2h1TezTgUCt+7a6QMLrunrEBpMcHUSNycz87TlqlyPc2PeHCjlebETn68WEPu3VLkd1El7XodFo+GNfS+vrn9vOUFNnVrki4Y7MZoUP1x8HLGcDel9vlStSn4RXEwztFkm4Xsu58mqW7ctXuxzhhlZkFfBTYTk6rRd/lFYXIOHVJD5eHvzxct/Xp5tPYpZBq8KO6kxm3lt3DIBH77iFYH+Zah1UDq+PPvqIpKQk9Ho9er2etLQ0Vq1a1ejxX3zxBRqNpsHm6+trl1rH9opFp/XieLGRDUeL7fKeQgAs3XeWk+csVxj/2Le92uU4DFXDKzo6mhkzZrB792527drFXXfdxahRozh06FCjz9Hr9RQUFFi3M2fO2KVWva83f7g8IPCTTSft8p5C1NSZ+WC9pdX1eP+O6KSvy0rV8Lr33nsZNmwYnTp1onPnzrz11lsEBgaybdu2Rp+j0WiIiIiwbuHh9rub/o99OuDtqWHH6QvsPlNqt/cV7mvRrlxyL1yiVaCWCW4+ruuXHKbPy2QysWDBAioqKkhLS2v0OKPRSLt27YiJibluKw2guroag8HQYGuucL0vY1Is6+F9sulEs19HiKaoqjVZx3U9fWdH/Hzc8x7GxqgeXllZWQQGBqLVann88cdZsmQJiYmJVz02Pj6ezz//nGXLlvGvf/0Ls9lMeno6eXl5jb5+RkYGQUFB1i0mJuam6n2s3y0ArD1SRHax8aZeS4hr+SrzNIWGKtoG+TK2l/vew9gYjaIoql46q6mpIScnh7KyMhYvXsxnn33Gpk2bGg2wK9XW1tKlSxfGjh3L9OnTr3pMdXU11dU/j4w3GAzExMRQVlaGXq9vVs2TvtrF2sNF3JcazTu/S27Wa7i8igoIDLR8bTRCQIC69TiZi5U19PvbBgxVdcy8L4nf3XZz/+k6E4PBQFBQ0HU/o6q3vHx8fIiLiyM1NZWMjAySk5P54IMPmvRcb29vUlJSyM7ObvQYrVZrvZpZv92sp+60LPG2ZG8+OSWVN/16QvzSPzaewFBVR0KEjt/0iFa7HIekenj9ktlsbtBSuhaTyURWVhaRkZEtXFVD3WOC6de5NSazwj82Nh6cQjRHXmklX1yeCOBPQxPwdMNZUptC1fCaNm0amzdv5vTp02RlZTFt2jQ2btzIuHHjABg/fjzTpk2zHv/GG2+wZs0aTp48yZ49e3jwwQc5c+YMjz76qN1rn3y3pfW1eHceeaXS+hK28+6aY9SYzKR3DGNA59Zql+OwVL0tvbi4mPHjx1NQUEBQUBBJSUmsXr2aQYMGAZCTk4OHx8/5WlpayqRJkygsLCQkJITU1FS2bt3apP4xW0ttF0qfuDC2ZJfw0cYTvDXmVrvXIFzPobNlLLl8C9q0oV3cbkWgG6F6h729NbUzsCm2nyzhgU+34e2pYdPUO2kb7GejKl2AdNjfMEVRGPfZdraeKGFkcls+HJuidkmqcJoOe2fW65YwenUIpdak8LGM+xI3ac3hIraeKMHHy4Opg+PVLsfhSXjdpMl3dwJgwY5c6fsSzVZdZ+LtlUcAmHRHB2JC/VWuyPFJeN2ktI5hpN0SRo3JbJ1vSYgbNW/Lac6UVNJGp+XJAXFql+MUJLxukkajYeoQSxN/8e48TpyTUffixpwrr7beBvSnIQkEuPn0zk0l4WUDPWJDGNglHLMC7649pnY5wsm8s/ooxuo6kmOCrffOiuuT8LKRFwd3RqOBFQcKOJgvy6SJptmbU8qi3bkA/O+IRDxkQGqTSXjZSEKEnlHJlmXX31lzVOVqhDMwmRX+suwgigK/7RFNarsQtUtyKhJeNvT8oM54eWjYePQcmSdK1C5HOLivt5/hYL4Bva8
X04YlqF2O05HwsqF2YQH84fLUJW+tPCxz3YtGnSuvZuZqSwt96pAEWgVqVa7I+Uh42djkuzuh03pxMN/Akr2y0pC4uoyVRyivqiMpOog/9JS5uppDwsvGwgK1PH2XZZzOzNVHqaypU7ki4WgyT5Tw3d58NBqYPqqbzBrRTBJeLWBCenuiQ/woNFQxd/MptcsRDqSq1sS07w4AMK5XLMkxweoW5MQkvFqAr7cnLw+1dMB+vOkERYYqlSsSjuL9dcc5XVJJhN6Xl4ZIJ/3NkPBqIcNvjaRHbDCXak38n1U/qV2OcAAH88uY+4Nl2bzpo7uhl2XMboqEVwvRaDT8771d0Wjgu7357Dx9Qe2ShIrqTGZe/u4AJrPC8KRIBiXab8k+VyXh1YK6xwTz+9stCyf8ZelB6kxmlSsSapn7wykO5hsI8vPm9Xu7ql2OS5DwamFTBycQ7O/NT4Xl/HObfVb3Fo7lWFE5712+5/XV4V1orZMxXbYg4dXCQgN8rBPLvbvmGMXl0nnvTmpNZqYs2keNycxdCW24L1VWArIVCS87+P3tsSRFB1FeXceMldJ5705m/zebg/kGgv29mfGbW2VOehuS8LIDTw8Nb4zqZu2833zsnNolCTs4kHeR2Rss83RNH9WNNnpflStyLRJedtI9JpgJae0BeGVJFhXVMvLelV2qMTFl0X5MZoURSZHce3nGEWE7El52NHVwPFHBfuSVXmLWGpm00JW9ueIw2cVGWuu0TB/VTe1yXJKElx0FaL14a4zlH/K8rafYm1OqckWiJXx/sJCvt+eg0cB793cnJMBH7ZJckoSXnQ2Ib8NvUqJQFHj52yxq6mTslys5e/ESf/rWcu/iY/1uoW+nVipX5LokvFTwlxGJhAX4cLSonPfXyemjqzCZFZ5buI+yS7UkRwfxwiBZe7ElSXipICTAx3r6+PGmE+ySW4dcwgfrjrHj1AUCfDz5cGwKPl7y8WpJ8ttVyZBukfy2RzRmBaYs2o9Rrj46tQ0/FfPh5eXL3hpzK+3CAlSuyPVJeKnotZGJRAX7kXOhkjeXH1a7HNFMuRcqeW7hPgAe6t2O0bJ8mV1IeKlI7+vNrPuT0Whgwc5c1hwqVLskcYOqak08+fUeSz9XTDCvjuiidkluQ8JLZb1vCWPSHbcA8NK3B8i/eEnlikRTKYrC6/85RFZ+GSH+3vxjXA+0Xp5ql+U2JLwcwAv3dCYpOoiLlbU8M38PtTJ1jlP4cutpFuzMRaOB93+fQlSwn9oluRUJLweg9fJk9tge6Hy92JNzkXdWy6K1ju7H4+eZvuIIANOGJtC/c2uVK3I/El4OIjbMn5n3JQHwyeaT/PenIpUrEo05db6CJ7/ejcms8JseUdbTfmFfEl4OZEi3SB5Obw/A8wv3c6akQt2CxK9crKxh4pc7MVTVkRIbzNtjZJobtUh4OZhpwxJIjgmm7FItk77aJeO/HEhVrYnHvtrNyXMVRAb58slDqfh6Swe9WiS8HIzWy5NPH0qljU7LsSIjLyzah9msqF2W2zObFV5YtJ8dpy+g03ox75HbaaOT+bnUJOHlgML1vnz8UCo+nh6sPlTEh/89rnZJbu+tlUdYkVWAt6eGT8ankhChV7sktyfh5aB6xIbw5uX7H99fd5zlB86qXJH7+mTTCf7vj5aVz9/5XTLpHWWmCEcg4eXA7r8thj/26QDAlIX72X6yROWK3M8/t50h4/KiwS8PTWBUd7n1x1FIeDm4Pw/vwuCu4dSYzEz6ahfZxeVql+Q2vt2dx1+WHgTgyQEdebx/R5UrEleS8HJwnh4aPvh9Cj1igzFU1THh850UG2T5tJa2KquAqYv3A/Bwenvr8nXCcUh4OQFfb08+m3A7HVoFkH/xEuM/30FpRY3aZbms5QfO8vS/92JW4Hep0fzviEQZy+WAJLycRGiAD18+0pM2Oi0/FZbz0OfbKbtUq3ZZLmfJ3jye/fdeTGaFMSlRzPhtEh4eElyOSNXw+ui
jj0hKSkKv16PX60lLS2PVqlXXfM4333xDQkICvr6+3HrrraxcudJO1aovNsyf+ZN6ERbgw8F8Aw/P2yGDWG1o0a5cpizaj1mB+2+L5p3fJeMpweWwVA2v6OhoZsyYwe7du9m1axd33XUXo0aN4tChQ1c9fuvWrYwdO5aJEyeyd+9eRo8ezejRozl48KCdK1dPXBsd/5zYiyA/b/bmXOSP83ZKgNnA3M0neWnxARQFHuwdy4zfJElwOTiNoigONXw7NDSUmTNnMnHixF899sADD1BRUcHy5cut+3r37k337t35+OOPm/T6BoOBoKAgysrK0Oudd6DhgbyLjJu7nfLqOpJjgvnykdsJ9negJbYqKiAw0PK10QgBjjktstms8NbKI9ZxXBP7duDV4V2kj0tFTf2MOkyfl8lkYsGCBVRUVJCWlnbVYzIzMxk4cGCDfYMHDyYzM7PR162ursZgMDTYXEFSdDBfT+pFiL83+3Mv8sAn2ygul6uQN6K6zsRzC/dZg+uVYQkSXE5E9fDKysoiMDAQrVbL448/zpIlS0hMTLzqsYWFhYSHhzfYFx4eTmFh49MnZ2RkEBQUZN1iYmJsWr+akqKDWfg/abTRaTlaVM79H2eSU1KpdllO4byxmgc/285/9p/Fy0PDew8k81i/jhJcTkT18IqPj2ffvn1s376dJ554ggkTJnD4sO0Wo5g2bRplZWXWLTc312av7Qg6h+tY/Hg60SF+nC6pZMw/trD7jCyldi2HzpYxavYWdp4uRaf14vOHb2dMSrTaZYkbpHp4+fj4EBcXR2pqKhkZGSQnJ/PBBx9c9diIiAiKihpO0ldUVERERESjr6/Vaq1XM+s3VxMb5s+3T6TTLUpPSUUNY+duZ9m+fLXLckgrDhRw30eZ5F+8RIdWASx5qg/9ZBZUp6R6eP2S2Wymurr6qo+lpaWxfv36BvvWrl3baB+ZOwnX+7Lof9IYlBhOTZ2ZyQv28e6ao5hkOh3A0r/12rKDPDV/D5dqTdzRqRVLn+xDXJtAtUsTzeSl5ptPmzaNoUOHEhsbS3l5OfPnz2fjxo2sXr0agPHjxxMVFUVGRgYAkydPpn///syaNYvhw4ezYMECdu3axaeffqrmj+Ew/H28+PjBVGasOsLcH07x4X+z2Zt7kfcf6E5YoFbt8lRz+nwFT/97DwfzLRdr/qffLUwdHI+Xp8P93y1ugKrhVVxczPjx4ykoKCAoKIikpCRWr17NoEGDAMjJycHD4+d/YOnp6cyfP59XX32VV155hU6dOrF06VK6deum1o/gcDw9NPx5eCJdIvW8siSLH46fZ/iHPzJnXAqp7ULVLs+uFEVh0a5cpi8/grG6jhB/b969vzt3JrRRuzRhAw43zquluco4r6Y4WljOE19bpi329NDw1ICOPH1XJ3y87NDiUHmcV0HZJV7+NotNx84BcHv7ED4cm0JkkCxP5uicbpyXsL34CB3/ebovo7q3xWRW+PC/2Yyes4WfCl1jrNvVmM0K/96Rwz3vbWbTsXP4eHnwyrAEFjyWJsHlYqTl5SZWHCjg1aVZlFbW4u2p4bF+t/DUnXH4+7RQz4EKLa/9uRf532UH2Z9XBkByTDCzfpdEXBtdi7+3sJ2mfkYlvNxIcXkVr3x3kHVHLMNN2gb58pcRiQzpFmH7wZl2DK+Csku8v/Y4i3bnoiig03rx/KDOjE9rJ53yTkjCqxHuHF5g6cRefaiI6csPk3/xEgA9O4Ty4j3x9Oxgww59O4RXibGajzae4KttZ6ipMwPwm5QoXh6WICv7ODEJr0a4e3jVu1Rj4qNNJ/h40wnrB/+OTq2YMqgzKbEhN/8GLRheeaWVzNtymgU7cqioMQGWAP7TkHi3u6LqiiS8GiHh1dDZi5eYvSGbRTtzqbs8oDW1XQiP9GnP4K4ReDf3tMvG4aUoCntySvkq8wzLDxRYB992i9IzdXAC/Tq1kvsSXYSEVyMkvK4up6SSv//3OEv35VNrsvy
TiAzyZUxKFKNTougcfoOd3jYKryJDFUv25rNoVy4nz1VY9/eJC2PSHbfQv3NrCS0XI+HVCAmvays2VPGv7TnM336G88af58nvEqlnWLcI+se3plvboOtPjdzM8FIUhePFRtYeLmLN4SL25160Pubn7cmwWyN5pE97ukUF3eiPJpyEhFcjJLyapqrWxNrDRSzbl8/Go+esp5RgmU8/rWMYydFBdIuybHpf74Yv0MTwulBRw/GicrLyy9h5+gK7TpdS8ovFRVLbhfC71GiGJ0Wi++X7CJcj4dUICa8bV1pRw/eHCtnwUzFbT5RcddrpVoE+RIf4Ex3iR6tALSFKDZNH9wBg3vdZVPr4UlNn5kJFDefKqzlnrOb0+YpfBRWAj5cH6R3DGJQYzsAu4YTr5cqhO5HwaoSE182pNZnZm3ORnacvkJVXRlZ+mXXIxZX8aqo48t59AHR5fjGXfBoPoJhQPzq30ZHaPoSe7UO5NToIrZdni/0MwrE19TOq6o3Zwvl4e3rQs0NogzFhZZW15JZWkldaSV7pJUora6i++PMtSCOT22L298fby4OwAB9a67S0DtQSFeJHXJvAlhvlL1yatLxEy3CSBTiE45Ebs4UQLk3CSwjhlCS8hBBOScJLCOGUJLyEEE5JwksI4ZQkvIQQTsntRgfWD2szGFx3HneHUPHzDBAYDGAyqVeLcCr1n83rDUF1u/AqLy8HICYmRuVK3EjbtmpXIJxQeXk5QUGNzx7idiPszWYzZ8+eRafTueU8UAaDgZiYGHJzc+UOAyfgjn8vRVEoLy+nbdu2DdZt/SW3a3l5eHgQHR2tdhmq0+v1bvNhcAXu9ve6VournnTYCyGckoSXEMIpSXi5Ga1Wy2uvvYZWq1W7FNEE8vdqnNt12AshXIO0vIQQTknCSwjhlCS8hBBOScJLCOGUJLyEEE5Jwks08N5779G1a1cSExN59tlnr3tzrLCvMWPGEBISwn333ddg//Lly4mPj6dTp0589tlnKlVnXzJUQlidO3eO3r17c+jQIby9venXrx/vvPMOaWlpapcmLtu4cSPl5eV8+eWXLF68GIC6ujoSExPZsGEDQUFBpKamsnXrVsLCwlSutmVJy0s0UFdXR1VVFbW1tdTW1tKmTRu1SxJXGDBgADqdrsG+HTt20LVrV6KioggMDGTo0KGsWbNGpQrtR8LLhWzevJl7772Xtm3botFoWLp06a+OmTNnDu3bt8fX15devXqxY8cO62OtW7fmxRdfJDY2lrZt2zJw4EA6duxox5/Atd3s36cxZ8+eJSoqyvp9VFQU+fn5tizdIUl4uZCKigqSk5OZM2fOVR9fuHAhU6ZM4bXXXmPPnj0kJyczePBgiouLASgtLWX58uWcPn2a/Px8tm7dyubNm+35I7i0m/37iIYkvFzI0KFDefPNNxkzZsxVH3/33XeZNGkSjzzyCImJiXz88cf4+/vz+eefA7Bu3Tri4uIIDQ3Fz8+P4cOHs23bNnv+CC7tZv8+jWnbtm2DllZ+fj5t3WACSAkvN1FTU8Pu3bsZOHCgdZ+HhwcDBw4kMzMTsMwuu3XrVqqqqjCZTGzcuJH4+Hi1SnYrTfn7NKZnz54cPHiQ/Px8jEYjq1atYvDgwS1dsurcbjJCd3X+/HlMJhPh4eEN9oeHh/PTTz8B0Lt3b4YNG0ZKSgoeHh7cfffdjBw5Uo1y3U5T/j4AAwcOZP/+/VRUVBAdHc0333xDWloas2bN4s4778RsNvPSSy+5/JVGkPASv/DWW2/x1ltvqV2GaMS6deuuun/kyJFu9x+NnDa6iVatWuHp6UlRUVGD/UVFRURERKhUlagnf58bJ+HlJnx8fEhNTWX9+vXWfWazmfXr18sgVAcgf58bJ6eNLsRoNJKdnW39/tSpU+zbt4/Q0FBiY2OZMmUKEyZM4LbbbqNnz568//77VFRU8Mgjj6hYtfuQv4+NKcJlbNiwQQF+tU2YMMF6zN///nclNjZW8fH
xUXr27Kls27ZNvYLdjPx9bEvubRRCOCXp8xJCOCUJLyGEU5LwEkI4JQkvIYRTkvASQjglCS8hhFOS8BJCOCUJLyGEU5LwEkI4JQkv4dAGDBjAc8891+Tjv/jiC4KDg5v9fo3NLS8cj9yYLcQVCgoKCAkJUbsM0QQSXkJcQebOch5y2ihu2IABA3jmmWd47rnnCAkJITw8nLlz51qnb9HpdMTFxbFq1aoGz9u0aRM9e/ZEq9USGRnJyy+/TF1dnfXxiooKxo8fT2BgIJGRkcyaNetX711dXc2LL75IVFQUAQEB9OrVi40bNza59pqaGp5++mkiIyPx9fWlXbt2ZGRkWB+/8rTx9ddfR6PR/Gr74osvAMt8WxkZGXTo0AE/Pz+Sk5OtC8EKO1B7WgvhfPr376/odDpl+vTpyrFjx5Tp06crnp6eytChQ5VPP/1UOXbsmPLEE08oYWFhSkVFhaIoipKXl6f4+/srTz75pHLkyBFlyZIlSqtWrZTXXnvN+rpPPPGEEhsbq6xbt045cOCAMmLECEWn0ymTJ0+2HvPoo48q6enpyubNm5Xs7Gxl5syZilarVY4dO6YoiqLMmzdPCQoKarT2mTNnKjExMcrmzZuV06dPKz/88IMyf/586+OAsmTJEkVRFKW8vFwpKCiwbu+8847i7++vZGVlKYqiKG+++aaSkJCgfP/998qJEyeUefPmKVqtVtm4caNtftHimiS8xA3r37+/0rdvX+v3dXV1SkBAgPLQQw9Z9xUUFCiAkpmZqSiKorzyyitKfHy8YjabrcfMmTNHCQwMVEwmk1JeXq74+PgoixYtsj5eUlKi+Pn5WcPrzJkziqenp5Kfn9+gnrvvvluZNm2aoijXD69nnnlGueuuuxrUcaUrw+tKmZmZiq+vr7Jw4UJFURSlqqpK8ff3V7Zu3drguIkTJypjx45t9P2F7Uifl2iWpKQk69eenp6EhYVx6623WvfVr4JTv2DqkSNHSEtLQ6PRWI/p06cPRqORvLw8SktLqampoVevXtbHQ0NDGyy9lpWVhclkonPnzg1qqa6ubvJqOQ8//DCDBg0iPj6eIUOGMGLECO65555rPicnJ4fRo0fz4osvcv/99wOQnZ1NZWUlgwYNanBsTU0NKSkpTapF3BwJL9Es3t7eDb7XaDQN9tWHlNlsttl7Go1GPD092b17N56eng0eCwwMbNJr9OjRg1OnTrFq1SrWrVvH/fffz8CBAxvtq6qoqGDkyJGkpaXxxhtvNKgFYMWKFURFRTV4jlarvZEfSzSThJewiy5duvDtt9+iKIo12LZs2YJOpyM6OprQ0FC8vb3Zvn07sbGxAJSWlnLs2DH69+8PQEpKCiaTieLiYu64445m16LX63nggQd44IEHuO+++xgyZAgXLlwgNDS0wXGKovDggw9iNpv55z//2aDVmJiYiFarJScnx1qfsC8JL2EXTz75JO+//z7PPPMMTz/9NEePHuW1115jypQpeHh4EBgYyMSJE5k6dSphYWG0adOGP//5z3h4/HxBvHPnzowbN47x48cza9YsUlJSOHfuHOvXrycpKYnhw4dft453332XyMhI68K633zzDREREVcd2Pr666+zbt061qxZg9FotLa2goKC0Ol0vPjiizz//POYzWb69u1LWVkZW7ZsQa/XM2HCBJv97sTVSXgJu4iKimLlypVMnTqV5ORkQkNDmThxIq+++qr1mJkzZ2I0Grn33nvR6XS88MILlJWVNXidefPm8eabb/LCCy+Qn59Pq1at6N27NyNGjGhSHTqdjr/97W8cP34cT09Pbr/9dlauXNkgJOtt2rQJo9FIenr6r2p4+OGHmT59Oq1btyYjI4OTJ08SHBxMjx49eOWVV5rxGxI3ShbgEEI4JRmkKoRwShJeQginJOElhHBKEl5CCKck4SWEcEoSXkIIpyThJYRwShJeQginJOElhHBKEl5CCKck4SWEcEr/HzrztTTmmeQlAAAAAEl
FTkSuQmCC",
       "text/plain": [
        "<Figure size 300x300 with 1 Axes>"
       ]
@@ -580,7 +537,8 @@
     }
    ],
    "source": [
-    "c = 1.92e19 # target compute budget (usually know this because we know how many GPU for how long go brrr)\n",
+    "c = 2.21e19 # target compute budget (usually know this because we know how many GPU for how long go brrr)\n",
+    "# (I got this flop number from row 1 of Table A3)\n",
     "# sweep model sizes from 10M to 100B\n",
     "ns = 10 ** np.arange(7, 11, step=2**-4)\n",
     "# using C = 6*N*D, solve for D that maintains the compute budget c\n",
@@ -608,7 +566,7 @@
    "source": [
     "In the plot above, basically the models on the left of best are too small and trained for too long. The models on the right of best are way too large and trained for too little. The model at the red line is just right.\n",
     "\n",
-    "Now, the Chinchilla paper says that best model size is 400M params and 8B tokens, so this once again disagrees and there is some calculations problem. TODO figure out and fix..."
+    "Now, the Chinchilla paper says that the best model size for this flop budget is 400M params and 9.2B tokens (instead of our 316M params and 11.65B tokens), so there is some unresolved disagreement here too..."
    ]
   },
   {
@@ -657,15 +615,15 @@
      "output_type": "stream",
      "text": [
       "closest model found:\n",
-      "model size: 1298.02M\n",
-      "dataset size: 60.32B\n",
-      "flops: 4.697589e+20\n",
-      "loss: 2.41\n"
+      "model size: 399.54M\n",
+      "dataset size: 14.43B\n",
+      "flops: 3.459892e+19\n",
+      "loss: 2.76\n"
      ]
     }
    ],
    "source": [
-    "query_model_size = 1.3e9 # GPT-3 size\n",
+    "query_model_size = 400e6\n",
     "ns = np.array([n for c, n, d in models])\n",
     "ds = np.array([d for c, n, d in models])\n",
     "# find the index of the closest model size in ns\n",
@@ -683,7 +641,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "So we predict 60B tokens is compute optimal. But e.g. [MosaicML quotes 26B](https://t.co/HyEvCqP70C). So again wrong."
+    "This should have come out as 9.2B according to Table A3 in Chinchilla paper, per my understanding of it."
    ]
   },
   {
@@ -691,7 +649,116 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "TLDR atm: nothing reproduces, but progress is being made."
+    "## Scaling Laws: Approach 2\n",
+    "\n",
+    "Approach 2 is probably my favorite one because it fixes a flop budget and runs a number of model/dataset sizes, measures the loss, fits a parabola, and gets the minimum. So it's a fairly direct measurement of what we're after. The best way to then calculate the compute-optimal number of tokens for any given model size, as an example, is via simple interpolation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Approach 1 numbers\n",
+    "# # parameters, tokens\n",
+    "# raw = [\n",
+    "#     [400e6, 8e9],\n",
+    "#     [1e9, 20.2e9],\n",
+    "#     [10e9, 205.1e9],\n",
+    "#     [67e9, 1.5e12],\n",
+    "#     [175e9, 3.7e12],\n",
+    "#     [280e9, 5.9e12],\n",
+    "#     [520e9, 11e12],\n",
+    "#     [1e12, 21.2e12],\n",
+    "#     [10e12, 216.2e12],\n",
+    "# ]\n",
+    "\n",
+    "# Approach 2 numbers\n",
+    "# parameters, tokens\n",
+    "raw = [\n",
+    "    [400e6, 7.7e9],\n",
+    "    [1e9, 20.0e9],\n",
+    "    [10e9, 219.5e9],\n",
+    "    [67e9, 1.7e12],\n",
+    "    [175e9, 4.3e12],\n",
+    "    [280e9, 7.1e12],\n",
+    "    [520e9, 13.4e12],\n",
+    "    [1e12, 26.5e12],\n",
+    "    [10e12, 292.0e12],\n",
+    "]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "y = 1.0409573169995892x + 0.9353887152390791\n"
+     ]
+    }
+   ],
+   "source": [
+    "# fit a line by linear regression to the raw data\n",
+    "import numpy as np\n",
+    "x = np.array([np.log10(x[0]) for x in raw])\n",
+    "y = np.array([np.log10(x[1]) for x in raw])\n",
+    "A = np.vstack([x, np.ones(len(x))]).T\n",
+    "m, c = np.linalg.lstsq(A, y, rcond=None)[0]\n",
+    "print(f\"y = {m}x + {c}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 300x300 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.figure(figsize=(3, 3))\n",
+    "# plot the line\n",
+    "plt.plot([q[0] for q in raw], [10**(m*np.log10(q[0]) + c) for q in raw], label='linear regression', color='r')\n",
+    "# plot the raw data\n",
+    "plt.scatter([q[0] for q in raw], [q[1] for q in raw], label='raw data')\n",
+    "plt.xscale('log')\n",
+    "plt.yscale('log')\n",
+    "plt.xlabel('parameters')\n",
+    "plt.ylabel('tokens')\n",
+    "plt.title('compute optimal models')\n",
+    "plt.grid()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "predicted parameters for 1.240000e+08 tokens: 2.292426e+09\n"
+     ]
+    }
+   ],
+   "source": [
+    "xquery = 124e6 # query model size here (e.g. GPT-2 small is 124M)\n",
+    "yquery = 10**(m*np.log10(xquery) + c)\n",
+    "print(f\"predicted parameters for {xquery:e} tokens: {yquery:e}\")"
    ]
   }
  ],