From 9352df23de68ae5da2541599cb894a195b098190 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Mon, 16 Jan 2023 05:57:33 +0000
Subject: [PATCH] docs for multinode ddp

---
 train.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/train.py b/train.py
index e69553d..e9cdf1e 100644
--- a/train.py
+++ b/train.py
@@ -2,11 +2,15 @@
 This training script can be run both on a single gpu in debug mode,
 and also in a larger training run with distributed data parallel (ddp).
 
-To run in debug mode example:
-$ python train.py --batch_size=32 --other=args
+To run on a single GPU, example:
+$ python train.py --batch_size=32 --compile=False
 
-To run DDP on 4 gpus on one node, example:
+To run with DDP on 4 gpus on 1 node, example:
 $ torchrun --standalone --nproc_per_node=4 train.py
+
+To run with DDP on 2 nodes with 8 gpus each (16 gpus total), example:
+$ torchrun --nproc_per_node=8 --nnodes=2 --node_rank=0 --master_addr=123.456.123.456 --master_port=1234 train.py
+$ torchrun --nproc_per_node=8 --nnodes=2 --node_rank=1 --master_addr=123.456.123.456 --master_port=1234 train.py
 """
 
 import os
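
For context, here is a minimal sketch of how a script launched by the torchrun commands
above would typically discover its rank and device. This assumes train.py follows the
standard torch.distributed pattern; variable names like ddp_local_rank are illustrative,
not necessarily the exact code in train.py. torchrun sets RANK, LOCAL_RANK, WORLD_SIZE,
MASTER_ADDR and MASTER_PORT in each process's environment, so the script itself needs no
node-rank arguments:

    import os
    import torch
    from torch.distributed import init_process_group, destroy_process_group

    # torchrun sets RANK in the environment; a bare `python train.py` run does not,
    # which is how the script can tell a ddp launch from a single-gpu debug run
    ddp = int(os.environ.get('RANK', -1)) != -1
    if ddp:
        # rendezvous over MASTER_ADDR/MASTER_PORT, which torchrun exports
        init_process_group(backend='nccl')
        ddp_rank = int(os.environ['RANK'])              # global rank, 0..world_size-1 across all nodes
        ddp_local_rank = int(os.environ['LOCAL_RANK'])  # rank within this node, selects the gpu
        device = f'cuda:{ddp_local_rank}'
        torch.cuda.set_device(device)
    else:
        device = 'cuda'  # single-gpu debug run

    # ... build the model on `device`; when ddp, wrap it with
    # torch.nn.parallel.DistributedDataParallel(model, device_ids=[ddp_local_rank]) ...

    if ddp:
        destroy_process_group()

Note that with --standalone, torchrun runs its own single-node rendezvous, which is why
the 1-node command above can omit --master_addr and --master_port.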