mirror of
				https://github.com/osmarks/nanogpt-experiments.git
				synced 2025-10-24 20:07:41 +00:00 
			
		
		
		
	make mentions of mps in docs. ty good people in issue #28
This commit is contained in:
		| @@ -125,6 +125,8 @@ $ python train.py config/train_shakespeare_char.py --device=cpu --compile=False | ||||
|  | ||||
| Where we decrease the context length to just 64 characters and only use a batch size of 8. | ||||
|  | ||||
| Finally, on Apple Silicon MacBooks you can pass `--device=mps` ("Metal Performance Shaders"), which can significantly accelerate training (2-3X). You will need a specific version of PyTorch; see [Issue 28](https://github.com/karpathy/nanoGPT/issues/28). | ||||
|  | ||||
| ## benchmarking | ||||
|  | ||||
| For model benchmarking, `bench.py` might be useful. It's identical to what happens in the meat of the training loop of `train.py`, but omits much of the other complexity. | ||||
|   | ||||
							
								
								
									
										2
									
								
								train.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								train.py
									
									
									
									
									
								
							| @@ -67,7 +67,7 @@ min_lr = 6e-5 # minimum learning rate, should be ~= learning_rate/10 per Chinchi | ||||
| # DDP settings | ||||
| backend = 'nccl' # 'nccl', 'gloo', etc. | ||||
| # system | ||||
| device = 'cuda' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc. | ||||
| device = 'cuda' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1' etc., or try 'mps' on macbooks | ||||
| dtype = 'bfloat16' # 'float32' or 'bfloat16' | ||||
| compile = True # use PyTorch 2.0 to compile the model to be faster | ||||
| # ----------------------------------------------------------------------------- | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Andrej Karpathy
					Andrej Karpathy