Mirror of https://github.com/osmarks/nanogpt-experiments.git, synced 2025-10-31 07:13:01 +00:00
disabling torch.jit.script here for a massive performance boost when using torch.compile, our default. See issue #11. Thanks @vgoklani for flagging.
model.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
@@ -14,8 +14,8 @@ import torch
 import torch.nn as nn
 from torch.nn import functional as F
 
-@torch.jit.script
-def fused_gelu(x):
+# @torch.jit.script # good to enable when not using torch.compile, disable when using (our default)
+def new_gelu(x):
     """
     Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT).
     Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
@@ -71,7 +71,7 @@ class MLP(nn.Module):
 
     def forward(self, x):
         x = self.c_fc(x)
-        x = fused_gelu(x)
+        x = new_gelu(x)
         x = self.c_proj(x)
         x = self.dropout(x)
         return x
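For context, a minimal runnable sketch (not part of the commit) of the pattern this change enables. The body of new_gelu is not shown in the hunk above; it is reconstructed here from the standard BERT/GPT tanh-approximation GELU that the docstring references. TinyMLP, n_embd=64, and the input shape are illustrative stand-ins, not names from model.py. With the @torch.jit.script decorator left off, torch.compile (the repo's default path) is free to fuse the pointwise GELU math itself.

import math
import torch
import torch.nn as nn

# @torch.jit.script  # per the commit: enable only when NOT using torch.compile
def new_gelu(x):
    # tanh-approximation GELU (as used in Google BERT / OpenAI GPT)
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))

class TinyMLP(nn.Module):  # illustrative stand-in for the MLP block in model.py
    def __init__(self, n_embd=64):
        super().__init__()
        self.c_fc = nn.Linear(n_embd, 4 * n_embd)
        self.c_proj = nn.Linear(4 * n_embd, n_embd)

    def forward(self, x):
        return self.c_proj(new_gelu(self.c_fc(x)))

mlp = torch.compile(TinyMLP())   # requires PyTorch >= 2.0
y = mlp(torch.randn(8, 64))      # torch.compile can fuse the elementwise GELU ops on its own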
Author: Andrej Karpathy