mirror of
				https://github.com/osmarks/nanogpt-experiments.git
				synced 2025-10-29 22:33:01 +00:00 
			
		
		
		
	add note of caution for the produced warning, investigate later
This commit is contained in:
		
							
								
								
									
										4
									
								
								model.py
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								model.py
									
									
									
									
									
								
							| @@ -115,6 +115,10 @@ class GPT(nn.Module): | ||||
|             ln_f = nn.LayerNorm(config.n_embd), | ||||
|         )) | ||||
|         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False) | ||||
|         # with weight tying when using torch.compile() some warnings get generated: | ||||
|         # "UserWarning: functional_call was passed multiple values for tied weights. | ||||
|         # This behavior is deprecated and will be an error in future versions" | ||||
|         # not 100% sure what this is, so far seems to be harmless. TODO investigate | ||||
|         self.transformer.wte.weight = self.lm_head.weight # https://paperswithcode.com/method/weight-tying | ||||
|  | ||||
|         # report number of parameters | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Andrej Karpathy
					Andrej Karpathy