1
0
mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2024-12-18 14:10:28 +00:00

Add a note of caution about the warning produced when weight tying is used with torch.compile(); investigate later

This commit is contained in:
Andrej Karpathy 2023-01-14 20:38:22 +00:00
parent 7d7ded25ce
commit 89da79eee1

View File

@@ -115,6 +115,10 @@ class GPT(nn.Module):
ln_f = nn.LayerNorm(config.n_embd), ln_f = nn.LayerNorm(config.n_embd),
)) ))
self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False) self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
# NOTE: with weight tying, using torch.compile() generates warnings such as:
# "UserWarning: functional_call was passed multiple values for tied weights.
# This behavior is deprecated and will be an error in future versions"
# Not 100% sure what causes this; so far it seems to be harmless. TODO: investigate.
self.transformer.wte.weight = self.lm_head.weight # https://paperswithcode.com/method/weight-tying self.transformer.wte.weight = self.lm_head.weight # https://paperswithcode.com/method/weight-tying
# report number of parameters # report number of parameters