mirror of
https://github.com/osmarks/nanogpt-experiments.git
synced 2024-11-10 20:09:58 +00:00
add note of caution for the produced warning, investigate later
This commit is contained in:
parent
7d7ded25ce
commit
89da79eee1
4
model.py
4
model.py
@@ -115,6 +115,10 @@ class GPT(nn.Module):
|
||||
ln_f = nn.LayerNorm(config.n_embd),
|
||||
))
|
||||
self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
|
||||
# with weight tying when using torch.compile() some warnings get generated:
|
||||
# "UserWarning: functional_call was passed multiple values for tied weights.
|
||||
# This behavior is deprecated and will be an error in future versions"
|
||||
# not 100% sure what causes this; so far it seems to be harmless. TODO: investigate
|
||||
self.transformer.wte.weight = self.lm_head.weight # https://paperswithcode.com/method/weight-tying
|
||||
|
||||
# report number of parameters
|
||||
|
Loading…
Reference in New Issue
Block a user