1
0
Mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2025-08-30 01:17:57 +00:00

Merge pull request #220 from python273/patch-1

Fix GPT.crop_block_size when flash attention is available
This commit is contained in:
Andrej
2023-04-12 22:13:01 -07:00
committed by GitHub

View File

@@ -207,7 +207,8 @@ class GPT(nn.Module):
self.config.block_size = block_size
self.transformer.wpe.weight = nn.Parameter(self.transformer.wpe.weight[:block_size])
for block in self.transformer.h:
-            block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]
+            if hasattr(block.attn, 'bias'):
+                block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]
@classmethod
def from_pretrained(cls, model_type, override_args=None):