Mirror of https://github.com/osmarks/nanogpt-experiments.git, synced 2025-10-31 15:23:01 +00:00
	Merge pull request #220 from python273/patch-1
Fix GPT.crop_block_size when flash attention is available
1 changed file: model.py
@@ -207,6 +207,7 @@ class GPT(nn.Module):
         self.config.block_size = block_size
         self.transformer.wpe.weight = nn.Parameter(self.transformer.wpe.weight[:block_size])
         for block in self.transformer.h:
+            if hasattr(block.attn, 'bias'):
                 block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]

     @classmethod
Author: Andrej
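
For context, a minimal illustrative sketch of why the hasattr guard is needed (names follow nanoGPT's model.py, but this is a condensed stand-in, not the real file): CausalSelfAttention only registers the causal-mask buffer 'bias' when PyTorch's flash attention kernel (scaled_dot_product_attention) is unavailable, so on the flash path the attribute does not exist and the previously unguarded slice in GPT.crop_block_size would fail with an AttributeError.

# Illustrative sketch only; follows nanoGPT's naming but is not the actual model.py.
import torch
import torch.nn as nn

class CausalSelfAttention(nn.Module):
    def __init__(self, block_size):
        super().__init__()
        # nanoGPT uses PyTorch's flash attention kernel when it is available;
        # the causal-mask buffer 'bias' is only registered on the fallback path.
        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
        if not self.flash:
            mask = torch.tril(torch.ones(block_size, block_size))
            self.register_buffer("bias", mask.view(1, 1, block_size, block_size))

class Block(nn.Module):
    def __init__(self, block_size):
        super().__init__()
        self.attn = CausalSelfAttention(block_size)

# The patched loop from GPT.crop_block_size: the hasattr guard makes the slice
# a no-op for attention modules on the flash path, which have no 'bias' buffer.
blocks = nn.ModuleList(Block(1024) for _ in range(2))
new_block_size = 256
for block in blocks:
    if hasattr(block.attn, 'bias'):
        block.attn.bias = block.attn.bias[:, :, :new_block_size, :new_block_size]

When flash attention is available, cropping the position embeddings alone is enough, since the flash kernel builds its causal mask internally (is_causal=True) and there is no per-block mask buffer to shrink.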