From c3f254844d63ece0b6481e9d9777d740a66eb965 Mon Sep 17 00:00:00 2001
From: Kirill
Date: Fri, 24 Mar 2023 14:51:02 +0300
Subject: [PATCH] Fix GPT.crop_block_size when flash attention is available

---
 model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/model.py b/model.py
index 0858f80..c9a6d7b 100644
--- a/model.py
+++ b/model.py
@@ -207,7 +207,8 @@ class GPT(nn.Module):
         self.config.block_size = block_size
         self.transformer.wpe.weight = nn.Parameter(self.transformer.wpe.weight[:block_size])
         for block in self.transformer.h:
-            block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]
+            if hasattr(block.attn, 'bias'):
+                block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]
 
     @classmethod
     def from_pretrained(cls, model_type, override_args=None):