diff --git a/model.py b/model.py index 0858f80..c9a6d7b 100644 --- a/model.py +++ b/model.py @@ -207,7 +207,8 @@ class GPT(nn.Module): self.config.block_size = block_size self.transformer.wpe.weight = nn.Parameter(self.transformer.wpe.weight[:block_size]) for block in self.transformer.h: - block.attn.bias = block.attn.bias[:,:,:block_size,:block_size] + if hasattr(block.attn, 'bias'): + block.attn.bias = block.attn.bias[:,:,:block_size,:block_size] @classmethod def from_pretrained(cls, model_type, override_args=None):