mirror of
				https://github.com/osmarks/nanogpt-experiments.git
				synced 2025-10-31 07:13:01 +00:00 
			
		
		
		
	Merge pull request #220 from python273/patch-1
Fix GPT.crop_block_size when flash attention is available
This commit is contained in:
		
							
								
								
									
										3
									
								
								model.py
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								model.py
									
									
									
									
									
								
							| @@ -207,7 +207,8 @@ class GPT(nn.Module): | |||||||
|         self.config.block_size = block_size |         self.config.block_size = block_size | ||||||
|         self.transformer.wpe.weight = nn.Parameter(self.transformer.wpe.weight[:block_size]) |         self.transformer.wpe.weight = nn.Parameter(self.transformer.wpe.weight[:block_size]) | ||||||
|         for block in self.transformer.h: |         for block in self.transformer.h: | ||||||
|             block.attn.bias = block.attn.bias[:,:,:block_size,:block_size] |             if hasattr(block.attn, 'bias'): | ||||||
|  |                 block.attn.bias = block.attn.bias[:,:,:block_size,:block_size] | ||||||
|  |  | ||||||
|     @classmethod |     @classmethod | ||||||
|     def from_pretrained(cls, model_type, override_args=None): |     def from_pretrained(cls, model_type, override_args=None): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Andrej
					Andrej